x2cs 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +7 -0
- data/README.md +3 -0
- data/bin/x2cs +8 -0
- data/spec/spec_helper.rb +70 -0
- data/spec/xmlparse_spec.rb +168 -0
- data/xmlparse.rb +119 -0
- metadata +65 -0
data/Gemfile
ADDED
data/README.md
ADDED
data/bin/x2cs
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path('../..', __FILE__))
|
2
|
+
require 'xmlparse.rb'
|
3
|
+
include XmlParser
|
4
|
+
|
5
|
+
# Simple contact record used to generate XML fixtures for the specs.
class Person
  attr_accessor :name, :email, :phone, :address

  # Assigns each key/value pair of +args+ through the matching writer,
  # so Person.new(name: 'x') ends up calling #name=.
  def initialize(args)
    args.each { |attribute, value| send(:"#{attribute}=", value) }
  end

  # Renders the record as a single-line <Person> element. Values are
  # interpolated verbatim (nothing is escaped), so a value that contains
  # markup becomes nested elements in the output.
  def to_xml
    inner = [
      "<name>#{name}</name>",
      "<email>#{email}</email>",
      "<phone>#{phone}</phone>",
      "<address>#{address}</address>"
    ].join
    "<Person>#{inner}</Person>"
  end
end
|
18
|
+
|
19
|
+
# XML declaration written as the first line of every generated fixture file.
def xml_header
  %q(<?xml version="1.0" encoding="utf-8" ?>)
end
|
22
|
+
|
23
|
+
# Writes the default fixture (the people returned by add_people) to
# test_file.xml via write_tests_to_file.
def load_test_files
  default_people = add_people
  write_tests_to_file(default_people)
end
|
26
|
+
|
27
|
+
# Writes test_file.xml in the current directory: the XML declaration, then a
# <people> root holding one line per entry of +people+ (each entry must
# respond to #to_xml).
def write_tests_to_file(people)
  File.open('test_file.xml', 'w+') do |fixture|
    fixture.write("#{xml_header}\n")
    fixture.write("<people>")
    people.each { |person| fixture.write("#{person.to_xml}\n") }
    fixture.write("</people>")
  end
end
|
37
|
+
|
38
|
+
# Writes the default fixture plus a fifth person whose email value is itself
# markup (an <emails> element wrapping two <email> children), producing a
# record with a nested, multi-valued field.
def load_test_files_with_extra
  nested_emails = '<emails><email>1@test.com</email><email>2@test.com</email></emails>'
  roster = add_people
  roster << Person.new(name: 'Jack johnson', email: nested_emails,
                       phone: '555-233-3333', address: '444 test')
  write_tests_to_file(roster)
end
|
45
|
+
|
46
|
+
# Writes the default fixture followed by one extra <Person> record that has
# no <phone> element, so the last record is missing a column.
def load_test_files_with_missing
  complete_people = add_people
  incomplete_record = "<Person><name>A test name</name><email>something@you.com</email><address>test address</address></Person>\n"

  File.open('test_file.xml', 'w+') do |fixture|
    fixture.write("#{xml_header}\n")
    fixture.write("<people>")
    complete_people.each { |person| fixture.write("#{person.to_xml}\n") }
    fixture.write(incomplete_record)
    fixture.write("</people>")
  end
end
|
58
|
+
|
59
|
+
# Builds the four Person records shared by every fixture.
# Returns a fresh Array on each call, so callers may append to it.
def add_people
  [
    { name: 'A person', email: 'test@something.com', phone: '555-233-3333', address: '444 test' },
    { name: 'John Smith', email: 'jsmith@fake.com', phone: '555-249-8833', address: '123 fake st' },
    { name: 'Jane Doe', email: 'example@something.ca', phone: '232-855-3422', address: '8643 Avenue Lane' },
    { name: 'Willis', email: 'nonexistant@example.com', phone: '416-233-7688', address: 'a test address' }
  ].map { |attributes| Person.new(attributes) }
end
|
67
|
+
|
68
|
+
# Deletes the generated fixture (test_file.xml in the current directory).
#
# Does nothing when the file is absent: this runs from `after` hooks, and
# raising Errno::ENOENT there would hide the real failure whenever a
# `before` hook died before the fixture was written.
def remove_test_files
  File.delete('test_file.xml') if File.exist?('test_file.xml')
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe XmlParser do
|
4
|
+
describe "#init" do
  # Console I/O is stubbed so init reads 'test_file.xml' as the file name
  # and prints nothing while the examples run.
  before(:each) do
    load_test_files
    XmlParser.stub!(:gets).and_return('test_file.xml')
    XmlParser.stub!(:puts)
  end

  after(:each) { remove_test_files }

  it "should give a greeting message 'type xml file name'" do
    XmlParser.stub!(:build_headers)
    XmlParser.should_receive(:puts).with('type xml file name')
    XmlParser.init
  end

  it "should call build_headers" do
    XmlParser.stub!(:build_headers)
    XmlParser.should_receive(:build_headers)
    XmlParser.init
  end
end
|
27
|
+
|
28
|
+
describe "#build_headers" do
  context "Load test files with 4 headers" do
    # The prompt answer is stubbed to 'Person' and the row traversal is
    # stubbed out, so only header collection is exercised.
    before(:each) do
      load_test_files
      XmlParser.stub!(:gets).and_return('Person')
      XmlParser.stub!(:puts)
      @doc = File.open('test_file.xml') { |file| Nokogiri::XML(file) }
      XmlParser.stub!(:traverse_through_each_row)
    end

    after(:each) { remove_test_files }

    it "@headers should be of size 4" do
      XmlParser.send(:build_headers, @doc)
      XmlParser.headers.size.should == 4
    end

    # One example per expected column name.
    %w[name email address phone].each do |header|
      it "@headers should contain #{header}" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?(header).should be_true
      end
    end
  end

  context "Traversing node is non-existant" do
    # Same setup, but the prompt answer names a tag the fixture lacks.
    before(:each) do
      load_test_files
      XmlParser.stub!(:gets).and_return('Badtagname')
      XmlParser.stub!(:puts)
      @doc = File.open('test_file.xml') { |file| Nokogiri::XML(file) }
      XmlParser.stub!(:traverse_through_each_row)
    end

    after(:each) { remove_test_files }

    it "raises an exception if tag is not found" do
      -> { XmlParser.send(:build_headers, @doc) }.should raise_error(RuntimeError, "Tag not found")
    end
  end
end
|
90
|
+
|
91
|
+
describe "#traverse_through_each_row" do
  before(:each) do
    load_test_files_with_extra
    @doc = File.open('test_file.xml') { |file| Nokogiri::XML(file) }

    # Stub console and file output BEFORE build_headers runs: build_headers
    # goes on to call traverse_through_each_row and write_to_file, which
    # would otherwise print the prompt and leave a stray .csv file behind.
    XmlParser.stub!(:gets).and_return('Person')
    XmlParser.stub!(:puts)
    XmlParser.stub!(:write_to_file)

    # Read the headers from the parser itself; the return value of
    # build_headers is whatever the traversal returned, not the parser.
    XmlParser.send(:build_headers, @doc)
    @headers = XmlParser.headers
  end

  after(:each) { remove_test_files }

  it "should populate @rows with 5 objects" do
    XmlParser.send(:traverse_through_each_row, @doc, 'Person')
    XmlParser.rows.size.should == 5
  end

  it "row[4] should contain multiple values in email field" do
    XmlParser.send(:traverse_through_each_row, @doc, 'Person')
    r = XmlParser.rows[4]
    r.index("1@test.com|2@test.com").should_not be_nil
  end
end
|
118
|
+
|
119
|
+
|
120
|
+
describe "#populate_missing" do
  # populate_missing compares a node's children with the parser's @headers,
  # so each context builds the headers from its own fixture first (with the
  # row traversal stubbed out). Without this the examples only pass when an
  # earlier example happened to leave suitable headers behind.

  context "files have all headers for each record" do
    before(:each) do
      load_test_files
      @doc = File.open('test_file.xml') { |file| Nokogiri::XML(file) }

      XmlParser.stub!(:gets).and_return('Person')
      XmlParser.stub!(:puts)
      XmlParser.stub!(:traverse_through_each_row)
      XmlParser.send(:build_headers, @doc)
    end

    after(:each) { remove_test_files }

    it "first_node have a child size of 4 after the method call" do
      first_node = @doc.css('Person').first
      XmlParser.send(:populate_missing, first_node)
      first_node.children.size.should == 4
    end
  end

  context "One row is missing a header" do
    before(:each) do
      load_test_files_with_missing
      @doc = File.open('test_file.xml') { |file| Nokogiri::XML(file) }

      XmlParser.stub!(:gets).and_return('Person')
      XmlParser.stub!(:puts)
      XmlParser.stub!(:traverse_through_each_row)
      XmlParser.send(:build_headers, @doc)
    end

    after(:each) { remove_test_files }

    it "last_node should have a child size of 3 before the method call" do
      last_node = @doc.css('Person').last
      last_node.children.size.should == 3
    end

    it "last_node should have a child size of 4 after the method call" do
      last_node = @doc.css('Person').last
      XmlParser.send(:populate_missing, last_node)
      last_node.children.size.should == 4
    end
  end
end
|
168
|
+
end
|
data/xmlparse.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Converts an XML document made of repeated record elements (for example a
# list of <Person> nodes) into a CSV file.
#
# Everything is a plain instance method so the module can be mixed into any
# object. State is kept in instance variables of the including object:
#   @xml_name - path of the input file as typed by the user
#   @headers  - column names, in the order they were first seen
#   @rows     - one comma-joined String per record
module XmlParser
  # Column names collected by build_headers (nil until it has run).
  attr_reader :headers

  # CSV data lines built by traverse_through_each_row (nil until it has run).
  attr_reader :rows

  # Interactive entry point: asks for the XML file name, parses the file and
  # hands the document to build_headers, which drives the rest of the
  # conversion. The result is written to "<file name>.csv".
  def init
    puts 'type xml file name'
    @xml_name = gets.chomp
    # Block form closes the handle even when parsing raises.
    doc = File.open(@xml_name) { |file| Nokogiri::XML(file) }
    build_headers(doc)
  end

  private

  # Asks which element represents one record, collects the names of the
  # element children of every such record into @headers, then converts the
  # rows.
  #
  # doc - parsed document responding to #css.
  #
  # Raises RuntimeError ("Tag not found") when no node matches the name.
  # Returns whatever traverse_through_each_row returns.
  def build_headers(doc)
    @headers = []
    puts "Enter in node name to traverse. If it's an xml table of <Person> objects, type \"Person\" (without quotes)"
    node_to_traverse = gets.chomp
    nodes = doc.css(node_to_traverse)
    raise "Tag not found" if nodes.empty?

    nodes.each do |record|
      record.children.each do |field|
        # Only real elements are columns. Testing the node type (rather than
        # name != "text") keeps comments/CDATA out of the header list and
        # lets an element that is literally called <text> be a column.
        next unless field.element?
        @headers << field.name unless @headers.include?(field.name)
      end
    end
    traverse_through_each_row(doc, node_to_traverse)
  end

  # Builds @rows: one CSV line per node matching +node_to_traverse+, with
  # fields ordered like @headers. All text found below a field (at any
  # depth) is joined with '|'. Finishes by writing the CSV file.
  def traverse_through_each_row(doc, node_to_traverse)
    @rows = []
    doc.css(node_to_traverse).each do |record|
      fields = Array.new(@headers.size)
      populate_missing(record).children.each do |field|
        next unless field.element?
        values = []
        field.children.each { |child| recursive_iteration(child, values) }
        fields[@headers.index(field.name)] = enclose(values.join('|'))
      end
      @rows << fields.join(',')
    end

    write_to_file
  end

  # Depth-first walk below a field: descends into elements and appends the
  # cleaned, non-empty text of every other node to +f+. Returns +f+.
  def recursive_iteration(c, f)
    if c.element?
      c.children.each { |child| recursive_iteration(child, f) }
    else
      text = strip(c.text)
      f << text unless text.empty?
    end
    f
  end

  # Writes the header line followed by every row to "#{@xml_name}.csv".
  # Returns @rows.
  def write_to_file
    File.open("#{@xml_name}.csv", 'w+') do |file|
      file.write("#{@headers.join(',')}\n")
      @rows.each { |row| file.write("#{row}\n") }
    end
  end

  # Makes +str+ safe to use as one CSV field: values containing a comma, a
  # double quote or a line break are wrapped in double quotes, with embedded
  # double quotes doubled. Other values are returned unchanged.
  def enclose(str)
    return str unless str =~ /[",\r\n]/
    "\"#{str.gsub('"', '""')}\""
  end

  # Convenience wrapper: strip, then enclose.
  def enclose_and_strip(str)
    enclose(strip(str))
  end

  # Removes every newline and tab, then trims surrounding whitespace.
  def strip(str)
    str.delete("\n\t").strip
  end

  # Appends an empty element to +location+ for every header it has no
  # element child for, so each record ends up with the full set of columns.
  # Does nothing when no headers have been built yet. Returns +location+.
  def populate_missing(location)
    present = location.children.select { |child| child.element? }.map { |child| child.name }
    ((@headers || []) - present).each { |missing| location.add_child("<#{missing} />") }
    location
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: x2cs
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.0'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron McLeod
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-09-21 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &70350295279760 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70350295279760
|
25
|
+
description: ''
|
26
|
+
email:
|
27
|
+
- aaron.g.mcleod@gmail.com
|
28
|
+
executables:
|
29
|
+
- x2cs
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- spec/spec_helper.rb
|
34
|
+
- spec/xmlparse_spec.rb
|
35
|
+
- bin/x2cs
|
36
|
+
- README.md
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- xmlparse.rb
|
40
|
+
homepage: http://github.com/agmcleod/x2c
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- .
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.7
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: A tool for converting XML data to CSV. Generally meant for entities with
|
64
|
+
fields, as opposed to many nodes deep.
|
65
|
+
test_files: []
|