x2cs 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +7 -0
- data/README.md +3 -0
- data/bin/x2cs +8 -0
- data/spec/spec_helper.rb +70 -0
- data/spec/xmlparse_spec.rb +168 -0
- data/xmlparse.rb +119 -0
- metadata +65 -0
data/Gemfile
ADDED
data/README.md
ADDED
data/bin/x2cs
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path('../..', __FILE__))
|
2
|
+
require 'xmlparse.rb'
|
3
|
+
include XmlParser
|
4
|
+
|
5
|
+
# Minimal value object the specs use to render <Person> XML fixtures.
class Person
  attr_accessor :name, :email, :phone, :address

  # Assigns every key/value pair in +args+ through the matching writer,
  # e.g. Person.new(name: 'x') ends up calling #name=.
  def initialize(args)
    args.each { |attribute, value| send(:"#{attribute}=", value) }
  end

  # Renders the person as a single-line <Person> element. Values are
  # interpolated verbatim (no escaping), so a value may itself carry markup.
  def to_xml
    inner = %w[name email phone address].map { |tag| "<#{tag}>#{send(tag)}</#{tag}>" }.join
    "<Person>#{inner}</Person>"
  end
end
|
18
|
+
|
19
|
+
# XML declaration written as the first line of every generated fixture file.
def xml_header
  %(<?xml version="1.0" encoding="utf-8" ?>)
end
|
22
|
+
|
23
|
+
# Writes the standard set of people returned by add_people to the fixture file.
def load_test_files
  everyone = add_people
  write_tests_to_file(everyone)
end
|
26
|
+
|
27
|
+
# Writes test_file.xml in the current directory: the XML declaration, then a
# <people> root holding one line per entry of +people+.
#
# people - objects responding to #to_xml.
def write_tests_to_file(people)
  File.open('test_file.xml', 'w+') do |out|
    out << "#{xml_header}\n" << "<people>"
    people.each { |person| out << "#{person.to_xml}\n" }
    out.write("</people>")
  end
end
|
37
|
+
|
38
|
+
# Writes the standard fixture plus a fifth person whose email value is itself
# nested <emails>/<email> markup, so the resulting <email> element has
# several descendant text values.
def load_test_files_with_extra
  nested_emails = '<emails><email>1@test.com</email><email>2@test.com</email></emails>'
  people = add_people
  people << Person.new(name: 'Jack johnson', email: nested_emails,
                       phone: '555-233-3333', address: '444 test')
  # The original had a stray bare `people` expression here; it did nothing.
  write_tests_to_file(people)
end
|
45
|
+
|
46
|
+
# Writes the standard fixture followed by one <Person> record that has no
# <phone> element.
#
# The incomplete record is wrapped in a tiny object answering #to_xml so the
# file can be produced by write_tests_to_file instead of duplicating its
# writing logic; the resulting file content is the same as before.
def load_test_files_with_missing
  incomplete = Struct.new(:to_xml).new(
    "<Person><name>A test name</name><email>something@you.com</email><address>test address</address></Person>"
  )
  write_tests_to_file(add_people << incomplete)
end
|
58
|
+
|
59
|
+
# Builds the four standard Person fixtures. A fresh array is returned on
# every call, so callers may append to it freely.
def add_people
  [
    { name: 'A person', email: 'test@something.com', phone: '555-233-3333', address: '444 test' },
    { name: 'John Smith', email: 'jsmith@fake.com', phone: '555-249-8833', address: '123 fake st' },
    { name: 'Jane Doe', email: 'example@something.ca', phone: '232-855-3422', address: '8643 Avenue Lane' },
    { name: 'Willis', email: 'nonexistant@example.com', phone: '416-233-7688', address: 'a test address' }
  ].map { |attributes| Person.new(attributes) }
end
|
67
|
+
|
68
|
+
# Deletes the generated fixture file; raises if it does not exist.
def remove_test_files
  File.unlink('test_file.xml')
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for XmlParser. The module's methods are reached through
# `XmlParser.<method>` / `XmlParser.send`, which relies on spec_helper doing a
# top-level `include XmlParser`. Console I/O (`gets`/`puts`) and downstream
# steps are stubbed on XmlParser so each example exercises a single method.
describe XmlParser do
  describe "#init" do
    before(:each) do
      load_test_files
      XmlParser.stub!(:gets).and_return('test_file.xml')
      XmlParser.stub!(:puts)
    end

    it "should give a greeting message 'type xml file name'" do
      XmlParser.stub!(:build_headers)
      XmlParser.should_receive(:puts).with('type xml file name')
      XmlParser::init
    end

    it "should call build_headers" do
      # should_receive replaces the method, so no separate stub is needed.
      XmlParser.should_receive(:build_headers)
      XmlParser::init
    end

    after(:each) do
      remove_test_files
    end
  end

  describe "#build_headers" do
    context "Load test files with 4 headers" do
      before(:each) do
        load_test_files
        XmlParser.stub!(:gets).and_return('Person')
        XmlParser.stub!(:puts)
        # Block form closes the handle even if parsing raises.
        @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
        XmlParser.stub!(:traverse_through_each_row)
      end

      it "@headers should be of size 4" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.size.should == 4
      end

      it "@headers should contain name" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?('name').should be_true
      end

      it "@headers should contain email" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?('email').should be_true
      end

      it "@headers should contain address" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?('address').should be_true
      end

      it "@headers should contain phone" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?('phone').should be_true
      end

      after(:each) do
        remove_test_files
      end
    end

    context "Traversing node is non-existant" do
      before(:each) do
        load_test_files
        XmlParser.stub!(:gets).and_return('Badtagname')
        XmlParser.stub!(:puts)
        @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
        XmlParser.stub!(:traverse_through_each_row)
      end

      it "raises an exception if tag is not found" do
        lambda { XmlParser.send(:build_headers, @doc) }.should raise_error(RuntimeError, "Tag not found")
      end

      after(:each) do
        remove_test_files
      end
    end
  end

  describe "#traverse_through_each_row" do
    before(:each) do
      load_test_files_with_extra
      @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }

      XmlParser.stub!(:gets).and_return('Person')
      XmlParser.stub!(:puts)
      # Stub the CSV writer BEFORE build_headers runs: build_headers goes on to
      # call traverse_through_each_row, which would otherwise write a real
      # .csv file that nothing cleans up.
      XmlParser.stub!(:write_to_file)
      XmlParser.send(:build_headers, @doc)
    end

    it "should populate @rows with 5 objects" do
      XmlParser.send(:traverse_through_each_row, @doc, 'Person')
      XmlParser.rows.size.should == 5
    end

    it "row[4] should contain multiple values in email field" do
      XmlParser.send(:traverse_through_each_row, @doc, 'Person')
      r = XmlParser.rows[4]
      r.index("1@test.com|2@test.com").should_not be_nil
    end

    after(:each) do
      remove_test_files
    end
  end


  describe "#populate_missing" do

    context "files have all headers for each record" do
      before(:each) do
        load_test_files
        @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }

        XmlParser.stub!(:gets).and_return('Person')
        XmlParser.stub!(:puts)
        # populate_missing reads the collected headers, so build them here
        # rather than relying on state left behind by earlier examples.
        XmlParser.stub!(:traverse_through_each_row)
        XmlParser.send(:build_headers, @doc)
      end

      it "first_node have a child size of 4 after the method call" do
        first_node = @doc.css('Person').first
        XmlParser.send(:populate_missing, first_node)
        first_node.children.size.should == 4
      end

      after(:each) do
        remove_test_files
      end
    end

    context "One row is missing a header" do
      before(:each) do
        load_test_files_with_missing
        @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
        XmlParser.stub!(:gets).and_return('Person')
        XmlParser.stub!(:puts)
        # Same reasoning as above: make the header list explicit per example.
        XmlParser.stub!(:traverse_through_each_row)
        XmlParser.send(:build_headers, @doc)
      end

      it "last_node should have a child size of 3 before the method call" do
        last_node = @doc.css('Person').last
        last_node.children.size.should == 3
      end

      it "last_node should have a child size of 4 after the method call" do
        last_node = @doc.css('Person').last
        XmlParser.send(:populate_missing, last_node)
        last_node.children.size.should == 4
      end

      after(:each) do
        remove_test_files
      end
    end
  end
end
|
data/xmlparse.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Interactive XML-to-CSV conversion.
#
# Flow: #init prompts for an XML file name and parses it; build_headers
# prompts for the row element name and collects the distinct child element
# names as CSV headers; traverse_through_each_row turns every row element into
# one CSV line; write_to_file writes "<xml file name>.csv".
#
# All methods are instance methods and state is kept in instance variables of
# whatever object the module is mixed into.
module XmlParser
  # headers - column names collected by the last build_headers call.
  # rows    - CSV row strings built by the last traverse_through_each_row call.
  # Both are nil until the corresponding step has run.
  attr_reader :headers, :rows

  # Prompts for an XML file name, parses the file and starts the conversion.
  def init
    puts 'type xml file name'
    @xml_name = gets.chomp
    # Block form guarantees the handle is closed even if parsing raises.
    doc = File.open(@xml_name) { |file| Nokogiri::XML(file) }
    build_headers(doc)
  end

  private

  # Prompts for the name of the row element, records the distinct names of the
  # element children of every matching node in @headers (first-seen order),
  # then hands over to traverse_through_each_row.
  #
  # doc - parsed Nokogiri XML document.
  #
  # Raises RuntimeError ("Tag not found") when no node matches the name.
  def build_headers(doc)
    @headers = []
    puts "Enter in node name to traverse. If it's an xml table of <Person> objects, type \"Person\" (without quotes)"
    node_to_traverse = gets.chomp
    nodes = doc.css(node_to_traverse)
    raise "Tag not found" if nodes.empty?

    nodes.each do |row|
      row.children.each do |field|
        # Only element children become columns; this is the same filter the
        # row traversal applies, so every header has a slot that can be filled.
        next unless field.element?
        @headers << field.name unless @headers.include?(field.name)
      end
    end
    traverse_through_each_row(doc, node_to_traverse)
  end

  # Builds @rows: one comma-joined string per node matching +node_to_traverse+,
  # with values placed in @headers order, then writes the CSV file.
  # Multiple text values found under a single field are joined with '|'.
  #
  # doc              - parsed Nokogiri XML document.
  # node_to_traverse - CSS selector / element name of the row nodes.
  def traverse_through_each_row(doc, node_to_traverse)
    @rows = doc.css(node_to_traverse).map do |row|
      fields = Array.new(@headers.size)
      populate_missing(row).children.each do |field|
        next unless field.element?
        values = field.children.inject([]) { |collected, child| recursive_iteration(child, collected) }
        fields[@headers.index(field.name)] = enclose(values.join('|'))
      end
      fields.join(',')
    end

    write_to_file
  end

  # Depth-first collection of the non-blank text found at or below node +c+.
  #
  # c - a Nokogiri node.
  # f - Array the cleaned text values are appended to (mutated in place).
  #
  # Returns +f+.
  def recursive_iteration(c, f)
    if c.element?
      c.children.each { |child| recursive_iteration(child, f) }
    else
      text = strip(c.text)
      f << text unless text.empty?
    end
    f
  end

  # Writes the header line followed by every row to "#{@xml_name}.csv".
  def write_to_file
    File.open("#{@xml_name}.csv", 'w+') do |file|
      file.write(@headers.join(',') + "\n")
      @rows.each { |row| file.write("#{row}\n") }
    end
  end

  # Quotes +str+ for use as a CSV field when required: a value containing a
  # comma, a double quote or a line break is wrapped in double quotes and any
  # embedded double quotes are doubled. Other values are returned unchanged.
  def enclose(str)
    return str unless str =~ /[",\r\n]/

    "\"#{str.gsub('"', '""')}\""
  end

  # Convenience composition of strip followed by enclose.
  def enclose_and_strip(str)
    enclose(strip(str))
  end

  # Removes every newline and tab from +str+ and trims surrounding whitespace.
  def strip(str)
    str.delete("\n\t").strip
  end

  # Appends an empty element to +location+ for every header that has no
  # matching element child.
  #
  # Returns +location+ (the same node, modified in place).
  def populate_missing(location)
    present = location.children.select(&:element?).map(&:name)
    (@headers - present).each { |missing| location.add_child("<#{missing} />") }
    location
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: x2cs
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.0'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron McLeod
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-09-21 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &70350295279760 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70350295279760
|
25
|
+
description: ''
|
26
|
+
email:
|
27
|
+
- aaron.g.mcleod@gmail.com
|
28
|
+
executables:
|
29
|
+
- x2cs
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- spec/spec_helper.rb
|
34
|
+
- spec/xmlparse_spec.rb
|
35
|
+
- bin/x2cs
|
36
|
+
- README.md
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- xmlparse.rb
|
40
|
+
homepage: http://github.com/agmcleod/x2c
|
41
|
+
licenses: []
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- .
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.7
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: A tool for converting XML data to CSV. Generally meant for entities with
|
64
|
+
fields, as opposed to many nodes deep.
|
65
|
+
test_files: []
|