x2cs 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ *.XML
2
+ *.xml
3
+ *.csv
4
+ .redcar
5
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
# Fetch gems over HTTPS. The :rubygems symbol shorthand is deprecated by
# Bundler in favour of an explicit source URL.
source 'https://rubygems.org'

# Only needed to run the specs.
group :dev do
  gem 'rspec'
end

# XML parsing used by xmlparse.rb.
gem 'nokogiri'
@@ -0,0 +1,3 @@
1
+ # X2C - An XML data to CSV conversion Tool.
2
+
3
+ _Note that this software is a work in progress and is not yet ready for release. bin/run.rb works, but the tests are incomplete. Use at your own risk._
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby
# Command-line entry point: puts the directory one level above this script's
# directory on the load path, mixes XmlParser in at the top level and starts
# the interactive conversion.
root = File.expand_path('../..', __FILE__)
$LOAD_PATH.unshift(root)

require 'xmlparse.rb'

include XmlParser

XmlParser.init
@@ -0,0 +1,70 @@
1
+ $LOAD_PATH.unshift(File.expand_path('../..', __FILE__))
2
+ require 'xmlparse.rb'
3
+ include XmlParser
4
+
5
# Minimal person record used to generate XML fixtures for the specs.
class Person
  attr_accessor :name, :email, :phone, :address

  # args - Hash of attribute name => value. Each pair is assigned through the
  #        writer method of the same name.
  def initialize(args)
    args.each { |attribute, value| send(:"#{attribute}=", value) }
  end

  # Serialises the record as a single-line <Person> element. Values are
  # interpolated verbatim (no escaping), so a value may itself contain markup.
  def to_xml
    fields = [
      "<name>#{name}</name>",
      "<email>#{email}</email>",
      "<phone>#{phone}</phone>",
      "<address>#{address}</address>"
    ].join
    "<Person>#{fields}</Person>"
  end
end
18
+
19
# XML declaration written as the first line of every fixture file.
def xml_header
  %(<?xml version="1.0" encoding="utf-8" ?>)
end
22
+
23
# Writes the people returned by add_people to disk via write_tests_to_file.
def load_test_files
  fixture_people = add_people
  write_tests_to_file(fixture_people)
end
26
+
27
# Writes +people+ to test_file.xml (relative to the current directory) as a
# <people> document. Each entry must respond to #to_xml.
def write_tests_to_file(people)
  File.open('test_file.xml', 'w+') do |out|
    out.write("#{xml_header}\n")
    out.write("<people>")
    people.each { |person| out.write("#{person.to_xml}\n") }
    out.write("</people>")
  end
end
37
+
38
# Writes the add_people fixture plus one extra person whose email value is
# nested <emails>/<email> markup holding two addresses.
def load_test_files_with_extra
  people = add_people
  extra = Person.new(
    name: 'Jack johnson',
    email: '<emails><email>1@test.com</email><email>2@test.com</email></emails>',
    phone: '555-233-3333',
    address: '444 test'
  )
  write_tests_to_file(people << extra)
end
45
+
46
# Writes the add_people fixture to test_file.xml followed by one hand-written
# <Person> record that has no <phone> element.
def load_test_files_with_missing
  incomplete_record = "<Person><name>A test name</name><email>something@you.com</email><address>test address</address></Person>\n"
  people = add_people
  File.open('test_file.xml', 'w+') do |out|
    out.write("#{xml_header}\n")
    out.write("<people>")
    people.each { |person| out.write("#{person.to_xml}\n") }
    out.write(incomplete_record)
    out.write("</people>")
  end
end
58
+
59
# Builds a fresh array of the four Person records used as the base fixture.
def add_people
  [
    { name: 'A person', email: 'test@something.com', phone: '555-233-3333', address: '444 test' },
    { name: 'John Smith', email: 'jsmith@fake.com', phone: '555-249-8833', address: '123 fake st' },
    { name: 'Jane Doe', email: 'example@something.ca', phone: '232-855-3422', address: '8643 Avenue Lane' },
    { name: 'Willis', email: 'nonexistant@example.com', phone: '416-233-7688', address: 'a test address' }
  ].map { |attributes| Person.new(attributes) }
end
67
+
68
# Removes the files the specs may leave in the current directory: the
# test_file.xml fixture and the test_file.xml.csv that the parser writes when
# its output step is not stubbed. Files that do not exist are skipped, so the
# cleanup hook itself cannot fail an example.
def remove_test_files
  ['test_file.xml', 'test_file.xml.csv'].each do |path|
    File.delete(path) if File.exist?(path)
  end
end
@@ -0,0 +1,168 @@
1
+ require 'spec_helper'
2
+
3
+ describe XmlParser do
4
describe "#init" do
  # Every example runs against a fresh fixture with console I/O and the
  # downstream build_headers step stubbed out.
  before(:each) do
    load_test_files
    XmlParser.stub!(:gets).and_return('test_file.xml')
    XmlParser.stub!(:puts)
    XmlParser.stub!(:build_headers)
  end

  after(:each) { remove_test_files }

  it "should give a greeting message 'type xml file name'" do
    XmlParser.should_receive(:puts).with('type xml file name')
    XmlParser.init
  end

  it "should call build_headers" do
    XmlParser.should_receive(:build_headers)
    XmlParser.init
  end
end
27
+
28
describe "#build_headers" do
  context "Load test files with 4 headers" do
    # The row traversal is stubbed so only header collection is exercised.
    before(:each) do
      load_test_files
      XmlParser.stub!(:gets).and_return('Person')
      XmlParser.stub!(:puts)
      @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
      XmlParser.stub!(:traverse_through_each_row)
    end

    after(:each) { remove_test_files }

    it "@headers should be of size 4" do
      XmlParser.send(:build_headers, @doc)
      XmlParser.headers.size.should == 4
    end

    # One example per expected column name.
    %w[name email address phone].each do |header|
      it "@headers should contain #{header}" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.include?(header).should be_true
      end
    end
  end

  context "Traversing node is non-existant" do
    before(:each) do
      load_test_files
      XmlParser.stub!(:gets).and_return('Badtagname')
      XmlParser.stub!(:puts)
      @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
      XmlParser.stub!(:traverse_through_each_row)
    end

    after(:each) { remove_test_files }

    it "raises an exception if tag is not found" do
      lambda { XmlParser.send(:build_headers, @doc) }.should raise_error(RuntimeError, "Tag not found")
    end
  end
end
90
+
91
describe "#traverse_through_each_row" do
  before(:each) do
    load_test_files_with_extra
    @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }

    # All I/O must be stubbed before build_headers runs: it prompts through
    # puts/gets and then goes on to traverse the rows, which ends in
    # write_to_file. Stubbing write_to_file afterwards would let a CSV file
    # be written to disk on every example.
    XmlParser.stub!(:puts)
    XmlParser.stub!(:gets).and_return('Person')
    XmlParser.stub!(:write_to_file)
    XmlParser.send(:build_headers, @doc)
    @headers = XmlParser.headers
  end

  after(:each) { remove_test_files }

  it "should populate @rows with 5 objects" do
    XmlParser.send(:traverse_through_each_row, @doc, 'Person')
    XmlParser.rows.size.should == 5
  end

  it "row[4] should contain multiple values in email field" do
    XmlParser.send(:traverse_through_each_row, @doc, 'Person')
    r = XmlParser.rows[4]
    r.index("1@test.com|2@test.com").should_not be_nil
  end
end
118
+
119
+
120
describe "#populate_missing" do
  # Parses test_file.xml and builds the parser's headers from it, with console
  # I/O and the row traversal stubbed. populate_missing compares a record
  # against those headers, so building them here keeps the examples from
  # depending on state left behind by other examples.
  def parse_fixture_and_build_headers
    @doc = File.open('test_file.xml') { |f| Nokogiri::XML(f) }
    XmlParser.stub!(:puts)
    XmlParser.stub!(:gets).and_return('Person')
    XmlParser.stub!(:traverse_through_each_row)
    XmlParser.send(:build_headers, @doc)
  end

  context "files have all headers for each record" do
    before(:each) do
      load_test_files
      parse_fixture_and_build_headers
    end

    after(:each) { remove_test_files }

    it "first_node have a child size of 4 after the method call" do
      first_node = @doc.css('Person').first
      XmlParser.send(:populate_missing, first_node)
      first_node.children.size.should == 4
    end
  end

  context "One row is missing a header" do
    before(:each) do
      load_test_files_with_missing
      parse_fixture_and_build_headers
    end

    after(:each) { remove_test_files }

    it "last_node should have a child size of 3 before the method call" do
      last_node = @doc.css('Person').last
      last_node.children.size.should == 3
    end

    it "last_node should have a child size of 4 after the method call" do
      last_node = @doc.css('Person').last
      XmlParser.send(:populate_missing, last_node)
      last_node.children.size.should == 4
    end
  end
end
168
+ end
@@ -0,0 +1,119 @@
1
+ require 'rubygems'
2
+ require 'nokogiri'
3
+
4
# Interactive XML-to-CSV conversion built on Nokogiri.
#
# The module is meant to be mixed in: the scripts in this project
# `include XmlParser` at the top level and then call XmlParser::init.
# Conversion state is kept in the receiver's instance variables
# @xml_name, @headers and @rows.
module XmlParser
  # Prompts (via puts/gets) for an XML file name, parses that file and hands
  # the document to build_headers, which drives the rest of the conversion.
  def init
    puts 'type xml file name'
    @xml_name = gets.chomp
    # Block form closes the handle even when parsing raises.
    doc = File.open(@xml_name) { |f| Nokogiri::XML(f) }
    build_headers(doc)
  end

  # Column names collected by build_headers (nil until it has run).
  def headers
    @headers
  end

  # CSV lines built by traverse_through_each_row (nil until it has run).
  def rows
    @rows
  end

  private

  # Prompts for the name of the record element, collects the distinct names
  # of the element children of every matching node into @headers (in order of
  # first appearance), then continues with traverse_through_each_row.
  #
  # doc - parsed Nokogiri document.
  #
  # Raises RuntimeError ("Tag not found") when no node matches the name.
  def build_headers(doc)
    @headers = []
    puts "Enter in node name to traverse. If it's an xml table of <Person> objects, type \"Person\" (without quotes)"
    node_to_traverse = gets.chomp
    nodes = doc.css(node_to_traverse)
    raise "Tag not found" if nodes.size == 0

    nodes.each do |record|
      record.children.each do |field|
        # Only element children are columns. Testing the node type (rather
        # than the name "text") keeps comments and CDATA out of the headers
        # and lets an element that is really called <text> through.
        next unless field.element?
        @headers << field.name unless @headers.include?(field.name)
      end
    end
    traverse_through_each_row(doc, node_to_traverse)
  end

  # Builds one CSV line per node matching +node_to_traverse+ into @rows, then
  # writes the result with write_to_file. Values are placed in @headers
  # order; the text fragments found inside one field are joined with "|".
  def traverse_through_each_row(doc, node_to_traverse)
    @rows = doc.css(node_to_traverse).map do |record|
      fields = Array.new(@headers.size)
      populate_missing(record).children.each do |field|
        next unless field.element?
        fragments = []
        field.children.each { |child| recursive_iteration(child, fragments) }
        fields[@headers.index(field.name)] = enclose(fragments.join('|'))
      end
      fields.join(',')
    end

    write_to_file
  end

  # Depth-first walk below +c+: appends the stripped, non-empty text of every
  # non-element node to +f+ and returns +f+.
  def recursive_iteration(c, f)
    if c.element?
      c.children.each { |child| recursive_iteration(child, f) }
    else
      text = strip(c.text)
      f << text unless text.empty?
    end
    f
  end

  # Writes the header line and every row to "<xml file name>.csv".
  def write_to_file
    File.open("#{@xml_name}.csv", 'w+') do |file|
      file.write(@headers.join(',') + "\n")
      @rows.each { |row| file.write("#{row}\n") }
    end
  end

  # Returns +str+ as a CSV field: values containing a comma, a double quote
  # or a line break are wrapped in double quotes with embedded quotes
  # doubled; anything else is returned unchanged.
  def enclose(str)
    return str unless str =~ /[",\r\n]/
    "\"#{str.gsub('"', '""')}\""
  end

  # strip followed by enclose.
  def enclose_and_strip(str)
    enclose(strip(str))
  end

  # Removes every newline and tab, then leading/trailing whitespace.
  def strip(str)
    str.gsub(/\n|\t/, '').strip
  end

  # Appends an empty element to +location+ for every header it has no element
  # child for, so each record ends up with the full set of columns.
  # Returns +location+.
  def populate_missing(location)
    present = location.children.select { |child| child.element? }.map { |child| child.name }
    # @headers is nil until build_headers has run; treat that as "no columns".
    ((@headers || []) - present).each { |missing| location.add_child("<#{missing} />") }
    location
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: x2cs
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron McLeod
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-21 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70350295279760 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70350295279760
25
+ description: ''
26
+ email:
27
+ - aaron.g.mcleod@gmail.com
28
+ executables:
29
+ - x2cs
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - spec/spec_helper.rb
34
+ - spec/xmlparse_spec.rb
35
+ - bin/x2cs
36
+ - README.md
37
+ - .gitignore
38
+ - Gemfile
39
+ - xmlparse.rb
40
+ homepage: http://github.com/agmcleod/x2c
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - .
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.7
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: A tool for converting XML data to CSV. Generally meant for entities with
64
+ fields, as opposed to data nested many nodes deep.
65
+ test_files: []