x2cs 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ *.XML
2
+ *.xml
3
+ *.csv
4
+ .redcar
5
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
# Resolve gems from the official RubyGems index over HTTPS; the symbol
# shorthand (:rubygems) is deprecated by Bundler.
source 'https://rubygems.org'

# Test framework used by the specs.
group :dev do
  gem 'rspec'
end

# XML parser required by xmlparse.rb.
gem 'nokogiri'
@@ -0,0 +1,3 @@
1
+ # X2C - An XML data to CSV conversion Tool.
2
+
3
+ _Note that this software is a work in progress and is not yet ready for release. The bin/run.rb script works, but the tests are not finished. Use at your own risk._
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby
# Command-line entry point: makes the parent of this script's directory
# loadable, mixes the parser into the top-level object and starts the
# interactive conversion.
$LOAD_PATH.unshift(File.expand_path('../..', __FILE__))

require 'xmlparse.rb'

include XmlParser

XmlParser.init
@@ -0,0 +1,70 @@
1
+ $LOAD_PATH.unshift(File.expand_path('../..', __FILE__))
2
+ require 'xmlparse.rb'
3
+ include XmlParser
4
+
5
# Minimal person record used to generate XML fixture data for the specs.
class Person
  attr_accessor :name, :email, :phone, :address

  # Builds a person from a hash of attribute values.
  #
  # @param args [Hash] attribute name => value; each key must have a public
  #   writer (name, email, phone or address). Defaults to no attributes.
  # @raise [NoMethodError] if a key has no matching public writer
  def initialize(args = {})
    # public_send keeps caller-supplied keys from ever reaching private methods.
    args.each { |key, value| public_send("#{key}=", value) }
  end

  # Serialises the record as a single-line <Person> element. Values are
  # interpolated verbatim (no escaping), so a field may carry nested markup.
  #
  # @return [String]
  def to_xml
    "<Person><name>#{name}</name><email>#{email}</email><phone>#{phone}</phone><address>#{address}</address></Person>"
  end
end
18
+
19
# XML declaration written as the first line of every fixture file.
#
# @return [String]
def xml_header
  '<?xml version="1.0" encoding="utf-8" ?>'
end

# Writes the standard four-person fixture to test_file.xml.
def load_test_files
  write_tests_to_file(add_people)
end

# Writes test_file.xml in the current directory: the XML declaration, then a
# <people> root holding one line per record.
#
# @param people [Array<#to_xml>] records serialised through #to_xml
# @param extra_rows [Array<String>] raw XML rows appended after the people,
#   each followed by a newline (used for hand-written, irregular records)
def write_tests_to_file(people, extra_rows = [])
  File.open('test_file.xml', 'w+') do |f|
    f.write("#{xml_header}\n")
    f.write("<people>")
    people.each { |person| f.write("#{person.to_xml}\n") }
    extra_rows.each { |row| f.write("#{row}\n") }
    f.write("</people>")
  end
end

# Writes the standard fixture plus a fifth person whose email field holds
# nested <emails><email>...</email></emails> markup.
def load_test_files_with_extra
  people = add_people
  people << Person.new(name: 'Jack johnson', email: '<emails><email>1@test.com</email><email>2@test.com</email></emails>',
                       phone: '555-233-3333', address: '444 test')
  write_tests_to_file(people)
end

# Writes the standard fixture plus a fifth, hand-written record that has no
# <phone> element.
def load_test_files_with_missing
  incomplete = "<Person><name>A test name</name><email>something@you.com</email><address>test address</address></Person>"
  write_tests_to_file(add_people, [incomplete])
end

# Builds the four people shared by every fixture.
#
# @return [Array<Person>]
def add_people
  [
    Person.new(name: 'A person', email: 'test@something.com', phone: '555-233-3333', address: '444 test'),
    Person.new(name: 'John Smith', email: 'jsmith@fake.com', phone: '555-249-8833', address: '123 fake st'),
    Person.new(name: 'Jane Doe', email: 'example@something.ca', phone: '232-855-3422', address: '8643 Avenue Lane'),
    Person.new(name: 'Willis', email: 'nonexistant@example.com', phone: '416-233-7688', address: 'a test address')
  ]
end

# Deletes the fixture file. Does nothing when the file is absent, so a failed
# setup does not trigger a second error during teardown.
def remove_test_files
  File.delete('test_file.xml') if File.exist?('test_file.xml')
end
@@ -0,0 +1,168 @@
1
+ require 'spec_helper'
2
+
3
# Specs for XmlParser. The module is mixed into the top-level object by
# spec_helper, so its methods (private ones through send) are reachable on the
# XmlParser module object itself, which is also where gets/puts are stubbed.
describe XmlParser do
  # Parses the fixture written by one of the spec_helper loaders; the block
  # form closes the file handle even if parsing raises.
  def parse_test_file
    File.open('test_file.xml') { |f| Nokogiri::XML(f) }
  end

  # Stubs the interactive prompt: silences output and answers every gets call
  # with +answer+.
  def stub_prompt(answer)
    XmlParser.stub!(:gets).and_return(answer)
    XmlParser.stub!(:puts)
  end

  # Every example group writes test_file.xml during setup.
  after(:each) do
    remove_test_files
  end

  describe "#init" do
    before(:each) do
      load_test_files
      stub_prompt('test_file.xml')
      XmlParser.stub!(:build_headers)
    end

    it "should give a greeting message 'type xml file name'" do
      XmlParser.should_receive(:puts).with('type xml file name')
      XmlParser::init
    end

    it "should call build_headers" do
      XmlParser.should_receive(:build_headers)
      XmlParser::init
    end
  end

  describe "#build_headers" do
    context "Load test files with 4 headers" do
      before(:each) do
        load_test_files
        stub_prompt('Person')
        @doc = parse_test_file
        XmlParser.stub!(:traverse_through_each_row)
      end

      it "@headers should be of size 4" do
        XmlParser.send(:build_headers, @doc)
        XmlParser.headers.size.should == 4
      end

      %w[name email address phone].each do |header|
        it "@headers should contain #{header}" do
          XmlParser.send(:build_headers, @doc)
          XmlParser.headers.include?(header).should be_true
        end
      end
    end

    context "Traversing node is non-existant" do
      before(:each) do
        load_test_files
        stub_prompt('Badtagname')
        @doc = parse_test_file
        XmlParser.stub!(:traverse_through_each_row)
      end

      it "raises an exception if tag is not found" do
        lambda { XmlParser.send(:build_headers, @doc) }.should raise_error(RuntimeError, "Tag not found")
      end
    end
  end

  describe "#traverse_through_each_row" do
    before(:each) do
      load_test_files_with_extra
      @doc = parse_test_file

      stub_prompt('Person')
      # Stub the writer before build_headers runs: build_headers itself calls
      # traverse_through_each_row, which would otherwise write a CSV file.
      XmlParser.stub!(:write_to_file)
      XmlParser.send(:build_headers, @doc)
    end

    it "should populate @rows with 5 objects" do
      XmlParser.send(:traverse_through_each_row, @doc, 'Person')
      XmlParser.rows.size.should == 5
    end

    it "row[4] should contain multiple values in email field" do
      XmlParser.send(:traverse_through_each_row, @doc, 'Person')
      r = XmlParser.rows[4]
      r.index("1@test.com|2@test.com").should_not be_nil
    end
  end

  describe "#populate_missing" do
    # populate_missing compares against @headers, so build them from the
    # current fixture instead of relying on state left by earlier examples.
    def build_headers_only
      stub_prompt('Person')
      XmlParser.stub!(:traverse_through_each_row)
      XmlParser.send(:build_headers, @doc)
    end

    context "files have all headers for each record" do
      before(:each) do
        load_test_files
        @doc = parse_test_file
        build_headers_only
      end

      it "first_node have a child size of 4 after the method call" do
        first_node = @doc.css('Person').first
        XmlParser.send(:populate_missing, first_node)
        first_node.children.size.should == 4
      end
    end

    context "One row is missing a header" do
      before(:each) do
        load_test_files_with_missing
        @doc = parse_test_file
        build_headers_only
      end

      it "last_node should have a child size of 3 before the method call" do
        last_node = @doc.css('Person').last
        last_node.children.size.should == 3
      end

      it "last_node should have a child size of 4 after the method call" do
        last_node = @doc.css('Person').last
        XmlParser.send(:populate_missing, last_node)
        last_node.children.size.should == 4
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'rubygems'
2
+ require 'nokogiri'
3
+
4
# Interactive XML-to-CSV converter.
#
# The user is prompted for an XML file and for the name of the element that
# represents one record. Each distinct child element of those records becomes
# a CSV column, and each record becomes one CSV row written to
# "<xml file name>.csv". State (@xml_name, @headers, @rows) is kept in
# instance variables of whatever object the module is mixed into.
module XmlParser
  # Prompts for the XML file name, parses the file and starts the conversion.
  def init
    puts 'type xml file name'
    @xml_name = gets.chomp
    # Block form closes the handle even if parsing raises.
    doc = File.open(@xml_name) { |f| Nokogiri::XML(f) }
    build_headers(doc)
  end

  # @return [Array<String>, nil] column names collected by build_headers
  def headers
    @headers
  end

  # @return [Array<String>, nil] CSV lines built by traverse_through_each_row
  def rows
    @rows
  end

  private

  # Prompts for the record element name, collects the distinct child element
  # names of all matching nodes into @headers (first-seen order), then builds
  # the rows.
  #
  # @param doc [Nokogiri::XML::Document]
  # @raise [RuntimeError] "Tag not found" when no node matches the name
  def build_headers(doc)
    @headers = []
    puts "Enter in node name to traverse. If it's an xml table of <Person> objects, type \"Person\" (without quotes)"
    node_to_traverse = gets.chomp
    nodes = doc.css(node_to_traverse)
    raise "Tag not found" if nodes.empty?

    nodes.each do |record|
      record.children.each do |field|
        # Only real elements are columns; this skips whitespace text, comments
        # and CDATA, and keeps an element that happens to be named "text".
        next unless field.element?

        @headers << field.name unless @headers.include?(field.name)
      end
    end
    traverse_through_each_row(doc, node_to_traverse)
  end

  # Builds one CSV line per matching record into @rows and writes the file.
  # Text found at any depth below a field is joined with "|".
  #
  # @param doc [Nokogiri::XML::Document]
  # @param node_to_traverse [String] CSS selector / element name of a record
  def traverse_through_each_row(doc, node_to_traverse)
    @rows = []
    doc.css(node_to_traverse).each do |record|
      fields = Array.new(@headers.size)
      populate_missing(record).children.each do |field|
        next unless field.element?

        values = []
        field.children.each { |child| recursive_iteration(child, values) }
        fields[@headers.index(field.name)] = enclose(values.join('|'))
      end
      @rows << fields.join(',')
    end

    write_to_file
  end

  # Appends the stripped, non-empty text of +c+ (descending through nested
  # elements) to +f+.
  #
  # @param c [Nokogiri::XML::Node] node to inspect
  # @param f [Array<String>] accumulator, mutated in place
  # @return [Array<String>] the accumulator
  def recursive_iteration(c, f)
    if c.element?
      c.children.each { |child| recursive_iteration(child, f) }
    else
      text = strip(c.text)
      f << text unless text.empty?
    end
    f
  end

  # Writes the header line followed by every row to "#{@xml_name}.csv".
  def write_to_file
    File.open("#{@xml_name}.csv", 'w+') do |file|
      file.write(@headers.join(',') + "\n")
      @rows.each do |r|
        file.write("#{r}\n")
      end
    end
  end

  # Quotes a CSV field when it needs it: values containing a comma, a double
  # quote or a line break are wrapped in double quotes, with embedded double
  # quotes doubled. Other values are returned unchanged.
  #
  # @param str [String]
  # @return [String]
  def enclose(str)
    return str unless str =~ /[",\r\n]/

    "\"#{str.gsub('"', '""')}\""
  end

  # @param str [String]
  # @return [String] the value stripped, then CSV-quoted if necessary
  def enclose_and_strip(str)
    enclose(strip(str))
  end

  # Removes every newline and tab, then trims surrounding whitespace.
  #
  # @param str [String]
  # @return [String]
  def strip(str)
    str.gsub(/\n|\t/, '').strip
  end

  # Adds an empty element for every header the record lacks, so each record
  # ends up with one child element per column.
  #
  # @param location [Nokogiri::XML::Node] a record node, modified in place
  # @return [Nokogiri::XML::Node] the same node
  def populate_missing(location)
    present = location.children.select(&:element?).map(&:name)
    (@headers - present).each { |missing| location.add_child("<#{missing} />") }
    location
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: x2cs
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron McLeod
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-21 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70350295279760 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70350295279760
25
+ description: ''
26
+ email:
27
+ - aaron.g.mcleod@gmail.com
28
+ executables:
29
+ - x2cs
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - spec/spec_helper.rb
34
+ - spec/xmlparse_spec.rb
35
+ - bin/x2cs
36
+ - README.md
37
+ - .gitignore
38
+ - Gemfile
39
+ - xmlparse.rb
40
+ homepage: http://github.com/agmcleod/x2c
41
+ licenses: []
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - .
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.7
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: A tool for converting XML data to CSV. Generally meant for entities with
64
+ fields, as opposed to many nodes deep.
65
+ test_files: []