eatr 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
module Eatr
  # Version string of the eatr gem. Frozen so that callers cannot mutate the
  # shared constant in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,87 @@
1
+ require "yaml"
2
+ require "nokogiri"
3
+
module Eatr
  module Xml
    # Raised by Document when a required field's xpath matches no node.
    NodeNotFound = Class.new(StandardError)

    # Parses an XML file into an array of struct instances, driven by a
    # schema loaded from a YAML file.
    class Document
      include ParseValue
      extend Forwardable

      attr_reader :schema

      def_delegator :schema,
        :transformation_pipeline

      # @param schema_path [String] path of the YAML schema file
      def initialize(schema_path)
        # NOTE(review): YAML.load can deserialize arbitrary objects on older
        # Psych versions; consider YAML.safe_load if schema files may come
        # from untrusted sources.
        @schema = Schema.new(YAML.load(File.read(schema_path)))
      end

      # Parses the XML document at +xml_document_path+.
      #
      # One struct (from +schema.to_struct+) is created per expected record,
      # then every schema field is applied to those structs.
      #
      # @param xml_document_path [String] path of the XML file to read
      # @return [Array] the populated struct instances
      # @raise [NodeNotFound] when a required field cannot be located
      def parse(xml_document_path)
        doc = load_xml(xml_document_path)

        if @schema.remove_namespaces?
          doc.remove_namespaces!
          @namespaces = {}
        else
          @namespaces = doc.collect_namespaces
        end

        objects = Array.new(object_count(doc)) { @schema.to_struct.new }

        @schema.fields.each do |field|
          objects = set_field(objects, doc, field)
        end

        objects
      end

      private

      # Reads and strictly parses the XML file with network access disabled.
      # The block form of File.open closes the handle once parsing is done;
      # Nokogiri builds the whole document before returning, so nothing reads
      # from the handle afterwards.
      def load_xml(path)
        File.open(path) do |io|
          Nokogiri::XML(io) { |config| config.strict.nonet }
        end
      end

      # Number of structs to build: the product, over all node fields, of the
      # number of matches for each node's xpath (a node with no matches counts
      # as one). Non-node fields do not affect the count.
      def object_count(doc)
        @schema.fields.inject(1) do |product, field|
          next product unless field.node?

          product * [doc.xpath(field.xpath, @namespaces).count, 1].max
        end
      end

      # Applies +field+ to +objects+, reading values relative to +doc+.
      #
      # For a node field, the n-th match of the node's xpath is paired with
      # the n-th object, and the node's children are applied to that object
      # relative to the matched element. For a named field, the same value is
      # assigned to every object. Fields that are neither are ignored.
      #
      # @return [Array] the +objects+ array that was passed in
      def set_field(objects, doc, field)
        if field.node?
          doc.xpath(field.xpath, @namespaces).each_with_index do |child_xml, idx|
            field.children.each do |child|
              set_field([objects[idx]], child_xml, child)
            end
          end
        elsif field.name
          objects.each do |o|
            o.public_send("#{field.name}=", value_at(doc, field))
          end
        end

        objects
      end

      # Resolves the value for +field+: a fixed +field.value+ wins; otherwise
      # the content of the first node matching +field.xpath+ is converted with
      # +parse_value+. Returns nil when the field has neither, or when an
      # optional node is absent.
      #
      # @raise [NodeNotFound] when the node is absent and the field is required
      def value_at(doc, field)
        return field.value if field.value
        return unless field.xpath

        node = doc.at_xpath(field.xpath, @namespaces)
        return parse_value(field, node.content) if node

        raise NodeNotFound, "Unable to find '#{field.name}' using xpath '#{field.xpath}'" if field.required?
      end
    end
  end
end
@@ -0,0 +1,69 @@
module Eatr
  module Xml
    # Builds a starter YAML schema by walking the element structure of a
    # sample XML file.
    class SchemaGenerator
      # @param xml_path [String] path of the sample XML document
      def initialize(xml_path)
        @xml_path = xml_path
      end

      # Generates a schema for the element found at +starting_point+.
      #
      # Namespaces are stripped from the document before the xpath lookup,
      # and the emitted schema sets 'remove_namespaces' to true accordingly.
      #
      # @param starting_point [String] xpath of the element whose children
      #   become the schema's fields
      # @return [String] YAML with 'name', 'remove_namespaces' and 'fields'
      # @raise [ArgumentError] when +starting_point+ matches no element
      def schema(starting_point)
        doc = load_xml
        doc.remove_namespaces!

        root = doc.at_xpath(starting_point)
        raise ArgumentError, "No element found at xpath '#{starting_point}'" unless root

        fields = root.element_children.flat_map { |child| field_def(child) }

        schema = {
          'name' => '',
          'remove_namespaces' => true,
          'fields' => fields
        }

        YAML.dump(schema)
      end

      private

      # Strictly parses the XML file with network access disabled. The block
      # form of File.open closes the handle as soon as parsing has finished.
      def load_xml
        File.open(@xml_path) do |io|
          Nokogiri::XML(io) { |config| config.strict.nonet }
        end
      end

      # Describes +child+ as schema field definition(s).
      #
      # * Several children that all share one element name: a 'node' entry
      #   whose 'children' are derived from the first repeated element.
      # * Any other element with children: the children's definitions,
      #   flattened, each named with this element's underscored name as prefix.
      # * A leaf element: a single optional string field.
      #
      # @param name_prefix [String] prepended to the generated field name
      # @param xpath_relative_to [Regexp, nil] when given, the part of the
      #   element's path matching it is replaced with "."
      # @return [Hash, Array<Hash>]
      def field_def(child, name_prefix: '', xpath_relative_to: nil)
        elements = child.element_children
        child_prefix = "#{underscore(child.name)}_"

        if unique_children_count(child) == 1 && elements.count > 1
          first = elements.first
          # Matches the path of any of the repeated siblings, whatever its
          # positional index.
          relative_path = Regexp.new(first.path.gsub(/\[\d+\]/, "\\[\\d+\\]"))
          node_path = first.path.gsub(/\[\d+\]/, "")

          nested = first.element_children.flat_map do |c|
            field_def(c, name_prefix: child_prefix, xpath_relative_to: relative_path)
          end

          # NOTE(review): for a nested node the xpath is built from the
          # container's own path rather than from the repeated element's
          # path - confirm this is intended.
          {
            'node' => name_prefix + underscore(child.name),
            'xpath' => xpath_relative_to ? child.path.gsub(xpath_relative_to, ".") : node_path,
            'children' => nested
          }
        elsif unique_children_count(child) >= 1
          elements.flat_map do |c|
            field_def(c, name_prefix: child_prefix, xpath_relative_to: xpath_relative_to)
          end
        else
          {
            'name' => name_prefix + underscore(child.name),
            'xpath' => xpath_relative_to ? child.path.gsub(xpath_relative_to, ".") : child.path,
            'type' => 'string',
            'required' => false
          }
        end
      end

      # Number of distinct element names among the direct children.
      def unique_children_count(element)
        element.element_children.map(&:name).uniq.count
      end

      # Converts CamelCase / dashed names to snake_case
      # (e.g. "PageCount" -> "page_count").
      def underscore(str)
        str.gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          tr("-", "_").
          downcase
      end
    end
  end
end
data/sample.dot ADDED
@@ -0,0 +1,42 @@
1
+ strict digraph g {
2
+ ranksep="1.6"
3
+ graph [
4
+ rankdir = "LR"
5
+ ];
6
+ node [
7
+ fontsize = "16"
8
+ ];
9
+ edge [
10
+ arrowhead = "none"
11
+ ];
12
+ "books" [shape=none, margin=0, label=<
13
+ <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
14
+ <tr><td bgcolor="lightblue">books</td></tr>
15
+ <tr><td port="id" align="left">id</td></tr>
16
+ <tr><td port="author" align="left">author</td></tr>
17
+ <tr><td port="library_id" align="left">library_id</td></tr>
18
+ <tr><td port="pages" align="left">pages</td></tr>
19
+ <tr><td port="for_sale" align="left">for_sale</td></tr>
20
+ <tr><td port="published_at" align="left">published_at</td></tr>
21
+ <tr><td port="rating" align="left">rating</td></tr>
22
+ <tr><td port="icbn" align="left">icbn</td></tr>
23
+ <tr><td port="summary" align="left">summary</td></tr>
24
+ <tr><td port="age" align="left">age</td></tr>
25
+ </table>>];
26
+ "chapters" [shape=none, margin=0, label=<
27
+ <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
28
+ <tr><td bgcolor="lightblue">chapters</td></tr>
29
+ <tr><td port="book_id" align="left">book_id</td></tr>
30
+ <tr><td port="title" align="left">title</td></tr>
31
+ </table>>];
32
+ "libraries" [shape=none, margin=0, label=<
33
+ <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
34
+ <tr><td bgcolor="lightblue">libraries</td></tr>
35
+ <tr><td port="id" align="left">id</td></tr>
36
+ <tr><td port="book_title" align="left">book_title</td></tr>
37
+ <tr><td port="desk_number" align="left">desk_number</td></tr>
38
+ </table>>];
39
+ "books":"id" -> "chapters":"book_id" [arrowhead="crow"];
40
+ "books":"library_id" -> "libraries":"id" [arrowhead="teeodot"];
41
+ "chapters":"book_id" -> "books":"id" [arrowhead="tee"];
42
+ }
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: eatr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Greggory Rothmeier
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-01-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.13'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.13'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ description: Configuration-based document parsing and transformation framework.
84
+ email:
85
+ - greggroth@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - CODE_OF_CONDUCT.md
94
+ - Gemfile
95
+ - LICENSE.txt
96
+ - README.md
97
+ - Rakefile
98
+ - bin/console
99
+ - bin/rspec
100
+ - bin/setup
101
+ - eatr.gemspec
102
+ - lib/eatr.rb
103
+ - lib/eatr/csv/document.rb
104
+ - lib/eatr/dot_generator.rb
105
+ - lib/eatr/dot_template.dot
106
+ - lib/eatr/parse_value.rb
107
+ - lib/eatr/pipeline.rb
108
+ - lib/eatr/pipeline_spec.rb
109
+ - lib/eatr/schema.rb
110
+ - lib/eatr/sql/table_generator.rb
111
+ - lib/eatr/transformation/add_date_id.rb
112
+ - lib/eatr/transformation_set.rb
113
+ - lib/eatr/version.rb
114
+ - lib/eatr/xml/document.rb
115
+ - lib/eatr/xml/schema_generator.rb
116
+ - sample.dot
117
+ homepage: ''
118
+ licenses:
119
+ - MIT
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.5.1
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: Configuration-based document parsing and transformation framework.
141
+ test_files: []
142
+ has_rdoc: