sax_stream 0.3.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -9,7 +9,7 @@ The two main goals of this process are:
9
9
  1. To avoid loading the entire XML file stream into memory at once.
10
10
  2. To avoid loading all the mapped objects into memory simultaneously.
11
11
 
12
- This is currently only for XML importing. Supporting exporting too would be nice if I need it.
12
+ This library handles both importing and exporting, but at present no steps have been taken to conserve memory when exporting XML. Using this library for export is comparable to using ROXML or HappyMapper (and the export code is heavily based on ROXML's implementation).
13
13
 
14
14
  ## Status
15
15
 
@@ -68,7 +68,7 @@ class UnusualDate
68
68
  end
69
69
  ```
70
70
 
71
- ### Run the parser
71
+ ### Importing XML: Run the parser
72
72
 
73
73
  The parser object must be supplied with a collector and an array of mapping classes to use.
74
74
 
@@ -90,6 +90,10 @@ To get the full benefits of this library, supply a collector which does somethin
90
90
 
91
91
  I plan to supply a batching collector which will collect a certain number of objects before passing them off to another collector you supply, so you can save objects in batches of 100 or whatever is optimal for your application.
92
92
 
93
+ ### Exporting XML: Call to_xml
94
+
95
+ No parser or collector object is used to export XML. Simply call `to_xml` on the root object. The exporter expects data to be in the same place it was imported to: it looks in `attributes` for this object's own data, and in `relations` for associated objects. If you defined custom setters on your object to manipulate the values on import, you may also need to supply matching custom getters.
96
+
93
97
  ## Credits
94
98
 
95
99
  Author: [Craig Ambrose](http://www.craigambrose.com)
@@ -0,0 +1,101 @@
1
+ # Taken from the "Thor" gem.
2
+
3
+ module SaxStream
4
+ module CoreExtensions
5
+ if RUBY_VERSION >= '1.9'
6
+ class OrderedHash < ::Hash
7
+ end
8
+ else
9
+ # This class is based on the Ruby 1.9 ordered hashes.
10
+ #
11
+ # It keeps the semantics and most of the efficiency of normal hashes
12
+ # while also keeping track of the order in which elements were set.
13
+ #
14
+ class OrderedHash #:nodoc:
15
+ include Enumerable
16
+
17
+ Node = Struct.new(:key, :value, :next, :prev)
18
+
19
+ def initialize
20
+ @hash = {}
21
+ end
22
+
23
+ def [](key)
24
+ @hash[key] && @hash[key].value
25
+ end
26
+
27
+ def []=(key, value)
28
+ if node = @hash[key]
29
+ node.value = value
30
+ else
31
+ node = Node.new(key, value)
32
+
33
+ if @first.nil?
34
+ @first = @last = node
35
+ else
36
+ node.prev = @last
37
+ @last.next = node
38
+ @last = node
39
+ end
40
+ end
41
+
42
+ @hash[key] = node
43
+ value
44
+ end
45
+
46
+ def delete(key)
47
+ if node = @hash[key]
48
+ prev_node = node.prev
49
+ next_node = node.next
50
+
51
+ next_node.prev = prev_node if next_node
52
+ prev_node.next = next_node if prev_node
53
+
54
+ @first = next_node if @first == node
55
+ @last = prev_node if @last == node
56
+
57
+ value = node.value
58
+ end
59
+
60
+ @hash.delete(key)
61
+ value
62
+ end
63
+
64
+ def keys
65
+ self.map { |k, v| k }
66
+ end
67
+
68
+ def values
69
+ self.map { |k, v| v }
70
+ end
71
+
72
+ def each
73
+ return unless @first
74
+ yield [@first.key, @first.value]
75
+ node = @first
76
+ yield [node.key, node.value] while node = node.next
77
+ self
78
+ end
79
+
80
+ def merge(other)
81
+ hash = self.class.new
82
+
83
+ self.each do |key, value|
84
+ hash[key] = value
85
+ end
86
+
87
+ other.each do |key, value|
88
+ hash[key] = value
89
+ end
90
+
91
+ hash
92
+ end
93
+
94
+ def empty?
95
+ @hash.empty?
96
+ end
97
+ end
98
+ end
99
+
100
+ end
101
+ end
@@ -0,0 +1,19 @@
1
+ require 'sax_stream/internal/mappings/element_content'
2
+ require 'sax_stream/internal/mappings/element_attribute'
3
+ require 'sax_stream/internal/mappings/child'
4
+
5
+ module SaxStream
6
+ module Internal
7
+ class MappingFactory
8
+ def self.build_mapping(name, options)
9
+ last_part = options[:to].split('/').last
10
+ klass = (last_part =~ /^@/ ? Mappings::ElementAttribute : Mappings::ElementContent)
11
+ klass.new(name, options)
12
+ end
13
+
14
+ def self.build_relation(name, options)
15
+ Mappings::Child.new(name, options)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,56 @@
1
+ module SaxStream
2
+ module Internal
3
+ module Mappings
4
+ class Base
5
+ attr_reader :name
6
+
7
+ def initialize(name, options = {})
8
+ @name = name.to_s
9
+ @path = options[:to]
10
+ process_conversion_type(options[:as])
11
+ end
12
+
13
+ def handler_for(name, collector, handler_stack, parent_object)
14
+ end
15
+
16
+ def path_parts
17
+ @path.split('/')
18
+ rescue => e
19
+ raise "could not split #{@path.inspect} for #{@name.inspect}"
20
+ end
21
+
22
+ def map_value_onto_object(object, value)
23
+ end
24
+
25
+ def find_or_insert_parent_node(doc, base)
26
+ find_or_insert_nested_node(doc, base, path_parts.tap(&:pop))
27
+ end
28
+
29
+ def update_parent_node(builder, doc, parent, object)
30
+ raise NotImplementedError
31
+ end
32
+
33
+ private
34
+
35
+ def find_or_insert_nested_node(doc, base, remaining_parts)
36
+ part = remaining_parts.shift
37
+ return base unless part
38
+ node = find_or_insert_child_element(doc, base, part)
39
+ find_or_insert_nested_node(doc, node, remaining_parts)
40
+ end
41
+
42
+ def find_or_insert_child_element(doc, base, part)
43
+ base.search(part).first || insert_child_element(doc, base, part)
44
+ end
45
+
46
+ def insert_child_element(doc, base, part)
47
+ doc.create_element(part).tap do |element|
48
+ base << element
49
+ end
50
+ end
51
+
52
+
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,74 @@
1
+ require 'sax_stream/internal/mapper_handler'
2
+ require 'sax_stream/internal/singular_relationship_collector'
3
+ require 'sax_stream/internal/mappings/base'
4
+
5
+ module SaxStream
6
+ module Internal
7
+ module Mappings
8
+ class Child < Base
9
+ # Supported options are :to, :as & :parent_collects. See Mapper.relate documentation for more details.
10
+ def initialize(name, options)
11
+ @parent_collects = options[:parent_collects]
12
+ super
13
+ end
14
+
15
+ def handler_for(node_path, collector, handler_stack, parent_object)
16
+ node_name = node_path.split('/').last
17
+ @mapper_classes.each do |mapper_class|
18
+ if mapper_class.maps_node?(node_name)
19
+ return MapperHandler.new(mapper_class, child_collector(parent_object, collector), handler_stack)
20
+ end
21
+ end
22
+ nil
23
+ end
24
+
25
+ def build_empty_relation
26
+ [] if @plural
27
+ end
28
+
29
+ def update_parent_node(builder, doc, parent, object)
30
+ value_from_object(object).each do |child_object|
31
+ parent << builder.build_xml_for(child_object, parent)
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def value_from_object(object)
38
+ result = object.relations[@name]
39
+ result = [result] unless @plural
40
+ result
41
+ end
42
+
43
+ def child_collector(parent_object, collector)
44
+ if @parent_collects
45
+ if @plural
46
+ parent_object.relations[name]
47
+ else
48
+ SingularRelationshipCollector.new(parent_object, @name)
49
+ end
50
+ else
51
+ collector
52
+ end
53
+ end
54
+
55
+ def arrayify(value)
56
+ value.is_a?(Enumerable) ? value : [value]
57
+ end
58
+
59
+ def process_conversion_type(as)
60
+ @plural = as.is_a?(Enumerable)
61
+ @mapper_classes = arrayify(as).compact
62
+ if @mapper_classes.empty?
63
+ raise ":as options for #{@name} field is empty, for child nodes it must be a mapper class or array of mapper classes"
64
+ end
65
+ @mapper_classes.each do |mapper_class|
66
+ unless mapper_class.respond_to?(:map_key_onto_object)
67
+ raise ":as options for #{@name} field contains #{mapper_class.inspect} which does not appear to be a valid mapper class"
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,53 @@
1
+ require 'sax_stream/internal/mappings/base'
2
+
3
+ module SaxStream
4
+ module Internal
5
+ module Mappings
6
+ class Element < Base
7
+ def map_value_onto_object(object, value)
8
+ if value && @parser
9
+ value = @parser.parse(value)
10
+ end
11
+ if object.respond_to?(setter_method)
12
+ object.send(setter_method, value)
13
+ else
14
+ object[@name] = value
15
+ end
16
+ end
17
+
18
+ def string_value_from_object(object)
19
+ result = raw_value_from_object(object)
20
+ result = @parser.format(result) if @parser && @parser.respond_to?(:format)
21
+ result.to_s
22
+ end
23
+
24
+ def find_or_insert_node(doc, base)
25
+ find_or_insert_nested_node(doc, base, path_parts)
26
+ end
27
+
28
+ private
29
+ def raw_value_from_object(object)
30
+ if object.respond_to?(@name) && !Object.new.respond_to?(@name)
31
+ object.send(@name)
32
+ else
33
+ object[@name]
34
+ end
35
+ end
36
+
37
+ def setter_method
38
+ "#{@name}=".to_sym
39
+ end
40
+
41
+ def process_conversion_type(as)
42
+ if as
43
+ if as.respond_to?(:parse)
44
+ @parser = as
45
+ else
46
+ raise ArgumentError, ":as options for #{@name} field is a #{as.inspect} which must respond to parse"
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,20 @@
1
+ require 'sax_stream/internal/mappings/element'
2
+
3
+ module SaxStream
4
+ module Internal
5
+ module Mappings
6
+ class ElementAttribute < Element
7
+ def update_parent_node(builder, doc, parent, object)
8
+ parent[base_attribute_name] = string_value_from_object(object)
9
+ parent
10
+ end
11
+
12
+ private
13
+
14
+ def base_attribute_name
15
+ path_parts.last.sub(/^@/, '')
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,25 @@
1
+ require 'sax_stream/internal/mappings/element'
2
+
3
+ module SaxStream
4
+ module Internal
5
+ module Mappings
6
+ class ElementContent < Element
7
+ def initialize(name, options = {})
8
+ @cdata = options[:cdata]
9
+ super
10
+ end
11
+
12
+ def update_parent_node(builder, doc, parent, object)
13
+ node = find_or_insert_child_element(doc, parent, path_parts.last)
14
+ value = string_value_from_object(object)
15
+ if @cdata
16
+ node.add_child(Nokogiri::XML::CDATA.new(doc, value))
17
+ else
18
+ node.content = value
19
+ end
20
+ node
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+
3
+ module SaxStream
4
+ module Internal
5
+ class XmlBuilder
6
+ def initialize(options = {})
7
+ @encoding = options[:encoding] || 'UTF-8'
8
+ end
9
+
10
+ def build_xml_for(object, parent = nil)
11
+ mappings = object.mappings
12
+
13
+ in_sub_object = has_doc?
14
+ @doc ||= build_doc
15
+
16
+ base = add_base_element(@doc, parent || @doc, object)
17
+
18
+ object.mappings.each do |mapping|
19
+ add_mapping(@doc, base, object, mapping)
20
+ end
21
+
22
+ in_sub_object ? base : @doc.to_xml
23
+ end
24
+
25
+ private
26
+
27
+ def has_doc?
28
+ !!@doc
29
+ end
30
+
31
+ def build_doc
32
+ @doc = Nokogiri::XML::Document.new
33
+ @doc.encoding = @encoding
34
+ @doc
35
+ end
36
+
37
+ def add_base_element(doc, parent, object)
38
+ base = doc.create_element(object.node_name)
39
+ parent << base
40
+ base
41
+ end
42
+
43
+ def add_mapping(doc, base, object, mapping)
44
+ parent = mapping.find_or_insert_parent_node(doc, base)
45
+ mapping.update_parent_node(self, doc, parent, object)
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,5 +1,6 @@
1
- require 'sax_stream/internal/field_mapping'
2
- require 'sax_stream/internal/child_mapping'
1
+ require 'sax_stream/internal/mapping_factory'
2
+ require 'sax_stream/internal/xml_builder'
3
+ require 'sax_stream/core_extensions/ordered_hash'
3
4
 
4
5
  module SaxStream
5
6
  # Include this module to make your class map an XML node. For usage examples, see the README.
@@ -15,7 +16,7 @@ module SaxStream
15
16
  end
16
17
 
17
18
  def map(attribute_name, options = {})
18
- store_field_mapping(options[:to], Internal::FieldMapping.new(attribute_name, options))
19
+ store_field_mapping(options[:to], Internal::MappingFactory.build_mapping(attribute_name, options))
19
20
  end
20
21
 
21
22
  # Define a relation to another object which is built from an XML node using another class
@@ -49,7 +50,8 @@ module SaxStream
49
50
  # or at least what is known about it, but the parent will not be finished being
50
51
  # parsed. The parent will have already parsed all XML attributes though.
51
52
  def relate(attribute_name, options = {})
52
- store_relation_mapping(options[:to] || '*', Internal::ChildMapping.new(attribute_name, options))
53
+ options[:to] ||= '*'
54
+ store_relation_mapping(options[:to], Internal::MappingFactory.build_relation(attribute_name, options))
53
55
  end
54
56
 
55
57
  def node_name
@@ -92,11 +94,13 @@ module SaxStream
92
94
  end
93
95
 
94
96
  def mappings
95
- parent_class_values(:mappings, {}).merge(class_mappings).freeze
97
+ parent_class_values(:mappings, CoreExtensions::OrderedHash.new).merge(class_mappings).freeze
96
98
  end
97
99
 
98
100
  def regex_mappings
99
- (class_regex_mappings + parent_class_values(:regex_mappings, [])).freeze
101
+ mappings.reject do |key, mapping|
102
+ !key.is_a?(Regexp)
103
+ end
100
104
  end
101
105
 
102
106
  def should_collect?
@@ -111,11 +115,8 @@ module SaxStream
111
115
  end
112
116
 
113
117
  def store_field_mapping(key, mapping)
114
- if key.include?('*')
115
- class_regex_mappings << [Regexp.new(key.gsub('*', '[^/]+')), mapping]
116
- else
117
- class_mappings[key] = mapping
118
- end
118
+ key = Regexp.new(key.gsub('*', '[^/]+')) if key.include?('*')
119
+ class_mappings[key] = mapping
119
120
  end
120
121
 
121
122
  def field_mapping(key)
@@ -133,12 +134,8 @@ module SaxStream
133
134
  @relation_mappings ||= []
134
135
  end
135
136
 
136
- def class_regex_mappings
137
- @regex_mappings ||= []
138
- end
139
-
140
137
  def class_mappings
141
- @mappings ||= {}
138
+ @mappings ||= CoreExtensions::OrderedHash.new
142
139
  end
143
140
 
144
141
  def parent_class_values(method_name, default)
@@ -159,13 +156,21 @@ module SaxStream
159
156
  end
160
157
 
161
158
  def attributes
162
- @attributes ||= {}
159
+ @attributes ||= CoreExtensions::OrderedHash.new
160
+ end
161
+
162
+ def attributes=(value)
163
+ @attributes = value
163
164
  end
164
165
 
165
166
  def relations
166
167
  @relations ||= build_empty_relations
167
168
  end
168
169
 
170
+ def mappings
171
+ self.class.mappings.values
172
+ end
173
+
169
174
  def node_name
170
175
  self.class.node_name
171
176
  end
@@ -174,10 +179,15 @@ module SaxStream
174
179
  self.class.should_collect?
175
180
  end
176
181
 
182
+ def to_xml(encoding = 'UTF-8', builder = nil)
183
+ builder ||= Internal::XmlBuilder.new(:encoding => encoding)
184
+ builder.build_xml_for(self)
185
+ end
186
+
177
187
  private
178
188
 
179
189
  def build_empty_relations
180
- result = {}
190
+ result = CoreExtensions::OrderedHash.new
181
191
  self.class.relation_mappings.each do |relation_mapping|
182
192
  result[relation_mapping.name] = relation_mapping.build_empty_relation
183
193
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sax_stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-26 00:00:00.000000000 Z
12
+ date: 2012-05-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70295398456460 !ruby/object:Gem::Requirement
16
+ requirement: &70160590504180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 1.5.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70295398456460
24
+ version_requirements: *70160590504180
25
25
  description: A streaming XML parser which builds objects and passes them to a collecter
26
26
  as they are ready. Based upon Nokogiri SAX parsing functionality.
27
27
  email:
@@ -32,15 +32,21 @@ extra_rdoc_files: []
32
32
  files:
33
33
  - lib/sax_stream/collectors/block_collector.rb
34
34
  - lib/sax_stream/collectors/naive_collector.rb
35
+ - lib/sax_stream/core_extensions/ordered_hash.rb
35
36
  - lib/sax_stream/errors.rb
36
- - lib/sax_stream/internal/child_mapping.rb
37
37
  - lib/sax_stream/internal/combined_handler.rb
38
38
  - lib/sax_stream/internal/element_stack.rb
39
- - lib/sax_stream/internal/field_mapping.rb
40
39
  - lib/sax_stream/internal/handler_stack.rb
41
40
  - lib/sax_stream/internal/mapper_handler.rb
41
+ - lib/sax_stream/internal/mapping_factory.rb
42
+ - lib/sax_stream/internal/mappings/base.rb
43
+ - lib/sax_stream/internal/mappings/child.rb
44
+ - lib/sax_stream/internal/mappings/element.rb
45
+ - lib/sax_stream/internal/mappings/element_attribute.rb
46
+ - lib/sax_stream/internal/mappings/element_content.rb
42
47
  - lib/sax_stream/internal/sax_handler.rb
43
48
  - lib/sax_stream/internal/singular_relationship_collector.rb
49
+ - lib/sax_stream/internal/xml_builder.rb
44
50
  - lib/sax_stream/mapper.rb
45
51
  - lib/sax_stream/parser.rb
46
52
  - lib/sax_stream/types/boolean.rb
@@ -1,65 +0,0 @@
1
- require 'sax_stream/internal/mapper_handler'
2
- require 'sax_stream/internal/singular_relationship_collector'
3
-
4
- module SaxStream
5
- module Internal
6
- class ChildMapping
7
- attr_reader :name
8
-
9
- # Supported options are :to, :as & :parent_collects. See Mapper.relate documentation for more details.
10
- def initialize(name, options)
11
- @name = name.to_s
12
- @parent_collects = options[:parent_collects]
13
- process_conversion_type(options[:as])
14
- end
15
-
16
- def handler_for(node_path, collector, handler_stack, parent_object)
17
- node_name = node_path.split('/').last
18
- @mapper_classes.each do |mapper_class|
19
- if mapper_class.maps_node?(node_name)
20
- return MapperHandler.new(mapper_class, child_collector(parent_object, collector), handler_stack)
21
- end
22
- end
23
- nil
24
- end
25
-
26
- def map_value_onto_object(object, value)
27
- end
28
-
29
- def build_empty_relation
30
- [] if @plural
31
- end
32
-
33
- private
34
-
35
- def child_collector(parent_object, collector)
36
- if @parent_collects
37
- if @plural
38
- parent_object.relations[name]
39
- else
40
- SingularRelationshipCollector.new(parent_object, @name)
41
- end
42
- else
43
- collector
44
- end
45
- end
46
-
47
- def arrayify(value)
48
- value.is_a?(Enumerable) ? value : [value]
49
- end
50
-
51
- def process_conversion_type(as)
52
- @plural = as.is_a?(Enumerable)
53
- @mapper_classes = arrayify(as).compact
54
- if @mapper_classes.empty?
55
- raise ":as options for #{@name} field is empty, for child nodes it must be a mapper class or array of mapper classes"
56
- end
57
- @mapper_classes.each do |mapper_class|
58
- unless mapper_class.respond_to?(:map_key_onto_object)
59
- raise ":as options for #{@name} field contains #{mapper_class.inspect} which does not appear to be a valid mapper class"
60
- end
61
- end
62
- end
63
- end
64
- end
65
- end
@@ -1,40 +0,0 @@
1
- module SaxStream
2
- module Internal
3
- class FieldMapping
4
- def initialize(name, options = {})
5
- @name = name.to_s
6
- @path = options[:to]
7
- process_conversion_type(options[:as])
8
- end
9
-
10
- def map_value_onto_object(object, value)
11
- if value && @parser
12
- value = @parser.parse(value)
13
- end
14
- if object.respond_to?(setter_method)
15
- object.send(setter_method, value)
16
- else
17
- object[@name] = value
18
- end
19
- end
20
-
21
- def handler_for(name, collector, handler_stack, parent_object)
22
- end
23
-
24
- private
25
- def setter_method
26
- "#{@name}=".to_sym
27
- end
28
-
29
- def process_conversion_type(as)
30
- if as
31
- if as.respond_to?(:parse)
32
- @parser = as
33
- else
34
- raise ArgumentError, ":as options for #{@name} field is a #{as.inspect} which must respond to parse"
35
- end
36
- end
37
- end
38
- end
39
- end
40
- end