sax_stream 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +7 -4
- data/lib/sax_stream/collectors/block_collector.rb +15 -0
- data/lib/sax_stream/collectors/naive_collector.rb +21 -0
- data/lib/sax_stream/internal/element_stack.rb +24 -8
- data/lib/sax_stream/internal/mapper_handler.rb +6 -6
- data/lib/sax_stream/mapper.rb +13 -3
- data/lib/sax_stream/types/boolean.rb +13 -0
- data/lib/sax_stream/types/decimal.rb +13 -0
- data/lib/sax_stream/types/integer.rb +13 -0
- data/lib/sax_stream/types.rb +3 -0
- metadata +11 -6
- data/lib/sax_stream/naive_collector.rb +0 -19
data/README.markdown
CHANGED
@@ -74,8 +74,9 @@ The parser object must be supplied with a collector and an array of mapping clas
|
|
74
74
|
|
75
75
|
```ruby
|
76
76
|
require 'sax_stream/parser'
|
77
|
+
require 'sax_stream/collectors/naive_collector'
|
77
78
|
|
78
|
-
collector = SaxStream::NaiveCollector.new
|
79
|
+
collector = SaxStream::Collectors::NaiveCollector.new
|
79
80
|
parser = SaxStream::Parser.new(collector, [Product])
|
80
81
|
|
81
82
|
parser.parse_stream(File.open('products.xml'))
|
@@ -89,8 +90,10 @@ To get the full benefits of this library, supply a collector which does somethin
|
|
89
90
|
|
90
91
|
I plan to supply a batching collector which will collect a certain number of objects before passing them off to another collector you supply, so you can save objects in batches of 100 or whatever is optimal for your application.
|
91
92
|
|
92
|
-
##
|
93
|
+
## Credits
|
93
94
|
|
94
|
-
Craig Ambrose
|
95
|
+
Author: [Craig Ambrose](http://www.craigambrose.com)
|
95
96
|
|
96
|
-
http://www.
|
97
|
+
Initial development sponsored by: [List Globally](http://www.listglobally.com)
|
98
|
+
|
99
|
+
Ideas were taken from lots of other great libraries, including ROXML, HappyMapper, and SAX Machine, and of course this library relies heavily on Nokogiri.
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module SaxStream
|
2
|
+
module Collectors
|
3
|
+
# Initialise this collector with a block that handles one argument. This collector will yield each
|
4
|
+
# mapped object it collects immediately to the block. It will not keep a record of the objects.
|
5
|
+
class BlockCollector
|
6
|
+
def initialize(&block)
|
7
|
+
@block = block
|
8
|
+
end
|
9
|
+
|
10
|
+
def <<(value)
|
11
|
+
@block.call(value)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SaxStream
|
2
|
+
module Collectors
|
3
|
+
class NaiveCollector
|
4
|
+
def initialize
|
5
|
+
@objects = []
|
6
|
+
end
|
7
|
+
|
8
|
+
def mapped_objects
|
9
|
+
@objects
|
10
|
+
end
|
11
|
+
|
12
|
+
def <<(value)
|
13
|
+
@objects << value
|
14
|
+
end
|
15
|
+
|
16
|
+
def for_type(klass)
|
17
|
+
mapped_objects.select { |object| object.class == klass }
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -29,12 +29,14 @@ module SaxStream
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
32
|
+
class RootElement < Element
|
33
|
+
def initialize
|
34
|
+
super(nil, [])
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
|
-
def
|
37
|
-
@elements
|
38
|
+
def initialize
|
39
|
+
@elements = []
|
38
40
|
end
|
39
41
|
|
40
42
|
def push(name, attrs)
|
@@ -42,18 +44,26 @@ module SaxStream
|
|
42
44
|
# indented_puts "push element #{name}"
|
43
45
|
end
|
44
46
|
|
45
|
-
def
|
47
|
+
def push_root
|
48
|
+
@elements.push(RootElement.new)
|
49
|
+
end
|
50
|
+
|
51
|
+
def pop(name = nil)
|
46
52
|
raise ProgramError, "attempting to pop an empty ElementStack" if @elements.empty?
|
53
|
+
if name && @element_stack.top_name != name
|
54
|
+
raise ProgramError "received popping element for #{name.inspect} but currently processing #{path.inspect}"
|
55
|
+
end
|
47
56
|
# indented_puts "pop element"
|
48
57
|
@elements.pop
|
49
58
|
end
|
50
59
|
|
51
60
|
def empty?
|
52
|
-
@elements.
|
61
|
+
@elements.length <= 1
|
53
62
|
end
|
54
63
|
|
55
64
|
def path
|
56
|
-
@elements.
|
65
|
+
return nil if @elements.empty?
|
66
|
+
@elements.map(&:name).compact.join('/')
|
57
67
|
end
|
58
68
|
|
59
69
|
def content
|
@@ -66,11 +76,17 @@ module SaxStream
|
|
66
76
|
|
67
77
|
def record_characters(string)
|
68
78
|
# indented_puts " record: #{string.inspect}"
|
69
|
-
@elements.last
|
79
|
+
if @elements.last
|
80
|
+
@elements.last.record_characters(string)
|
81
|
+
end
|
70
82
|
end
|
71
83
|
|
72
84
|
private
|
73
85
|
|
86
|
+
def top_name
|
87
|
+
@elements.last.name if @elements.last
|
88
|
+
end
|
89
|
+
|
74
90
|
def indented_puts(string)
|
75
91
|
indent = ''
|
76
92
|
@elements.length.times { indent << ' ' }
|
@@ -54,9 +54,7 @@ module SaxStream
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def characters(string)
|
57
|
-
|
58
|
-
@element_stack.record_characters(string)
|
59
|
-
end
|
57
|
+
@element_stack.record_characters(string)
|
60
58
|
end
|
61
59
|
|
62
60
|
def current_object
|
@@ -71,6 +69,7 @@ module SaxStream
|
|
71
69
|
attrs.each do |key, value|
|
72
70
|
@mapper_class.map_attribute_onto_object(@current_object, key, value)
|
73
71
|
end
|
72
|
+
@element_stack.push_root
|
74
73
|
@current_object
|
75
74
|
end
|
76
75
|
end
|
@@ -91,7 +90,6 @@ module SaxStream
|
|
91
90
|
|
92
91
|
def pop_element_stack(name)
|
93
92
|
unless @element_stack.empty?
|
94
|
-
raise ProgramError "received end element event for #{name.inspect} but currently processing #{@element_stack.top_name.inspect}" unless @element_stack.top_name == name
|
95
93
|
@mapper_class.map_element_stack_top_onto_object(@current_object, @element_stack)
|
96
94
|
@element_stack.pop
|
97
95
|
end
|
@@ -99,10 +97,12 @@ module SaxStream
|
|
99
97
|
|
100
98
|
def end_current_object(name)
|
101
99
|
raise ProgramError unless @current_object
|
102
|
-
raise ArgumentError, "received end element event for #{name.inspect} but currently processing #{@current_object.
|
103
|
-
|
100
|
+
raise ArgumentError, "received end element event for #{name.inspect} but currently processing #{@current_object.node_name.inspect}" unless @current_object.node_name == name
|
101
|
+
@mapper_class.map_key_onto_object(@current_object, @element_stack.path, @element_stack.content)
|
102
|
+
if @current_object.should_collect?
|
104
103
|
@collector << @current_object
|
105
104
|
end
|
105
|
+
@element_stack.pop
|
106
106
|
@stack.pop(self)
|
107
107
|
@current_object = nil
|
108
108
|
end
|
data/lib/sax_stream/mapper.rb
CHANGED
@@ -72,9 +72,11 @@ module SaxStream
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def map_key_onto_object(object, key, value)
|
75
|
-
|
76
|
-
|
77
|
-
mapping
|
75
|
+
if value
|
76
|
+
mapping = field_mapping(key)
|
77
|
+
if mapping
|
78
|
+
mapping.map_value_onto_object(object, value)
|
79
|
+
end
|
78
80
|
end
|
79
81
|
end
|
80
82
|
|
@@ -148,6 +150,14 @@ module SaxStream
|
|
148
150
|
@relations ||= build_empty_relations
|
149
151
|
end
|
150
152
|
|
153
|
+
def node_name
|
154
|
+
self.class.node_name
|
155
|
+
end
|
156
|
+
|
157
|
+
def should_collect?
|
158
|
+
self.class.should_collect?
|
159
|
+
end
|
160
|
+
|
151
161
|
private
|
152
162
|
|
153
163
|
def build_empty_relations
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sax_stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70239061335640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.
|
21
|
+
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70239061335640
|
25
25
|
description: A streaming XML parser which builds objects and passes them to a collector
|
26
26
|
as they are ready. Based upon Nokogiri SAX parsing functionality.
|
27
27
|
email:
|
@@ -30,6 +30,8 @@ executables: []
|
|
30
30
|
extensions: []
|
31
31
|
extra_rdoc_files: []
|
32
32
|
files:
|
33
|
+
- lib/sax_stream/collectors/block_collector.rb
|
34
|
+
- lib/sax_stream/collectors/naive_collector.rb
|
33
35
|
- lib/sax_stream/errors.rb
|
34
36
|
- lib/sax_stream/internal/child_mapping.rb
|
35
37
|
- lib/sax_stream/internal/combined_handler.rb
|
@@ -40,8 +42,11 @@ files:
|
|
40
42
|
- lib/sax_stream/internal/sax_handler.rb
|
41
43
|
- lib/sax_stream/internal/singular_relationship_collector.rb
|
42
44
|
- lib/sax_stream/mapper.rb
|
43
|
-
- lib/sax_stream/naive_collector.rb
|
44
45
|
- lib/sax_stream/parser.rb
|
46
|
+
- lib/sax_stream/types/boolean.rb
|
47
|
+
- lib/sax_stream/types/decimal.rb
|
48
|
+
- lib/sax_stream/types/integer.rb
|
49
|
+
- lib/sax_stream/types.rb
|
45
50
|
- LICENSE
|
46
51
|
- README.markdown
|
47
52
|
homepage: http://github.com/craigambrose/sax_stream
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module SaxStream
|
2
|
-
class NaiveCollector
|
3
|
-
def initialize
|
4
|
-
@objects = []
|
5
|
-
end
|
6
|
-
|
7
|
-
def mapped_objects
|
8
|
-
@objects
|
9
|
-
end
|
10
|
-
|
11
|
-
def <<(value)
|
12
|
-
@objects << value
|
13
|
-
end
|
14
|
-
|
15
|
-
def for_type(klass)
|
16
|
-
mapped_objects.select { |object| object.class == klass }
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|