sax_stream 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +7 -4
- data/lib/sax_stream/collectors/block_collector.rb +15 -0
- data/lib/sax_stream/collectors/naive_collector.rb +21 -0
- data/lib/sax_stream/internal/element_stack.rb +24 -8
- data/lib/sax_stream/internal/mapper_handler.rb +6 -6
- data/lib/sax_stream/mapper.rb +13 -3
- data/lib/sax_stream/types/boolean.rb +13 -0
- data/lib/sax_stream/types/decimal.rb +13 -0
- data/lib/sax_stream/types/integer.rb +13 -0
- data/lib/sax_stream/types.rb +3 -0
- metadata +11 -6
- data/lib/sax_stream/naive_collector.rb +0 -19
data/README.markdown
CHANGED
@@ -74,8 +74,9 @@ The parser object must be supplied with a collector and an array of mapping clas
|
|
74
74
|
|
75
75
|
```ruby
|
76
76
|
require 'sax_stream/parser'
|
77
|
+
require 'sax_stream/collectors/naive_collector'
|
77
78
|
|
78
|
-
collector = SaxStream::NaiveCollector.new
|
79
|
+
collector = SaxStream::Collectors::NaiveCollector.new
|
79
80
|
parser = SaxStream::Parser.new(collector, [Product])
|
80
81
|
|
81
82
|
parser.parse_stream(File.open('products.xml'))
|
@@ -89,8 +90,10 @@ To get the full benefits of this library, supply a collector which does somethin
|
|
89
90
|
|
90
91
|
I plan to supply a batching collector which will collect a certain number of objects before passing them off to another collector you supply, so you can save objects in batches of 100 or whatever is optimal for your application.
|
91
92
|
|
92
|
-
##
|
93
|
+
## Credits
|
93
94
|
|
94
|
-
Craig Ambrose
|
95
|
+
Author: [Craig Ambrose](http://www.craigambrose.com)
|
95
96
|
|
96
|
-
http://www.
|
97
|
+
Initial development sponsored by: [List Globally](http://www.listglobally.com)
|
98
|
+
|
99
|
+
Ideas were taken from lots of other great libraries, including ROXML, Happymapper and Sax Machine, and this library is of course very reliant on Nokogiri.
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module SaxStream
  module Collectors
    # Initialise this collector with a block that handles one argument. This collector will yield each
    # mapped object it collects immediately to the block. It will not keep a record of the objects.
    class BlockCollector
      # Stores the block that every collected object will be handed to.
      #
      # Raises ArgumentError when no block is supplied; without this check the
      # mistake would only surface later as a NoMethodError on nil the first
      # time an object is collected.
      def initialize(&block)
        raise ArgumentError, 'BlockCollector requires a block' unless block

        @block = block
      end

      # Passes +value+ to the stored block and returns whatever the block returns.
      def <<(value)
        @block.call(value)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SaxStream
  module Collectors
    # Collector that keeps every object it is given in an in-memory array,
    # in the order the objects were appended.
    class NaiveCollector
      def initialize
        @objects = []
      end

      # Returns the internal array of collected objects (not a copy).
      def mapped_objects
        @objects
      end

      # Appends +value+ to the collected objects and returns the internal array.
      def <<(value)
        @objects.push(value)
      end

      # Returns the collected objects whose class is exactly +klass+
      # (instances of subclasses are not included).
      def for_type(klass)
        mapped_objects.select do |candidate|
          candidate.class == klass
        end
      end
    end
  end
end
|
@@ -29,12 +29,14 @@ module SaxStream
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
32
|
+
# Element subclass that is constructed without arguments; it always passes
# nil and an empty array up to Element's initializer.
# NOTE(review): presumably these are the element name and attributes, matching
# push(name, attrs) -- confirm against Element#initialize.
class RootElement < Element
  def initialize
    super(nil, [])
  end
end
|
35
37
|
|
36
|
-
def
|
37
|
-
@elements
|
38
|
+
# Starts the stack with no elements.
def initialize
  @elements = []
end
|
39
41
|
|
40
42
|
def push(name, attrs)
|
@@ -42,18 +44,26 @@ module SaxStream
|
|
42
44
|
# indented_puts "push element #{name}"
|
43
45
|
end
|
44
46
|
|
45
|
-
def
|
47
|
+
# Pushes a new RootElement onto the stack and returns the internal elements array.
def push_root
  @elements << RootElement.new
end
|
50
|
+
|
51
|
+
# Removes the top element from the stack and returns it.
#
# name - optional element name the caller expects to be on top. When given,
#        it is compared with the name of the current top element before
#        anything is removed.
#
# Raises ProgramError if the stack holds no elements, or if +name+ is given
# and does not match the name of the top element (the stack is left untouched
# in both cases).
def pop(name = nil)
  raise ProgramError, "attempting to pop an empty ElementStack" if @elements.empty?

  # Compare against this stack's own top element. The previous code read
  # @element_stack, which is never assigned on this object, and omitted the
  # comma after ProgramError, so the check could only ever raise NoMethodError.
  if name && top_name != name
    raise ProgramError, "received popping element for #{name.inspect} but currently processing #{path.inspect}"
  end
  # indented_puts "pop element"
  @elements.pop
end
|
50
59
|
|
51
60
|
# True when the stack holds at most one element.
# NOTE(review): the single tolerated element is presumably the root
# placeholder added by push_root -- confirm against callers.
def empty?
  @elements.size < 2
end
|
54
63
|
|
55
64
|
# Returns the names of all stacked elements joined with '/', skipping
# elements whose name is nil. Returns nil when the stack has no elements.
def path
  return if @elements.empty?

  names = @elements.map { |element| element.name }
  names.reject(&:nil?).join('/')
end
|
58
68
|
|
59
69
|
def content
|
@@ -66,11 +76,17 @@ module SaxStream
|
|
66
76
|
|
67
77
|
# Forwards +string+ to the element on top of the stack, returning that
# element's result. Does nothing (and returns nil) when the stack has no
# top element.
def record_characters(string)
  # indented_puts "  record: #{string.inspect}"
  current = @elements.last
  current.record_characters(string) if current
end
|
71
83
|
|
72
84
|
private
|
73
85
|
|
86
|
+
# Name of the element on top of the stack, or nil when there is no top element.
def top_name
  top = @elements.last
  top.name if top
end
|
89
|
+
|
74
90
|
def indented_puts(string)
|
75
91
|
indent = ''
|
76
92
|
@elements.length.times { indent << ' ' }
|
@@ -54,9 +54,7 @@ module SaxStream
|
|
54
54
|
end
|
55
55
|
|
56
56
|
# Hands the received +string+ to the element stack, which decides
# whether there is an element to record it on.
def characters(string)
  @element_stack.record_characters(string)
end
|
61
59
|
|
62
60
|
def current_object
|
@@ -71,6 +69,7 @@ module SaxStream
|
|
71
69
|
attrs.each do |key, value|
|
72
70
|
@mapper_class.map_attribute_onto_object(@current_object, key, value)
|
73
71
|
end
|
72
|
+
@element_stack.push_root
|
74
73
|
@current_object
|
75
74
|
end
|
76
75
|
end
|
@@ -91,7 +90,6 @@ module SaxStream
|
|
91
90
|
|
92
91
|
def pop_element_stack(name)
|
93
92
|
unless @element_stack.empty?
|
94
|
-
raise ProgramError "received end element event for #{name.inspect} but currently processing #{@element_stack.top_name.inspect}" unless @element_stack.top_name == name
|
95
93
|
@mapper_class.map_element_stack_top_onto_object(@current_object, @element_stack)
|
96
94
|
@element_stack.pop
|
97
95
|
end
|
@@ -99,10 +97,12 @@ module SaxStream
|
|
99
97
|
|
100
98
|
# Finishes the object currently being built when its end element arrives.
#
# In order: maps the element stack's current path/content onto the object,
# appends the object to the collector if the object says it should be
# collected, pops the element stack, pops this handler off @stack, and
# clears the current object.
#
# Raises ProgramError when there is no current object, and ArgumentError when
# +name+ differs from the current object's node name.
def end_current_object(name)
  raise ProgramError unless @current_object

  unless @current_object.node_name == name
    raise ArgumentError, "received end element event for #{name.inspect} but currently processing #{@current_object.node_name.inspect}"
  end

  @mapper_class.map_key_onto_object(@current_object, @element_stack.path, @element_stack.content)
  @collector << @current_object if @current_object.should_collect?

  @element_stack.pop
  @stack.pop(self)
  @current_object = nil
end
|
data/lib/sax_stream/mapper.rb
CHANGED
@@ -72,9 +72,11 @@ module SaxStream
|
|
72
72
|
end
|
73
73
|
|
74
74
|
# Looks up the field mapping for +key+ and, if one exists, lets it map
# +value+ onto +object+. Nothing is looked up or mapped when +value+ is
# nil/false, and keys without a mapping are ignored.
def map_key_onto_object(object, key, value)
  return unless value

  mapping = field_mapping(key)
  mapping.map_value_onto_object(object, value) if mapping
end
|
80
82
|
|
@@ -148,6 +150,14 @@ module SaxStream
|
|
148
150
|
@relations ||= build_empty_relations
|
149
151
|
end
|
150
152
|
|
153
|
+
# Instance-level shortcut for the node name defined on this object's class.
def node_name
  self.class.node_name
end
|
156
|
+
|
157
|
+
# Instance-level shortcut for the class-level should_collect? answer.
def should_collect?
  self.class.should_collect?
end
|
160
|
+
|
151
161
|
private
|
152
162
|
|
153
163
|
def build_empty_relations
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sax_stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70239061335640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.
|
21
|
+
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70239061335640
|
25
25
|
description: A streaming XML parser which builds objects and passes them to a collector
|
26
26
|
as they are ready. Based upon Nokogiri SAX parsing functionality.
|
27
27
|
email:
|
@@ -30,6 +30,8 @@ executables: []
|
|
30
30
|
extensions: []
|
31
31
|
extra_rdoc_files: []
|
32
32
|
files:
|
33
|
+
- lib/sax_stream/collectors/block_collector.rb
|
34
|
+
- lib/sax_stream/collectors/naive_collector.rb
|
33
35
|
- lib/sax_stream/errors.rb
|
34
36
|
- lib/sax_stream/internal/child_mapping.rb
|
35
37
|
- lib/sax_stream/internal/combined_handler.rb
|
@@ -40,8 +42,11 @@ files:
|
|
40
42
|
- lib/sax_stream/internal/sax_handler.rb
|
41
43
|
- lib/sax_stream/internal/singular_relationship_collector.rb
|
42
44
|
- lib/sax_stream/mapper.rb
|
43
|
-
- lib/sax_stream/naive_collector.rb
|
44
45
|
- lib/sax_stream/parser.rb
|
46
|
+
- lib/sax_stream/types/boolean.rb
|
47
|
+
- lib/sax_stream/types/decimal.rb
|
48
|
+
- lib/sax_stream/types/integer.rb
|
49
|
+
- lib/sax_stream/types.rb
|
45
50
|
- LICENSE
|
46
51
|
- README.markdown
|
47
52
|
homepage: http://github.com/craigambrose/sax_stream
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module SaxStream
|
2
|
-
class NaiveCollector
|
3
|
-
def initialize
|
4
|
-
@objects = []
|
5
|
-
end
|
6
|
-
|
7
|
-
def mapped_objects
|
8
|
-
@objects
|
9
|
-
end
|
10
|
-
|
11
|
-
def <<(value)
|
12
|
-
@objects << value
|
13
|
-
end
|
14
|
-
|
15
|
-
def for_type(klass)
|
16
|
-
mapped_objects.select { |object| object.class == klass }
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|