sax_stream 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -74,8 +74,9 @@ The parser object must be supplied with a collector and an array of mapping clas
74
74
 
75
75
  ```ruby
76
76
  require 'sax_stream/parser'
77
+ require 'sax_stream/collectors/naive_collector'
77
78
 
78
- collector = SaxStream::NaiveCollector.new
79
+ collector = SaxStream::Collectors::NaiveCollector.new
79
80
  parser = SaxStream::Parser.new(collector, [Product])
80
81
 
81
82
  parser.parse_stream(File.open('products.xml'))
@@ -89,8 +90,10 @@ To get the full benefits of this library, supply a collector which does somethin
89
90
 
90
91
  I plan to supply a batching collector which will collect a certain number of objects before passing them off to another collector you supply, so you can save objects in batches of 100 or whatever is optimal for your application.
91
92
 
92
- ## Author
93
+ ## Credits
93
94
 
94
- Craig Ambrose
95
+ Author: [Craig Ambrose](http://www.craigambrose.com)
95
96
 
96
- http://www.craigambrose.com
97
+ Initial development sponsored by: [List Globally](http://www.listglobally.com)
98
+
99
+ Ideas taken from lots of other great libraries, including ROXML, HappyMapper, and SAX Machine. This library also relies heavily on Nokogiri.
@@ -0,0 +1,15 @@
1
+ module SaxStream
2
+ module Collectors
3
+ # Initialise this collector with a block that takes one argument. Each mapped object is passed to
4
+ # the block as soon as it is collected; the collector does not keep a record of the objects.
5
+ class BlockCollector
6
+ def initialize(&block)
7
+ @block = block
8
+ end
9
+
10
+ def <<(value)
11
+ @block.call(value)
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,21 @@
1
+ module SaxStream
2
+ module Collectors
3
+ class NaiveCollector
4
+ def initialize
5
+ @objects = []
6
+ end
7
+
8
+ def mapped_objects
9
+ @objects
10
+ end
11
+
12
+ def <<(value)
13
+ @objects << value
14
+ end
15
+
16
+ def for_type(klass)
17
+ mapped_objects.select { |object| object.class == klass }
18
+ end
19
+ end
20
+ end
21
+ end
@@ -29,12 +29,14 @@ module SaxStream
29
29
  end
30
30
  end
31
31
 
32
- def initialize
33
- @elements = []
32
+ class RootElement < Element
33
+ def initialize
34
+ super(nil, [])
35
+ end
34
36
  end
35
37
 
36
- def top_name
37
- @elements.last.name if @elements.last
38
+ def initialize
39
+ @elements = []
38
40
  end
39
41
 
40
42
  def push(name, attrs)
@@ -42,18 +44,26 @@ module SaxStream
42
44
  # indented_puts "push element #{name}"
43
45
  end
44
46
 
45
- def pop
47
+ def push_root
48
+ @elements.push(RootElement.new)
49
+ end
50
+
51
+ def pop(name = nil)
46
52
  raise ProgramError, "attempting to pop an empty ElementStack" if @elements.empty?
53
+ if name && @element_stack.top_name != name
54
+ raise ProgramError "received popping element for #{name.inspect} but currently processing #{path.inspect}"
55
+ end
47
56
  # indented_puts "pop element"
48
57
  @elements.pop
49
58
  end
50
59
 
51
60
  def empty?
52
- @elements.empty?
61
+ @elements.length <= 1
53
62
  end
54
63
 
55
64
  def path
56
- @elements.map(&:name).join('/')
65
+ return nil if @elements.empty?
66
+ @elements.map(&:name).compact.join('/')
57
67
  end
58
68
 
59
69
  def content
@@ -66,11 +76,17 @@ module SaxStream
66
76
 
67
77
  def record_characters(string)
68
78
  # indented_puts " record: #{string.inspect}"
69
- @elements.last.record_characters(string)
79
+ if @elements.last
80
+ @elements.last.record_characters(string)
81
+ end
70
82
  end
71
83
 
72
84
  private
73
85
 
86
+ def top_name
87
+ @elements.last.name if @elements.last
88
+ end
89
+
74
90
  def indented_puts(string)
75
91
  indent = ''
76
92
  @elements.length.times { indent << ' ' }
@@ -54,9 +54,7 @@ module SaxStream
54
54
  end
55
55
 
56
56
  def characters(string)
57
- unless @element_stack.empty?
58
- @element_stack.record_characters(string)
59
- end
57
+ @element_stack.record_characters(string)
60
58
  end
61
59
 
62
60
  def current_object
@@ -71,6 +69,7 @@ module SaxStream
71
69
  attrs.each do |key, value|
72
70
  @mapper_class.map_attribute_onto_object(@current_object, key, value)
73
71
  end
72
+ @element_stack.push_root
74
73
  @current_object
75
74
  end
76
75
  end
@@ -91,7 +90,6 @@ module SaxStream
91
90
 
92
91
  def pop_element_stack(name)
93
92
  unless @element_stack.empty?
94
- raise ProgramError "received end element event for #{name.inspect} but currently processing #{@element_stack.top_name.inspect}" unless @element_stack.top_name == name
95
93
  @mapper_class.map_element_stack_top_onto_object(@current_object, @element_stack)
96
94
  @element_stack.pop
97
95
  end
@@ -99,10 +97,12 @@ module SaxStream
99
97
 
100
98
  def end_current_object(name)
101
99
  raise ProgramError unless @current_object
102
- raise ArgumentError, "received end element event for #{name.inspect} but currently processing #{@current_object.class.node_name.inspect}" unless @current_object.class.node_name == name
103
- if @current_object.class.should_collect?
100
+ raise ArgumentError, "received end element event for #{name.inspect} but currently processing #{@current_object.node_name.inspect}" unless @current_object.node_name == name
101
+ @mapper_class.map_key_onto_object(@current_object, @element_stack.path, @element_stack.content)
102
+ if @current_object.should_collect?
104
103
  @collector << @current_object
105
104
  end
105
+ @element_stack.pop
106
106
  @stack.pop(self)
107
107
  @current_object = nil
108
108
  end
@@ -72,9 +72,11 @@ module SaxStream
72
72
  end
73
73
 
74
74
  def map_key_onto_object(object, key, value)
75
- mapping = field_mapping(key)
76
- if mapping
77
- mapping.map_value_onto_object(object, value)
75
+ if value
76
+ mapping = field_mapping(key)
77
+ if mapping
78
+ mapping.map_value_onto_object(object, value)
79
+ end
78
80
  end
79
81
  end
80
82
 
@@ -148,6 +150,14 @@ module SaxStream
148
150
  @relations ||= build_empty_relations
149
151
  end
150
152
 
153
+ def node_name
154
+ self.class.node_name
155
+ end
156
+
157
+ def should_collect?
158
+ self.class.should_collect?
159
+ end
160
+
151
161
  private
152
162
 
153
163
  def build_empty_relations
@@ -0,0 +1,13 @@
1
+ module SaxStream
2
+ module Types
3
+ class Boolean
4
+ def self.parse(value)
5
+ if value
6
+ value = value.strip
7
+ return nil if value == ''
8
+ !!(value =~ /^(yes|true|1)$/i)
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module SaxStream
2
+ module Types
3
+ class Decimal
4
+ def self.parse(value)
5
+ if value
6
+ value = value.gsub(/[^\.0-9]/, '')
7
+ return nil if value == ''
8
+ Float(value)
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module SaxStream
2
+ module Types
3
+ class Integer
4
+ def self.parse(value)
5
+ if value
6
+ value = value.gsub(/[^\.0-9]/, '')
7
+ return nil if value == ''
8
+ Float(value).to_i
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,3 @@
1
+ require 'sax_stream/types/integer'
2
+ require 'sax_stream/types/decimal'
3
+ require 'sax_stream/types/boolean'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sax_stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,19 +9,19 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-09 00:00:00.000000000 Z
12
+ date: 2012-04-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70267973114600 !ruby/object:Gem::Requirement
16
+ requirement: &70239061335640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 1.4.0
21
+ version: 1.5.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70267973114600
24
+ version_requirements: *70239061335640
25
25
  description: A streaming XML parser which builds objects and passes them to a collector
26
26
  as they are ready. Based upon Nokogiri SAX parsing functionality.
27
27
  email:
@@ -30,6 +30,8 @@ executables: []
30
30
  extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
+ - lib/sax_stream/collectors/block_collector.rb
34
+ - lib/sax_stream/collectors/naive_collector.rb
33
35
  - lib/sax_stream/errors.rb
34
36
  - lib/sax_stream/internal/child_mapping.rb
35
37
  - lib/sax_stream/internal/combined_handler.rb
@@ -40,8 +42,11 @@ files:
40
42
  - lib/sax_stream/internal/sax_handler.rb
41
43
  - lib/sax_stream/internal/singular_relationship_collector.rb
42
44
  - lib/sax_stream/mapper.rb
43
- - lib/sax_stream/naive_collector.rb
44
45
  - lib/sax_stream/parser.rb
46
+ - lib/sax_stream/types/boolean.rb
47
+ - lib/sax_stream/types/decimal.rb
48
+ - lib/sax_stream/types/integer.rb
49
+ - lib/sax_stream/types.rb
45
50
  - LICENSE
46
51
  - README.markdown
47
52
  homepage: http://github.com/craigambrose/sax_stream
@@ -1,19 +0,0 @@
1
- module SaxStream
2
- class NaiveCollector
3
- def initialize
4
- @objects = []
5
- end
6
-
7
- def mapped_objects
8
- @objects
9
- end
10
-
11
- def <<(value)
12
- @objects << value
13
- end
14
-
15
- def for_type(klass)
16
- mapped_objects.select { |object| object.class == klass }
17
- end
18
- end
19
- end