saxerator 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -45,6 +45,12 @@ parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root <<
45
45
  parser.for_tag(:name).within(:author).each { |x| author_names << x }
46
46
  ```
47
47
 
48
+ Don't care about memory or streaming and just want your XML in one big hash? Saxerator can do that too.
49
+
50
+ ```ruby
51
+ parser.all # big, giant hash
52
+ ```
53
+
48
54
  Known Issues
49
55
  ------------
50
56
  * JRuby closes the file stream at the end of parsing, therefore to perform multiple operations
@@ -9,10 +9,15 @@ if !File.exists?(file)
9
9
  end
10
10
  file = File.new(file)
11
11
 
12
- tag = ARGV.shift || :artist
13
-
14
- count = 0
12
+ count = count2 = count3 = count4 = 0
15
13
  Benchmark.bm do |x|
16
- x.report { Saxerator.parser(file).for_tag(tag).each { count = count + 1 } }
14
+ x.report('for_tag') { Saxerator.parser(file).for_tag(:artist).each { count = count + 1 } }
15
+ x.report('at_depth') { Saxerator.parser(file).at_depth(2).each { count2 = count2 + 1 } }
16
+ x.report('within') { Saxerator.parser(file).within(:artists).each { count3 = count3 + 1 } }
17
+ x.report('composite') { Saxerator.parser(file).for_tag(:name).within(:artist).at_depth(3).each { count4 = count4 + 1} }
17
18
  end
18
- puts "#{count} #{tag} elements parsed"
19
+
20
+ puts "for_tag: #{count} artist elements parsed"
21
+ puts "at_depth: #{count2} elements parsed"
22
+ puts "within: #{count3} artists children parsed"
23
+ puts "composite: #{count4} names within artist nested 3 tags deep parsed"
@@ -0,0 +1,21 @@
1
+ $:.push File.expand_path('../../lib', __FILE__)
2
+ require 'saxerator'
3
+ require 'ruby-prof'
4
+
5
+ file = ARGV.shift
6
+ if !File.exists?(file)
7
+ puts "Cannot find file #{file}"
8
+ exit 1
9
+ end
10
+ file = File.new(file)
11
+
12
+ count = 0
13
+ RubyProf.start
14
+
15
+ Saxerator.parser(file).for_tag(:artist).each { count = count + 1 }
16
+
17
+ result = RubyProf.stop
18
+ printer = RubyProf::FlatPrinter.new(result)
19
+ printer.print(STDOUT)
20
+
21
+ puts "for_tag: #{count} artist elements parsed"
data/lib/saxerator.rb CHANGED
@@ -1,13 +1,14 @@
1
1
  require 'nokogiri'
2
2
 
3
- require "saxerator/version"
4
- require 'saxerator/document'
3
+ require 'saxerator/version'
4
+
5
+ require 'saxerator/full_document'
6
+ require 'saxerator/document_fragment'
5
7
  require 'saxerator/string_with_attributes'
6
8
  require 'saxerator/hash_with_attributes'
7
9
  require 'saxerator/xml_node'
8
10
 
9
11
  require 'saxerator/parser/accumulator'
10
- require 'saxerator/parser/document_latch'
11
12
  require 'saxerator/parser/element_name_latch'
12
13
  require 'saxerator/parser/depth_latch'
13
14
  require 'saxerator/parser/within_element_latch'
@@ -17,6 +18,6 @@ module Saxerator
17
18
  extend self
18
19
 
19
20
  def parser(xml)
20
- Saxerator::Document.new(xml)
21
+ Saxerator::FullDocument.new(xml)
21
22
  end
22
23
  end
@@ -0,0 +1,24 @@
1
+ require 'saxerator/dsl'
2
+
3
+ module Saxerator
4
+ class DocumentFragment
5
+ include Enumerable
6
+ include DSL
7
+
8
+ def initialize(source, config = nil, latches = [])
9
+ @source = source
10
+ @latches = latches
11
+ @config = config
12
+ end
13
+
14
+ def each(&block)
15
+ reader = Parser::LatchedAccumulator.new(@config, @latches, block)
16
+ parser = ::Nokogiri::XML::SAX::Parser.new(reader)
17
+
18
+ # Always have to start at the beginning of a File
19
+ @source.rewind if(@source.is_a?(File))
20
+
21
+ parser.parse(@source)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,15 @@
1
+ module Saxerator
2
+ module DSL
3
+ def for_tag(tag)
4
+ DocumentFragment.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
5
+ end
6
+
7
+ def at_depth(depth)
8
+ DocumentFragment.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
9
+ end
10
+
11
+ def within(tag)
12
+ DocumentFragment.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ require 'saxerator/dsl'
2
+
3
+ module Saxerator
4
+ class FullDocument
5
+ include DSL
6
+
7
+ def initialize(source, config = nil)
8
+ @source = source
9
+ @config = config
10
+ @latches = []
11
+ end
12
+
13
+ def all
14
+ DocumentFragment.new(@source, @config, @latches).first
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class DepthLatch < DocumentLatch
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class ElementNameLatch < DocumentLatch
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class WithinElementLatch < DocumentLatch
@@ -1,3 +1,3 @@
1
1
  module Saxerator
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -20,6 +20,10 @@ describe Saxerator do
20
20
  results.should == ['one', 'two', 'three']
21
21
  end
22
22
 
23
+ it "should allow you to parse an entire document" do
24
+ subject.all.should == {'blurb' => ['one', 'two', 'three']}
25
+ end
26
+
23
27
  context "and one non-blurb" do
24
28
  let(:xml) { "<blurbs><blurb>one</blurb><blurb>two</blurb><blurb>three</blurb><notablurb>four</notablurb></blurbs>" }
25
29
  it "should only parse the requested tag" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxerator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-14 00:00:00.000000000 Z
12
+ date: 2012-04-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -78,7 +78,9 @@ files:
78
78
  - .rvmrc
79
79
  - .gitignore
80
80
  - .travis.yml
81
- - lib/saxerator/document.rb
81
+ - lib/saxerator/document_fragment.rb
82
+ - lib/saxerator/dsl.rb
83
+ - lib/saxerator/full_document.rb
82
84
  - lib/saxerator/hash_with_attributes.rb
83
85
  - lib/saxerator/parser/accumulator.rb
84
86
  - lib/saxerator/parser/depth_latch.rb
@@ -96,6 +98,7 @@ files:
96
98
  - spec/spec_helper.rb
97
99
  - benchmark/benchmark.rb
98
100
  - benchmark/generate_sample_file.rb
101
+ - benchmark/profile.rb
99
102
  homepage: https://github.com/soulcutter/saxerator
100
103
  licenses:
101
104
  - MIT
@@ -1,33 +0,0 @@
1
- module Saxerator
2
- class Document
3
- include Enumerable
4
-
5
- def initialize(source, config = nil, latches = [])
6
- @source = source
7
- @latches = latches
8
- @config = config
9
- end
10
-
11
- def for_tag(tag)
12
- Document.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
13
- end
14
-
15
- def at_depth(depth)
16
- Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
17
- end
18
-
19
- def within(tag)
20
- Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
21
- end
22
-
23
- def each(&block)
24
- document = Parser::LatchedAccumulator.new(@config, @latches, block)
25
- parser = ::Nokogiri::XML::SAX::Parser.new document
26
-
27
- # Always have to start at the beginning of a File
28
- @source.rewind if(@source.is_a?(File))
29
-
30
- parser.parse(@source)
31
- end
32
- end
33
- end