saxerator 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -45,6 +45,12 @@ parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root <<
45
45
  parser.for_tag(:name).within(:author).each { |x| author_names << x }
46
46
  ```
47
47
 
48
+ Don't care about memory or streaming and just want your XML in one big hash? Saxerator can do that too.
49
+
50
+ ```ruby
51
+ parser.all # big, giant hash
52
+ ```
53
+
48
54
  Known Issues
49
55
  ------------
50
56
  * JRuby closes the file stream at the end of parsing, therefore to perform multiple operations
@@ -9,10 +9,15 @@ if !File.exists?(file)
9
9
  end
10
10
  file = File.new(file)
11
11
 
12
- tag = ARGV.shift || :artist
13
-
14
- count = 0
12
+ count = count2 = count3 = count4 = 0
15
13
  Benchmark.bm do |x|
16
- x.report { Saxerator.parser(file).for_tag(tag).each { count = count + 1 } }
14
+ x.report('for_tag') { Saxerator.parser(file).for_tag(:artist).each { count = count + 1 } }
15
+ x.report('at_depth') { Saxerator.parser(file).at_depth(2).each { count2 = count2 + 1 } }
16
+ x.report('within') { Saxerator.parser(file).within(:artists).each { count3 = count3 + 1 } }
17
+ x.report('composite') { Saxerator.parser(file).for_tag(:name).within(:artist).at_depth(3).each { count4 = count4 + 1} }
17
18
  end
18
- puts "#{count} #{tag} elements parsed"
19
+
20
+ puts "for_tag: #{count} artist elements parsed"
21
+ puts "at_depth: #{count2} elements parsed"
22
+ puts "within: #{count3} artists children parsed"
23
+ puts "composite: #{count4} names within artist nested 3 tags deep parsed"
@@ -0,0 +1,21 @@
1
+ $:.push File.expand_path('../../lib', __FILE__)
2
+ require 'saxerator'
3
+ require 'ruby-prof'
4
+
5
+ file = ARGV.shift
6
+ if !File.exists?(file)
7
+ puts "Cannot find file #{file}"
8
+ exit 1
9
+ end
10
+ file = File.new(file)
11
+
12
+ count = 0
13
+ RubyProf.start
14
+
15
+ Saxerator.parser(file).for_tag(:artist).each { count = count + 1 }
16
+
17
+ result = RubyProf.stop
18
+ printer = RubyProf::FlatPrinter.new(result)
19
+ printer.print(STDOUT)
20
+
21
+ puts "for_tag: #{count} artist elements parsed"
data/lib/saxerator.rb CHANGED
@@ -1,13 +1,14 @@
1
1
  require 'nokogiri'
2
2
 
3
- require "saxerator/version"
4
- require 'saxerator/document'
3
+ require 'saxerator/version'
4
+
5
+ require 'saxerator/full_document'
6
+ require 'saxerator/document_fragment'
5
7
  require 'saxerator/string_with_attributes'
6
8
  require 'saxerator/hash_with_attributes'
7
9
  require 'saxerator/xml_node'
8
10
 
9
11
  require 'saxerator/parser/accumulator'
10
- require 'saxerator/parser/document_latch'
11
12
  require 'saxerator/parser/element_name_latch'
12
13
  require 'saxerator/parser/depth_latch'
13
14
  require 'saxerator/parser/within_element_latch'
@@ -17,6 +18,6 @@ module Saxerator
17
18
  extend self
18
19
 
19
20
  def parser(xml)
20
- Saxerator::Document.new(xml)
21
+ Saxerator::FullDocument.new(xml)
21
22
  end
22
23
  end
@@ -0,0 +1,24 @@
1
+ require 'saxerator/dsl'
2
+
3
+ module Saxerator
4
+ class DocumentFragment
5
+ include Enumerable
6
+ include DSL
7
+
8
+ def initialize(source, config = nil, latches = [])
9
+ @source = source
10
+ @latches = latches
11
+ @config = config
12
+ end
13
+
14
+ def each(&block)
15
+ reader = Parser::LatchedAccumulator.new(@config, @latches, block)
16
+ parser = ::Nokogiri::XML::SAX::Parser.new(reader)
17
+
18
+ # Always have to start at the beginning of a File
19
+ @source.rewind if(@source.is_a?(File))
20
+
21
+ parser.parse(@source)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,15 @@
1
+ module Saxerator
2
+ module DSL
3
+ def for_tag(tag)
4
+ DocumentFragment.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
5
+ end
6
+
7
+ def at_depth(depth)
8
+ DocumentFragment.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
9
+ end
10
+
11
+ def within(tag)
12
+ DocumentFragment.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ require 'saxerator/dsl'
2
+
3
+ module Saxerator
4
+ class FullDocument
5
+ include DSL
6
+
7
+ def initialize(source, config = nil)
8
+ @source = source
9
+ @config = config
10
+ @latches = []
11
+ end
12
+
13
+ def all
14
+ DocumentFragment.new(@source, @config, @latches).first
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class DepthLatch < DocumentLatch
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class ElementNameLatch < DocumentLatch
@@ -1,3 +1,5 @@
1
+ require 'saxerator/parser/document_latch'
2
+
1
3
  module Saxerator
2
4
  module Parser
3
5
  class WithinElementLatch < DocumentLatch
@@ -1,3 +1,3 @@
1
1
  module Saxerator
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -20,6 +20,10 @@ describe Saxerator do
20
20
  results.should == ['one', 'two', 'three']
21
21
  end
22
22
 
23
+ it "should allow you to parse an entire document" do
24
+ subject.all.should == {'blurb' => ['one', 'two', 'three']}
25
+ end
26
+
23
27
  context "and one non-blurb" do
24
28
  let(:xml) { "<blurbs><blurb>one</blurb><blurb>two</blurb><blurb>three</blurb><notablurb>four</notablurb></blurbs>" }
25
29
  it "should only parse the requested tag" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxerator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-14 00:00:00.000000000 Z
12
+ date: 2012-04-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -78,7 +78,9 @@ files:
78
78
  - .rvmrc
79
79
  - .gitignore
80
80
  - .travis.yml
81
- - lib/saxerator/document.rb
81
+ - lib/saxerator/document_fragment.rb
82
+ - lib/saxerator/dsl.rb
83
+ - lib/saxerator/full_document.rb
82
84
  - lib/saxerator/hash_with_attributes.rb
83
85
  - lib/saxerator/parser/accumulator.rb
84
86
  - lib/saxerator/parser/depth_latch.rb
@@ -96,6 +98,7 @@ files:
96
98
  - spec/spec_helper.rb
97
99
  - benchmark/benchmark.rb
98
100
  - benchmark/generate_sample_file.rb
101
+ - benchmark/profile.rb
99
102
  homepage: https://github.com/soulcutter/saxerator
100
103
  licenses:
101
104
  - MIT
@@ -1,33 +0,0 @@
1
- module Saxerator
2
- class Document
3
- include Enumerable
4
-
5
- def initialize(source, config = nil, latches = [])
6
- @source = source
7
- @latches = latches
8
- @config = config
9
- end
10
-
11
- def for_tag(tag)
12
- Document.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
13
- end
14
-
15
- def at_depth(depth)
16
- Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
17
- end
18
-
19
- def within(tag)
20
- Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
21
- end
22
-
23
- def each(&block)
24
- document = Parser::LatchedAccumulator.new(@config, @latches, block)
25
- parser = ::Nokogiri::XML::SAX::Parser.new document
26
-
27
- # Always have to start at the beginning of a File
28
- @source.rewind if(@source.is_a?(File))
29
-
30
- parser.parse(@source)
31
- end
32
- end
33
- end