saxerator 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -0
- data/benchmark/benchmark.rb +10 -5
- data/benchmark/profile.rb +21 -0
- data/lib/saxerator.rb +5 -4
- data/lib/saxerator/document_fragment.rb +24 -0
- data/lib/saxerator/dsl.rb +15 -0
- data/lib/saxerator/full_document.rb +17 -0
- data/lib/saxerator/parser/depth_latch.rb +2 -0
- data/lib/saxerator/parser/element_name_latch.rb +2 -0
- data/lib/saxerator/parser/within_element_latch.rb +2 -0
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/saxerator_spec.rb +4 -0
- metadata +6 -3
- data/lib/saxerator/document.rb +0 -33
data/README.md
CHANGED
@@ -45,6 +45,12 @@ parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root <<
|
|
45
45
|
parser.for_tag(:name).within(:author).each { |x| author_names << x }
|
46
46
|
```
|
47
47
|
|
48
|
+
Don't care about memory/streaming, you just want your xml in one big hash? Saxerator can do that too.
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
parser.all # big, giant hash
|
52
|
+
```
|
53
|
+
|
48
54
|
Known Issues
|
49
55
|
------------
|
50
56
|
* JRuby closes the file stream at the end of parsing, therefore to perform multiple operations
|
data/benchmark/benchmark.rb
CHANGED
@@ -9,10 +9,15 @@ if !File.exists?(file)
|
|
9
9
|
end
|
10
10
|
file = File.new(file)
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
count = 0
|
12
|
+
count = count2 = count3 = count4 = 0
|
15
13
|
Benchmark.bm do |x|
|
16
|
-
x.report { Saxerator.parser(file).for_tag(
|
14
|
+
x.report('for_tag') { Saxerator.parser(file).for_tag(:artist).each { count = count + 1 } }
|
15
|
+
x.report('at_depth') { Saxerator.parser(file).at_depth(2).each { count2 = count2 + 1 } }
|
16
|
+
x.report('within') { Saxerator.parser(file).within(:artists).each { count3 = count3 + 1 } }
|
17
|
+
x.report('composite') { Saxerator.parser(file).for_tag(:name).within(:artist).at_depth(3).each { count4 = count4 + 1} }
|
17
18
|
end
|
18
|
-
|
19
|
+
|
20
|
+
puts "for_tag: #{count} artist elements parsed"
|
21
|
+
puts "at_depth: #{count2} elements parsed"
|
22
|
+
puts "within: #{count3} artists children parsed"
|
23
|
+
puts "composite: #{count4} names within artist nested 3 tags deep parsed"
|
data/benchmark/profile.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.push File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'saxerator'
|
3
|
+
require 'ruby-prof'
|
4
|
+
|
5
|
+
file = ARGV.shift
|
6
|
+
if !File.exists?(file)
|
7
|
+
puts "Cannot find file #{file}"
|
8
|
+
exit 1
|
9
|
+
end
|
10
|
+
file = File.new(file)
|
11
|
+
|
12
|
+
count = 0
|
13
|
+
RubyProf.start
|
14
|
+
|
15
|
+
Saxerator.parser(file).for_tag(:artist).each { count = count + 1 }
|
16
|
+
|
17
|
+
result = RubyProf.stop
|
18
|
+
printer = RubyProf::FlatPrinter.new(result)
|
19
|
+
printer.print(STDOUT)
|
20
|
+
|
21
|
+
puts "for_tag: #{count} artist elements parsed"
|
data/lib/saxerator.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
|
3
|
-
require
|
4
|
-
|
3
|
+
require 'saxerator/version'
|
4
|
+
|
5
|
+
require 'saxerator/full_document'
|
6
|
+
require 'saxerator/document_fragment'
|
5
7
|
require 'saxerator/string_with_attributes'
|
6
8
|
require 'saxerator/hash_with_attributes'
|
7
9
|
require 'saxerator/xml_node'
|
8
10
|
|
9
11
|
require 'saxerator/parser/accumulator'
|
10
|
-
require 'saxerator/parser/document_latch'
|
11
12
|
require 'saxerator/parser/element_name_latch'
|
12
13
|
require 'saxerator/parser/depth_latch'
|
13
14
|
require 'saxerator/parser/within_element_latch'
|
@@ -17,6 +18,6 @@ module Saxerator
|
|
17
18
|
extend self
|
18
19
|
|
19
20
|
def parser(xml)
|
20
|
-
Saxerator::Document.new(xml)
|
21
|
+
Saxerator::FullDocument.new(xml)
|
21
22
|
end
|
22
23
|
end
|
data/lib/saxerator/document_fragment.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'saxerator/dsl'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
class DocumentFragment
|
5
|
+
include Enumerable
|
6
|
+
include DSL
|
7
|
+
|
8
|
+
def initialize(source, config = nil, latches = [])
|
9
|
+
@source = source
|
10
|
+
@latches = latches
|
11
|
+
@config = config
|
12
|
+
end
|
13
|
+
|
14
|
+
def each(&block)
|
15
|
+
reader = Parser::LatchedAccumulator.new(@config, @latches, block)
|
16
|
+
parser = ::Nokogiri::XML::SAX::Parser.new(reader)
|
17
|
+
|
18
|
+
# Always have to start at the beginning of a File
|
19
|
+
@source.rewind if(@source.is_a?(File))
|
20
|
+
|
21
|
+
parser.parse(@source)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/saxerator/dsl.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Saxerator
|
2
|
+
module DSL
|
3
|
+
def for_tag(tag)
|
4
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
|
5
|
+
end
|
6
|
+
|
7
|
+
def at_depth(depth)
|
8
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
9
|
+
end
|
10
|
+
|
11
|
+
def within(tag)
|
12
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/saxerator/full_document.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'saxerator/dsl'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
class FullDocument
|
5
|
+
include DSL
|
6
|
+
|
7
|
+
def initialize(source, config = nil)
|
8
|
+
@source = source
|
9
|
+
@config = config
|
10
|
+
@latches = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def all
|
14
|
+
DocumentFragment.new(@source, @config, @latches).first
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -20,6 +20,10 @@ describe Saxerator do
|
|
20
20
|
results.should == ['one', 'two', 'three']
|
21
21
|
end
|
22
22
|
|
23
|
+
it "should allow you to parse an entire document" do
|
24
|
+
subject.all.should == {'blurb' => ['one', 'two', 'three']}
|
25
|
+
end
|
26
|
+
|
23
27
|
context "and one non-blurb" do
|
24
28
|
let(:xml) { "<blurbs><blurb>one</blurb><blurb>two</blurb><blurb>three</blurb><notablurb>four</notablurb></blurbs>" }
|
25
29
|
it "should only parse the requested tag" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -78,7 +78,9 @@ files:
|
|
78
78
|
- .rvmrc
|
79
79
|
- .gitignore
|
80
80
|
- .travis.yml
|
81
|
-
- lib/saxerator/document.rb
|
81
|
+
- lib/saxerator/document_fragment.rb
|
82
|
+
- lib/saxerator/dsl.rb
|
83
|
+
- lib/saxerator/full_document.rb
|
82
84
|
- lib/saxerator/hash_with_attributes.rb
|
83
85
|
- lib/saxerator/parser/accumulator.rb
|
84
86
|
- lib/saxerator/parser/depth_latch.rb
|
@@ -96,6 +98,7 @@ files:
|
|
96
98
|
- spec/spec_helper.rb
|
97
99
|
- benchmark/benchmark.rb
|
98
100
|
- benchmark/generate_sample_file.rb
|
101
|
+
- benchmark/profile.rb
|
99
102
|
homepage: https://github.com/soulcutter/saxerator
|
100
103
|
licenses:
|
101
104
|
- MIT
|
data/lib/saxerator/document.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Saxerator
|
2
|
-
class Document
|
3
|
-
include Enumerable
|
4
|
-
|
5
|
-
def initialize(source, config = nil, latches = [])
|
6
|
-
@source = source
|
7
|
-
@latches = latches
|
8
|
-
@config = config
|
9
|
-
end
|
10
|
-
|
11
|
-
def for_tag(tag)
|
12
|
-
Document.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
|
13
|
-
end
|
14
|
-
|
15
|
-
def at_depth(depth)
|
16
|
-
Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
17
|
-
end
|
18
|
-
|
19
|
-
def within(tag)
|
20
|
-
Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
21
|
-
end
|
22
|
-
|
23
|
-
def each(&block)
|
24
|
-
document = Parser::LatchedAccumulator.new(@config, @latches, block)
|
25
|
-
parser = ::Nokogiri::XML::SAX::Parser.new document
|
26
|
-
|
27
|
-
# Always have to start at the beginning of a File
|
28
|
-
@source.rewind if(@source.is_a?(File))
|
29
|
-
|
30
|
-
parser.parse(@source)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|