saxerator 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -0
- data/benchmark/benchmark.rb +10 -5
- data/benchmark/profile.rb +21 -0
- data/lib/saxerator.rb +5 -4
- data/lib/saxerator/document_fragment.rb +24 -0
- data/lib/saxerator/dsl.rb +15 -0
- data/lib/saxerator/full_document.rb +17 -0
- data/lib/saxerator/parser/depth_latch.rb +2 -0
- data/lib/saxerator/parser/element_name_latch.rb +2 -0
- data/lib/saxerator/parser/within_element_latch.rb +2 -0
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/saxerator_spec.rb +4 -0
- metadata +6 -3
- data/lib/saxerator/document.rb +0 -33
data/README.md
CHANGED
@@ -45,6 +45,12 @@ parser.for_tag(:name).at_depth(2).each { |x| names_nested_under_document_root <<
|
|
45
45
|
parser.for_tag(:name).within(:author).each { |x| author_names << x }
|
46
46
|
```
|
47
47
|
|
48
|
+
Don't care about memory/streaming, you just want your xml in one big hash? Saxerator can do that too.
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
parser.all # big, giant hash
|
52
|
+
```
|
53
|
+
|
48
54
|
Known Issues
|
49
55
|
------------
|
50
56
|
* JRuby closes the file stream at the end of parsing, therefore to perform multiple operations
|
data/benchmark/benchmark.rb
CHANGED
@@ -9,10 +9,15 @@ if !File.exists?(file)
|
|
9
9
|
end
|
10
10
|
file = File.new(file)
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
count = 0
|
12
|
+
count = count2 = count3 = count4 = 0
|
15
13
|
Benchmark.bm do |x|
|
16
|
-
x.report { Saxerator.parser(file).for_tag(
|
14
|
+
x.report('for_tag') { Saxerator.parser(file).for_tag(:artist).each { count = count + 1 } }
|
15
|
+
x.report('at_depth') { Saxerator.parser(file).at_depth(2).each { count2 = count2 + 1 } }
|
16
|
+
x.report('within') { Saxerator.parser(file).within(:artists).each { count3 = count3 + 1 } }
|
17
|
+
x.report('composite') { Saxerator.parser(file).for_tag(:name).within(:artist).at_depth(3).each { count4 = count4 + 1} }
|
17
18
|
end
|
18
|
-
|
19
|
+
|
20
|
+
puts "for_tag: #{count} artist elements parsed"
|
21
|
+
puts "at_depth: #{count2} elements parsed"
|
22
|
+
puts "within: #{count3} artists children parsed"
|
23
|
+
puts "composite: #{count4} names within artist nested 3 tags deep parsed"
|
data/benchmark/profile.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.push File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'saxerator'
|
3
|
+
require 'ruby-prof'
|
4
|
+
|
5
|
+
file = ARGV.shift
|
6
|
+
if !File.exists?(file)
|
7
|
+
puts "Cannot find file #{file}"
|
8
|
+
exit 1
|
9
|
+
end
|
10
|
+
file = File.new(file)
|
11
|
+
|
12
|
+
count = 0
|
13
|
+
RubyProf.start
|
14
|
+
|
15
|
+
Saxerator.parser(file).for_tag(:artist).each { count = count + 1 }
|
16
|
+
|
17
|
+
result = RubyProf.stop
|
18
|
+
printer = RubyProf::FlatPrinter.new(result)
|
19
|
+
printer.print(STDOUT)
|
20
|
+
|
21
|
+
puts "for_tag: #{count} artist elements parsed"
|
data/lib/saxerator.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
|
3
|
-
require
|
4
|
-
|
3
|
+
require 'saxerator/version'
|
4
|
+
|
5
|
+
require 'saxerator/full_document'
|
6
|
+
require 'saxerator/document_fragment'
|
5
7
|
require 'saxerator/string_with_attributes'
|
6
8
|
require 'saxerator/hash_with_attributes'
|
7
9
|
require 'saxerator/xml_node'
|
8
10
|
|
9
11
|
require 'saxerator/parser/accumulator'
|
10
|
-
require 'saxerator/parser/document_latch'
|
11
12
|
require 'saxerator/parser/element_name_latch'
|
12
13
|
require 'saxerator/parser/depth_latch'
|
13
14
|
require 'saxerator/parser/within_element_latch'
|
@@ -17,6 +18,6 @@ module Saxerator
|
|
17
18
|
extend self
|
18
19
|
|
19
20
|
def parser(xml)
|
20
|
-
Saxerator::Document.new(xml)
|
21
|
+
Saxerator::FullDocument.new(xml)
|
21
22
|
end
|
22
23
|
end
|
data/lib/saxerator/document_fragment.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'saxerator/dsl'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
class DocumentFragment
|
5
|
+
include Enumerable
|
6
|
+
include DSL
|
7
|
+
|
8
|
+
def initialize(source, config = nil, latches = [])
|
9
|
+
@source = source
|
10
|
+
@latches = latches
|
11
|
+
@config = config
|
12
|
+
end
|
13
|
+
|
14
|
+
def each(&block)
|
15
|
+
reader = Parser::LatchedAccumulator.new(@config, @latches, block)
|
16
|
+
parser = ::Nokogiri::XML::SAX::Parser.new(reader)
|
17
|
+
|
18
|
+
# Always have to start at the beginning of a File
|
19
|
+
@source.rewind if(@source.is_a?(File))
|
20
|
+
|
21
|
+
parser.parse(@source)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/saxerator/dsl.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Saxerator
|
2
|
+
module DSL
|
3
|
+
def for_tag(tag)
|
4
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
|
5
|
+
end
|
6
|
+
|
7
|
+
def at_depth(depth)
|
8
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
9
|
+
end
|
10
|
+
|
11
|
+
def within(tag)
|
12
|
+
DocumentFragment.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/saxerator/full_document.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'saxerator/dsl'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
class FullDocument
|
5
|
+
include DSL
|
6
|
+
|
7
|
+
def initialize(source, config = nil)
|
8
|
+
@source = source
|
9
|
+
@config = config
|
10
|
+
@latches = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def all
|
14
|
+
DocumentFragment.new(@source, @config, @latches).first
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -20,6 +20,10 @@ describe Saxerator do
|
|
20
20
|
results.should == ['one', 'two', 'three']
|
21
21
|
end
|
22
22
|
|
23
|
+
it "should allow you to parse an entire document" do
|
24
|
+
subject.all.should == {'blurb' => ['one', 'two', 'three']}
|
25
|
+
end
|
26
|
+
|
23
27
|
context "and one non-blurb" do
|
24
28
|
let(:xml) { "<blurbs><blurb>one</blurb><blurb>two</blurb><blurb>three</blurb><notablurb>four</notablurb></blurbs>" }
|
25
29
|
it "should only parse the requested tag" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -78,7 +78,9 @@ files:
|
|
78
78
|
- .rvmrc
|
79
79
|
- .gitignore
|
80
80
|
- .travis.yml
|
81
|
-
- lib/saxerator/document.rb
|
81
|
+
- lib/saxerator/document_fragment.rb
|
82
|
+
- lib/saxerator/dsl.rb
|
83
|
+
- lib/saxerator/full_document.rb
|
82
84
|
- lib/saxerator/hash_with_attributes.rb
|
83
85
|
- lib/saxerator/parser/accumulator.rb
|
84
86
|
- lib/saxerator/parser/depth_latch.rb
|
@@ -96,6 +98,7 @@ files:
|
|
96
98
|
- spec/spec_helper.rb
|
97
99
|
- benchmark/benchmark.rb
|
98
100
|
- benchmark/generate_sample_file.rb
|
101
|
+
- benchmark/profile.rb
|
99
102
|
homepage: https://github.com/soulcutter/saxerator
|
100
103
|
licenses:
|
101
104
|
- MIT
|
data/lib/saxerator/document.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Saxerator
|
2
|
-
class Document
|
3
|
-
include Enumerable
|
4
|
-
|
5
|
-
def initialize(source, config = nil, latches = [])
|
6
|
-
@source = source
|
7
|
-
@latches = latches
|
8
|
-
@config = config
|
9
|
-
end
|
10
|
-
|
11
|
-
def for_tag(tag)
|
12
|
-
Document.new(@source, @config, @latches + [Parser::ElementNameLatch.new(tag.to_s)])
|
13
|
-
end
|
14
|
-
|
15
|
-
def at_depth(depth)
|
16
|
-
Document.new(@source, @config, @latches + [Parser::DepthLatch.new(depth.to_i)])
|
17
|
-
end
|
18
|
-
|
19
|
-
def within(tag)
|
20
|
-
Document.new(@source, @config, @latches + [Parser::WithinElementLatch.new(tag.to_s)])
|
21
|
-
end
|
22
|
-
|
23
|
-
def each(&block)
|
24
|
-
document = Parser::LatchedAccumulator.new(@config, @latches, block)
|
25
|
-
parser = ::Nokogiri::XML::SAX::Parser.new document
|
26
|
-
|
27
|
-
# Always have to start at the beginning of a File
|
28
|
-
@source.rewind if(@source.is_a?(File))
|
29
|
-
|
30
|
-
parser.parse(@source)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|