nokogiri-streaming-reader 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 65e5209b0d9be2508b66a3ec3b4930ff01f1d265
4
+ data.tar.gz: 50bd545822035962339aed4d83c5f0a1bf3701ef
5
+ SHA512:
6
+ metadata.gz: 6795646ee413f21a40f45c8c83bbb50954cda906273f258021c3dfacc674e2a5d485bbe918512ff536582f632036bc000a4da0c99cc95475c6034ae01f4bbaee
7
+ data.tar.gz: f1e23979d3a5512653e4e90e833ed1c16a9b19943712895d7defadb1e3749555454cc6284a33f7e2e559a860c16dd31288bf45050d8037953446b92cd051a824
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Alexander Staubo
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # Simple streaming reader for Nokogiri
2
+
3
+ This library implements a very simple streaming parser that can parse large documents with low memory overhead.
4
+
5
+ When setting up the reader, one registers paths to capture:
6
+
7
+ reader.on('/path') do |e|
8
+ # ...
9
+ end
10
+
11
+ Each path must be a simple XPath-like path. Unlike full XPath, however, only plain element-name segments (such as `/root/item`) are currently supported, and every path must start at the document root.
12
+
13
+ Each registered handler receives the parsed element as its argument.
14
+
15
+ ## Example
16
+
17
+ reader = Nokogiri::Streaming::Reader.new(doc)
18
+ reader.on('/some/element/in/document') do |e|
19
+ # Handle element
20
+ end
21
+ reader.on('/some/other/element/in/document') do |e|
22
+ # Handle element
23
+ end
24
+ reader.run
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,89 @@
1
+ require 'nokogiri'
2
+
3
module Nokogiri
  module Streaming

    # SAX-driven reader that builds Nokogiri elements only for the paths that
    # have been registered with #on, and hands each completed element to the
    # registered handlers. Everything outside a registered path is skipped
    # without building nodes.
    class Reader < XML::SAX::Document

      # @param source the XML input; it is passed unchanged to
      #   Nokogiri::XML::SAX::Parser#parse when #run is called.
      def initialize(source)
        @source = source
        @parser = Nokogiri::XML::SAX::Parser.new(self)
        @stack = []      # names of the currently open elements, outermost first
        @triggers = {}   # path string => array of handler blocks
        @current = nil   # element being built, or nil when not capturing
      end

      # Parses the source, invoking registered handlers as matching elements
      # are closed.
      def run
        @parser.parse(@source)
      end

      # Registers a handler for an absolute path such as '/root/item'.
      # Several handlers may be registered for the same path; they are called
      # in registration order.
      #
      # @param path [String] slash-separated element names, starting at the root
      # @yield [element] the captured element, including the children and text
      #   collected while it was open
      def on(path, &block)
        (@triggers[path] ||= []).push(block)
      end

      # @return [String] the path of the innermost open element, or '' when no
      #   element is open
      def current_path
        @stack.empty? ? '' : "/#{@stack.join('/')}"
      end

      #
      # The following methods are SAX callbacks from Nokogiri.
      #

      def end_document
      end

      # Opens an element. While a capture is in progress the element is
      # appended to the element being built; otherwise a new capture starts
      # only if the element's path has registered handlers.
      def start_element(name, attrs = [])
        if @current
          element = build_element(@current.document, name, attrs)
          @current.add_child(element)
          @current = element
        elsif @triggers["#{current_path}/#{name}"]
          # The capture root is deliberately left detached from its document:
          # its parent is nil, which is what ends the capture in #end_element.
          @current = build_element(Nokogiri::XML::Document.new, name, attrs)
        end
        @stack.push(name)
      end

      # Closes the innermost element, steps the capture back to its parent
      # (nil once the capture root is closed) and fires the handlers
      # registered for the closed element's path.
      def end_element(name)
        path = current_path
        @stack.pop

        element = @current
        @current = @current.parent if @current

        handlers = @triggers[path]
        handlers.each { |handler| handler.call(element) } if handlers
      end

      # Appends character data to the element being built; ignored when no
      # capture is in progress.
      def characters(string)
        return unless @current

        # Node#add_child treats a String argument as markup and re-parses it.
        # SAX delivers text already entity-decoded, so it must be wrapped in
        # an explicit text node to keep '<' and '&' as literal characters.
        @current.add_child(Nokogiri::XML::Text.new(string, @current.document))
      end

      # CDATA content is stored the same way as ordinary character data.
      def cdata_block(string)
        characters(string)
      end

      private

      # Creates an element owned by +document+ and copies the given
      # name/value attribute pairs onto it.
      def build_element(document, name, attrs)
        element = document.create_element(name)
        attrs.each do |attr_name, attr_value|
          element[attr_name] = attr_value
        end
        element
      end

    end

  end
end
@@ -0,0 +1,5 @@
1
module Nokogiri
  module Streaming
    # Gem version string, read by the gemspec. Frozen so the shared constant
    # cannot be mutated in place.
    VERSION = '0.0.1'.freeze
  end
end
@@ -0,0 +1,27 @@
1
# coding: utf-8

# Gem specification for nokogiri-streaming-reader.

# Make the gem's own lib/ directory loadable so the version file resolves.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'nokogiri/streaming/version'

Gem::Specification.new do |spec|
  # Summary and description intentionally share the same text.
  blurb = %q{Simple streaming reader for Nokogiri.}

  spec.name        = "nokogiri-streaming-reader"
  spec.version     = Nokogiri::Streaming::VERSION
  spec.authors     = ["Alexander Staubo"]
  spec.email       = ["alex@bengler.no"]
  spec.description = blurb
  spec.summary     = blurb
  spec.homepage    = ""
  spec.license     = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by directory prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri", ">= 1.4"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
describe Nokogiri::Streaming::Reader do

  subject do
    Nokogiri::Streaming::Reader
  end

  # Small document containing empty elements, an element with an attribute
  # and a child, and an element with text content.
  let(:doc) do
    %{
      <root>
        <fruit/>
        <fruit/>
        <vegetable id='1'><seed/></vegetable>
        <meat><type>beef</type></meat>
      </root>
    }
  end

  it 'parses registered paths' do
    # Serialized elements collected per registered path.
    captured = Hash.new { |hash, path| hash[path] = [] }

    reader = subject.new(doc)
    ['/root/fruit', '/root/vegetable', '/root/meat/type'].each do |path|
      reader.on(path) do |e|
        captured[path].push(e.to_xml(indent: 0))
      end
    end
    reader.run

    expect(captured['/root/fruit']).to eq ['<fruit/>', '<fruit/>']
    expect(captured['/root/vegetable']).to eq ["<vegetable id=\"1\">\n<seed/>\n</vegetable>"]
    expect(captured['/root/meat/type']).to eq ["<type>beef</type>"]
  end

end
@@ -0,0 +1,3 @@
1
+ require 'rspec'
2
+
3
+ require_relative '../lib/nokogiri/streaming/reader'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nokogiri-streaming-reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Staubo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Simple streaming reader for Nokogiri.
70
+ email:
71
+ - alex@bengler.no
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - lib/nokogiri/streaming/reader.rb
82
+ - lib/nokogiri/streaming/version.rb
83
+ - nokogiri-streaming-reader.gemspec
84
+ - spec/reader_spec.rb
85
+ - spec/spec_helper.rb
86
+ homepage: ''
87
+ licenses:
88
+ - MIT
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.0.3
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Simple streaming reader for Nokogiri.
110
+ test_files:
111
+ - spec/reader_spec.rb
112
+ - spec/spec_helper.rb
113
+ has_rdoc: