nokogiri-streaming-reader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 65e5209b0d9be2508b66a3ec3b4930ff01f1d265
4
+ data.tar.gz: 50bd545822035962339aed4d83c5f0a1bf3701ef
5
+ SHA512:
6
+ metadata.gz: 6795646ee413f21a40f45c8c83bbb50954cda906273f258021c3dfacc674e2a5d485bbe918512ff536582f632036bc000a4da0c99cc95475c6034ae01f4bbaee
7
+ data.tar.gz: f1e23979d3a5512653e4e90e833ed1c16a9b19943712895d7defadb1e3749555454cc6284a33f7e2e559a860c16dd31288bf45050d8037953446b92cd051a824
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Alexander Staubo
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # Simple streaming reader for Nokogiri
2
+
3
+ This library implements a very simple streaming parser that can parse large documents with low memory overhead.
4
+
5
+ When setting up the reader, one registers paths to capture:
6
+
7
+ reader.on('/path') do |e|
8
+ # ...
9
+ end
10
+
11
+ Each path must be a simple XPath-like path. Unlike full XPath, only plain path segments (element names separated by `/`) are currently supported, and the path must always start at the root.
12
+
13
+ Each registered handler receives the parsed element as its argument.
14
+
15
+ ## Example
16
+
17
+ reader = Nokogiri::Streaming::Reader.new(doc)
18
+ reader.on('/some/element/in/document') do |e|
19
+ # Handle element
20
+ end
21
+ reader.on('/some/other/element/in/document') do |e|
22
+ # Handle element
23
+ end
24
+ reader.run
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,89 @@
1
+ require 'nokogiri'
2
+
3
module Nokogiri
  module Streaming

    # Streaming reader built on top of Nokogiri's SAX parser.
    #
    # Handlers are registered for absolute, slash-separated element paths
    # (for example '/root/item'). While the document is parsed, every element
    # whose path has a handler is rebuilt as a stand-alone node (including its
    # attributes, descendants and text) and passed to the handlers once its
    # closing tag has been seen. Elements outside registered paths are not
    # materialised.
    class Reader < XML::SAX::Document

      # @param source the XML input; it is handed unchanged to
      #   Nokogiri::XML::SAX::Parser#parse when #run is called.
      def initialize(source)
        @source = source
        @parser = Nokogiri::XML::SAX::Parser.new(self)
        @stack = []      # names of the currently open elements, outermost first
        @triggers = {}   # path => array of handler blocks
        @current = nil   # innermost element being rebuilt, or nil when not capturing
      end

      # Parses the source, invoking registered handlers along the way.
      def run
        @parser.parse(@source)
      end

      # Registers a handler for the element at +path+. Several handlers may be
      # registered for the same path; they are called in registration order.
      #
      # @param path [String] absolute path such as '/root/item'
      # @yield [element] the rebuilt element, once its end tag has been read
      # @raise [ArgumentError] if no block is given
      def on(path, &block)
        # Fail at registration time instead of with a NoMethodError on nil in
        # the middle of parsing.
        raise ArgumentError, 'a handler block is required' unless block

        (@triggers[path] ||= []) << block
      end

      # @return [String] the path of the innermost open element, or '' when no
      #   element is open.
      def current_path
        @stack.empty? ? '' : "/#{@stack.join('/')}"
      end

      #
      # The following methods are SAX callbacks from Nokogiri.
      #

      def end_document
      end

      def start_element(name, attrs = [])
        if @current
          # Already inside a captured element: keep extending its subtree.
          child = build_element(@current.document, name, attrs)
          @current.add_child(child)
          @current = child
        elsif @triggers["#{current_path}/#{name}"]
          # A registered path starts here. The element lives in a fresh
          # document and stays parentless, so walking up with #parent in
          # end_element reaches nil exactly when the captured element closes.
          @current = build_element(Nokogiri::XML::Document.new, name, attrs)
        end
        @stack.push(name)
      end

      def end_element(name)
        path = current_path

        @stack.pop

        element = @current
        @current = element.parent if element

        handlers = @triggers[path]
        handlers.each { |handler| handler.call(element) } if handlers
      end

      def characters(string)
        return unless @current

        # Node#add_child parses a bare String as markup, which would corrupt
        # text containing '<' or '&'. Add an explicit text node so the
        # characters are stored literally.
        @current.add_child(Nokogiri::XML::Text.new(string, @current.document))
      end

      # CDATA content is stored as ordinary (literal) text.
      def cdata_block(string)
        characters(string)
      end

      private

      # Creates an element called +name+ owned by +document+ and copies the
      # SAX attribute pairs onto it.
      def build_element(document, name, attrs)
        element = document.create_element(name)
        attrs.each do |attr_name, attr_value|
          element[attr_name] = attr_value
        end
        element
      end

    end

  end
end
@@ -0,0 +1,5 @@
1
module Nokogiri
  module Streaming
    # Release version of the gem; the gemspec reads this constant.
    VERSION = '0.0.1'
  end
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'nokogiri/streaming/version'
7
+
8
Gem::Specification.new do |spec|
  # The same one-line blurb serves as both summary and description.
  blurb = %q{Simple streaming reader for Nokogiri.}

  spec.name        = "nokogiri-streaming-reader"
  spec.version     = Nokogiri::Streaming::VERSION
  spec.authors     = ["Alexander Staubo"]
  spec.email       = ["alex@bengler.no"]
  spec.description = blurb
  spec.summary     = blurb
  spec.homepage    = ""
  spec.license     = "MIT"

  # Package every file tracked by git; -z gives NUL-separated names.
  spec.files         = `git ls-files -z`.split("\x0")
  # Executables are the base names of everything under bin/.
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependency.
  spec.add_dependency "nokogiri", ">= 1.4"

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
describe Nokogiri::Streaming::Reader do

  # The example builds its own reader, so the subject is the class itself
  # rather than an instance.
  subject { Nokogiri::Streaming::Reader }

  it 'parses registered paths' do
    xml = %{
<root>
<fruit/>
<fruit/>
<vegetable id='1'><seed/></vegetable>
<meat><type>beef</type></meat>
</root>
}

    # Serialized captures, grouped by the path whose handler produced them.
    captured = Hash.new { |store, path| store[path] = [] }

    reader = subject.new(xml)
    %w[/root/fruit /root/vegetable /root/meat/type].each do |path|
      reader.on(path) do |node|
        captured[path] << node.to_xml(indent: 0)
      end
    end
    reader.run

    expect(captured['/root/fruit']).to eq ['<fruit/>', '<fruit/>']
    expect(captured['/root/vegetable']).to eq ["<vegetable id=\"1\">\n<seed/>\n</vegetable>"]
    expect(captured['/root/meat/type']).to eq ["<type>beef</type>"]
  end

end
@@ -0,0 +1,3 @@
1
+ require 'rspec'
2
+
3
+ require_relative '../lib/nokogiri/streaming/reader'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nokogiri-streaming-reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Staubo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Simple streaming reader for Nokogiri.
70
+ email:
71
+ - alex@bengler.no
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - lib/nokogiri/streaming/reader.rb
82
+ - lib/nokogiri/streaming/version.rb
83
+ - nokogiri-streaming-reader.gemspec
84
+ - spec/reader_spec.rb
85
+ - spec/spec_helper.rb
86
+ homepage: ''
87
+ licenses:
88
+ - MIT
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.0.3
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Simple streaming reader for Nokogiri.
110
+ test_files:
111
+ - spec/reader_spec.rb
112
+ - spec/spec_helper.rb
113
+ has_rdoc: