saxerator 0.9.8 → 0.9.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +19 -0
- data/Gemfile +1 -1
- data/README.md +11 -7
- data/Rakefile +1 -1
- data/benchmark/benchmark.rb +1 -1
- data/lib/saxerator.rb +3 -1
- data/lib/saxerator/adapters/nokogiri.rb +4 -0
- data/lib/saxerator/adapters/oga.rb +44 -0
- data/lib/saxerator/adapters/ox.rb +4 -0
- data/lib/saxerator/adapters/rexml.rb +2 -1
- data/lib/saxerator/builder/array_element.rb +3 -2
- data/lib/saxerator/builder/hash_builder.rb +15 -11
- data/lib/saxerator/builder/string_element.rb +1 -1
- data/lib/saxerator/configuration.rb +5 -5
- data/lib/saxerator/document_fragment.rb +2 -0
- data/lib/saxerator/latches/child_of.rb +1 -1
- data/lib/saxerator/parser/accumulator.rb +1 -1
- data/lib/saxerator/version.rb +1 -1
- data/saxerator.gemspec +2 -1
- data/spec/fixtures/mixed_text_with_elements.xml +2 -0
- data/spec/lib/builder/hash_builder_spec.rb +20 -15
- data/spec/lib/builder/xml_builder_spec.rb +2 -1
- data/spec/lib/dsl/for_tags_spec.rb +1 -1
- data/spec/lib/dsl/with_attributes_spec.rb +1 -1
- data/spec/lib/saxerator_spec.rb +42 -1
- data/spec/spec_helper.rb +1 -1
- metadata +30 -15
- data/.travis.yml +0 -11
- data/lib/saxerator/builder/empty_element.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3ef3a973685c23f4ddb66241eae62350d3149ab3d55620825e0d49a8f014f0e9
|
4
|
+
data.tar.gz: 217fba2c53d19fa17183116c0c115618b4cfa0c2bdc8d89e0fbae0900a20c497
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6967cab1638d25d5544dacd15b4ac1f93c1b72552cd24f83c493ac64574c4ce9977843194b12535a7fc2e089ffe4aae3c105efba9003d0fcc3de052eb2cb350d
|
7
|
+
data.tar.gz: 7a9f5f932d43bd398773174cf2757a5bdf6951fe64296ed117772f80b6bc0f494dd50c9e417dc959c8980636821a75441ca738f11573b81068954f5eb792a227
|
@@ -0,0 +1,19 @@
|
|
1
|
+
version: 2.1
|
2
|
+
orbs:
|
3
|
+
ruby: circleci/ruby@0.2.1
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:2.6.3-stretch-node
|
9
|
+
steps:
|
10
|
+
- checkout
|
11
|
+
- run:
|
12
|
+
name: Resolve ruby bundle
|
13
|
+
command: bundle lock
|
14
|
+
- ruby/load-cache
|
15
|
+
- ruby/install-deps
|
16
|
+
- ruby/save-cache
|
17
|
+
- run:
|
18
|
+
name: Run tests across all adapters
|
19
|
+
command: "bundle exec rake spec:adapters"
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Saxerator [![
|
1
|
+
Saxerator [![soulcutter](https://circleci.com/gh/soulcutter/saxerator.svg?style=shield)](https://circleci.com/gh/soulcutter/saxerator)[![Code Climate](https://codeclimate.com/github/soulcutter/saxerator.png)](https://codeclimate.com/github/soulcutter/saxerator)
|
2
2
|
=========
|
3
3
|
|
4
4
|
Saxerator is a streaming xml-to-hash parser designed for working with very large xml files by
|
@@ -39,6 +39,9 @@ for added clarity).
|
|
39
39
|
| `with_attribute(name, value)` | Elements that have an attribute with a given `name` and `value`. If no `value` is given, matches any element with the specified attribute name present
|
40
40
|
| `with_attributes(attrs)` | Similar to `with_attribute` except takes an Array or Hash indicating the attributes to match
|
41
41
|
|
42
|
+
On any parsing error it will raise a `Saxerator::ParseException` with a message that describes what is wrong with the XML document.
|
43
|
+
**Warning** REXML won't raise an error if the root element wasn't closed (this will be fixed in Ruby 2.5).
|
44
|
+
|
42
45
|
Examples
|
43
46
|
--------
|
44
47
|
```ruby
|
@@ -99,7 +102,7 @@ end
|
|
99
102
|
|
100
103
|
| Setting | Default | Values | Description
|
101
104
|
|:------------------|:--------|-----------------|------------
|
102
|
-
| `adapter` | `:nokogiri` | `:nokogiri`, `:ox`, `:rexml` | The XML parser used by Saxerator |
|
105
|
+
| `adapter` | `:nokogiri` | `:nokogiri`, `:oga`, `:ox`, `:rexml` | The XML parser used by Saxerator |
|
103
106
|
| `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` generates a Ruby Hash, `:xml` generates a `REXML::Document`
|
104
107
|
| `symbolize_keys!` | n/a | n/a | Call this method if you want the hash keys to be symbols rather than strings
|
105
108
|
| `ignore_namespaces!`| n/a | n/a | Call this method if you want to treat the XML document as if it has no namespace information. It differs slightly from `strip_namespaces!` since it deals with how the XML is processed rather than how it is output
|
@@ -115,6 +118,7 @@ Other Documentation
|
|
115
118
|
-------------------
|
116
119
|
* [REXML](http://www.germane-software.com/software/rexml/) ([api docs](http://ruby-doc.org/stdlib-2.4.0/libdoc/rexml/rdoc/REXML/Document.html))
|
117
120
|
* [Nokogiri](http://www.nokogiri.org/) ([api docs](http://www.rubydoc.info/github/sparklemotion/nokogiri))
|
121
|
+
* [Oga](https://github.com/YorickPeterse/oga) ([api docs](http://code.yorickpeterse.com/oga/latest/))
|
118
122
|
* [Ox](https://github.com/ohler55/ox) ([api docs](http://www.ohler.com/ox/))
|
119
123
|
|
120
124
|
FAQ
|
@@ -140,10 +144,10 @@ When I fetch a tag that has one or more elements, sometimes I get an `Array`, an
|
|
140
144
|
> You can treat objects consistently as arrays using
|
141
145
|
> [Ruby's built-in array conversion method](http://www.ruby-doc.org/core-2.1.1/Kernel.html#method-i-Array)
|
142
146
|
> in the form `Array(element_or_array)`
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
>
|
147
|
+
|
148
|
+
Why does Active Record fail when I pass a String value to the query?
|
149
|
+
|
150
|
+
> Saxerator doesn't return an Array, Hash or String to you, but you can convert the result to the needed type by calling the `.to_<type>` method as you usually do.
|
147
151
|
|
148
152
|
### Contribution ###
|
149
153
|
|
@@ -154,4 +158,4 @@ Saxerator was inspired by - but not affiliated with - [nori](https://github.com/
|
|
154
158
|
[Practicing Ruby](http://practicingruby.com/)
|
155
159
|
|
156
160
|
#### Legal Stuff ####
|
157
|
-
Copyright © 2012-
|
161
|
+
Copyright © 2012-2020 Bradley Schaefer. MIT License (see LICENSE file).
|
data/Rakefile
CHANGED
@@ -9,7 +9,7 @@ task default: :spec
|
|
9
9
|
namespace :spec do
|
10
10
|
desc 'Run specs against all available adapters'
|
11
11
|
task :adapters do |_|
|
12
|
-
%w
|
12
|
+
%w[nokogiri ox oga rexml].each do |adapter|
|
13
13
|
ENV['SAXERATOR_ADAPTER'] = adapter
|
14
14
|
Rake::Task['spec'].invoke
|
15
15
|
::Rake.application['spec'].reenable
|
data/benchmark/benchmark.rb
CHANGED
data/lib/saxerator.rb
CHANGED
@@ -8,7 +8,6 @@ require 'saxerator/configuration'
|
|
8
8
|
|
9
9
|
require 'saxerator/builder'
|
10
10
|
require 'saxerator/builder/array_element'
|
11
|
-
require 'saxerator/builder/empty_element'
|
12
11
|
require 'saxerator/builder/hash_element'
|
13
12
|
require 'saxerator/builder/string_element'
|
14
13
|
require 'saxerator/builder/hash_builder'
|
@@ -24,6 +23,9 @@ require 'saxerator/latches/child_of'
|
|
24
23
|
require 'saxerator/latches/with_attributes'
|
25
24
|
|
26
25
|
module Saxerator
|
26
|
+
class ParseException < StandardError
|
27
|
+
end
|
28
|
+
|
27
29
|
extend self
|
28
30
|
|
29
31
|
def parser(xml)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'oga'
|
3
|
+
|
4
|
+
module Saxerator
|
5
|
+
module Adapters
|
6
|
+
class Oga
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
def self.parse(source, reader)
|
10
|
+
parser = ::Oga::XML::SaxParser.new(new(reader), source, strict: true)
|
11
|
+
parser.parse
|
12
|
+
rescue LL::ParserError => message
|
13
|
+
raise Saxerator::ParseException, message
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize(reader)
|
17
|
+
@reader = reader
|
18
|
+
@ignore_namespaces = reader.ignore_namespaces?
|
19
|
+
end
|
20
|
+
|
21
|
+
def_delegator :@reader, :characters, :on_text
|
22
|
+
def_delegator :@reader, :characters, :on_cdata
|
23
|
+
|
24
|
+
def on_element(namespace, name, attrs = {})
|
25
|
+
name = "#{namespace}:#{name}" if namespace && !@ignore_namespaces
|
26
|
+
attrs = @ignore_namespaces ? strip_namespace(attrs) : attrs.to_a
|
27
|
+
@reader.start_element(name, attrs)
|
28
|
+
end
|
29
|
+
|
30
|
+
def after_element(namespace, name)
|
31
|
+
name = "#{namespace}:#{name}" if namespace && !@ignore_namespaces
|
32
|
+
@reader.end_element(name)
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def strip_namespace(attrs)
|
38
|
+
attrs.map { |k, v| [k.gsub(NAMESPACE_MATCHER, ''), v] }
|
39
|
+
end
|
40
|
+
|
41
|
+
NAMESPACE_MATCHER = /\A.+:/
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -11,6 +11,8 @@ module Saxerator
|
|
11
11
|
def self.parse(source, reader)
|
12
12
|
handler = new(reader)
|
13
13
|
REXML::Document.parse_stream(source, handler)
|
14
|
+
rescue REXML::ParseException => message
|
15
|
+
raise Saxerator::ParseException, message
|
14
16
|
end
|
15
17
|
|
16
18
|
def initialize(reader)
|
@@ -36,7 +38,6 @@ module Saxerator
|
|
36
38
|
def strip_namespace(name)
|
37
39
|
name.split(':').last
|
38
40
|
end
|
39
|
-
|
40
41
|
end
|
41
42
|
end
|
42
43
|
end
|
@@ -3,10 +3,11 @@ require 'delegate'
|
|
3
3
|
module Saxerator
|
4
4
|
module Builder
|
5
5
|
class ArrayElement < DelegateClass(Array)
|
6
|
-
attr_accessor :name
|
6
|
+
attr_accessor :name, :attributes
|
7
7
|
|
8
|
-
def initialize(arr = [], name = nil)
|
8
|
+
def initialize(arr = [], name = nil, attributes = nil)
|
9
9
|
@name = name
|
10
|
+
@attributes = attributes
|
10
11
|
super(arr)
|
11
12
|
end
|
12
13
|
|
@@ -8,18 +8,12 @@ module Saxerator
|
|
8
8
|
@name = config.generate_key_for(name)
|
9
9
|
@attributes = normalize_attributes(attributes)
|
10
10
|
@children = []
|
11
|
-
@text = false
|
12
11
|
end
|
13
12
|
|
14
13
|
def add_node(node)
|
15
|
-
@text = true if node.is_a? String
|
16
14
|
@children << node
|
17
15
|
end
|
18
16
|
|
19
|
-
def to_empty_element
|
20
|
-
EmptyElement.new(@name, @attributes)
|
21
|
-
end
|
22
|
-
|
23
17
|
def to_s
|
24
18
|
StringElement.new(@children.join, @name, @attributes)
|
25
19
|
end
|
@@ -45,6 +39,17 @@ module Saxerator
|
|
45
39
|
hash
|
46
40
|
end
|
47
41
|
|
42
|
+
def to_array
|
43
|
+
arr = @children.map do |child|
|
44
|
+
if child.kind_of?(String)
|
45
|
+
StringElement.new(child)
|
46
|
+
else
|
47
|
+
child.block_variable
|
48
|
+
end
|
49
|
+
end
|
50
|
+
ArrayElement.new(arr, @name, @attributes)
|
51
|
+
end
|
52
|
+
|
48
53
|
def add_to_hash_element(hash, name, element)
|
49
54
|
name = generate_key(name)
|
50
55
|
if hash.key? name
|
@@ -58,11 +63,10 @@ module Saxerator
|
|
58
63
|
end
|
59
64
|
|
60
65
|
def block_variable
|
61
|
-
return
|
62
|
-
if @children.
|
63
|
-
|
64
|
-
|
65
|
-
to_empty_element
|
66
|
+
return to_hash unless @children.any? { |c| c.kind_of?(String) }
|
67
|
+
return to_s if @children.all? { |c| c.kind_of?(String) }
|
68
|
+
|
69
|
+
to_array
|
66
70
|
end
|
67
71
|
|
68
72
|
def normalize_attributes(attributes)
|
@@ -3,7 +3,7 @@ module Saxerator
|
|
3
3
|
attr_writer :hash_key_generator
|
4
4
|
attr_reader :output_type
|
5
5
|
|
6
|
-
ADAPTER_TYPES = [
|
6
|
+
ADAPTER_TYPES = %i[ox nokogiri rexml oga].freeze
|
7
7
|
|
8
8
|
def initialize
|
9
9
|
@adapter = :rexml
|
@@ -39,7 +39,7 @@ module Saxerator
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def hash_key_normalizer
|
42
|
-
@hash_key_normalizer ||= ->
|
42
|
+
@hash_key_normalizer ||= ->(x) { x.to_s }
|
43
43
|
end
|
44
44
|
|
45
45
|
def hash_key_generator
|
@@ -47,15 +47,15 @@ module Saxerator
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def symbolize_keys!
|
50
|
-
@hash_key_generator = ->
|
50
|
+
@hash_key_generator = ->(x) { hash_key_normalizer.call(x).to_sym }
|
51
51
|
end
|
52
52
|
|
53
53
|
def strip_namespaces!(*namespaces)
|
54
54
|
if namespaces.any?
|
55
55
|
matching_group = namespaces.join('|')
|
56
|
-
@hash_key_normalizer = ->
|
56
|
+
@hash_key_normalizer = ->(x) { x.to_s.gsub(/(#{matching_group}):/, '') }
|
57
57
|
else
|
58
|
-
@hash_key_normalizer = ->
|
58
|
+
@hash_key_normalizer = ->(x) { x.to_s.gsub(/\w+:/, '') }
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
data/lib/saxerator/version.rb
CHANGED
data/saxerator.gemspec
CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
'Gemfile',
|
27
27
|
'Rakefile',
|
28
28
|
'.gitignore',
|
29
|
-
'.
|
29
|
+
'.circleci/config.yml'
|
30
30
|
] +
|
31
31
|
Dir.glob('lib/**/*.rb') +
|
32
32
|
Dir.glob('spec/**/*.*') +
|
@@ -36,6 +36,7 @@ Gem::Specification.new do |s|
|
|
36
36
|
s.require_paths = ['lib']
|
37
37
|
|
38
38
|
s.add_development_dependency 'nokogiri', '>= 1.4.0'
|
39
|
+
s.add_development_dependency 'oga'
|
39
40
|
s.add_development_dependency 'ox'
|
40
41
|
s.add_development_dependency 'rake'
|
41
42
|
s.add_development_dependency 'rspec', '~> 3.1'
|
@@ -0,0 +1,2 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<description>This is a description.<p id='1'>A paragraph within the description.<fig id='1'> A figure within the paragraph.</fig></p><p id='2'>Another paragraph.</p><p id='3'><fig id='2'> A figure within the paragraph.</fig></p></description>
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
require 'spec_helper'
|
3
4
|
|
4
5
|
describe 'Saxerator (default) hash format' do
|
@@ -71,29 +72,33 @@ describe 'Saxerator (default) hash format' do
|
|
71
72
|
context 'parsing an empty element' do
|
72
73
|
subject(:element) { entry['media:thumbnail'] }
|
73
74
|
|
74
|
-
it 'behaves somewhat like nil' do
|
75
|
-
expect(element).to be_nil
|
76
|
-
expect(!element).to be true
|
77
|
-
expect(element.to_s).to eq('')
|
78
|
-
expect(element.to_h).to eq({})
|
79
|
-
end
|
80
|
-
|
81
75
|
it { is_expected.to be_empty }
|
82
76
|
|
83
77
|
it 'has attributes' do
|
84
78
|
expect(element.attributes.keys).to eq ['url']
|
85
79
|
end
|
86
80
|
|
87
|
-
|
88
|
-
|
89
|
-
expect(element.send(conversion).name).to eq 'media:thumbnail'
|
90
|
-
end
|
81
|
+
it 'has a name' do
|
82
|
+
expect(element.name).to eq 'media:thumbnail'
|
91
83
|
end
|
84
|
+
end
|
92
85
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
86
|
+
describe 'Saxerator elements with both text and element children format' do
|
87
|
+
let(:xml) { fixture_file('mixed_text_with_elements.xml') }
|
88
|
+
subject(:description) { Saxerator.parser(xml).for_tag(:description).first }
|
89
|
+
|
90
|
+
it "emits an array of child elements in the order they appear in the document", :aggregate_failures do
|
91
|
+
expect(description.map(&:class))
|
92
|
+
.to eq([
|
93
|
+
Saxerator::Builder::StringElement,
|
94
|
+
Saxerator::Builder::ArrayElement,
|
95
|
+
Saxerator::Builder::StringElement,
|
96
|
+
Saxerator::Builder::HashElement
|
97
|
+
])
|
98
|
+
# verifying the nodes are what we expect them to be
|
99
|
+
expect(description.last.name).to eq 'p'
|
100
|
+
expect(description.last.attributes).to include('id' => '3')
|
101
|
+
expect(subject.first).to eq "This is a description."
|
97
102
|
end
|
98
103
|
end
|
99
104
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
require 'spec_helper'
|
3
4
|
|
4
5
|
describe 'Saxerator xml format' do
|
@@ -12,7 +13,7 @@ describe 'Saxerator xml format' do
|
|
12
13
|
|
13
14
|
it { is_expected.to be_a(REXML::Document) }
|
14
15
|
it 'looks like the original document' do
|
15
|
-
expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?><entry><id>1</id><published>2012-01-01T16:17:00-06:00</published><updated>2012-01-01T16:17:00-06:00</updated><link href="https://example.com/blog/how-to-eat-an-airplane"/><title>How to eat an airplane</title><content type="html"><p>Airplanes are very large — this can present difficulty in digestion.</p></content><media:thumbnail url="http://www.gravatar.com/avatar/a9eb6ba22e482b71b266daadf9c9a080?s=80"/><author><name>Soul<utter</name></author><contributor type="primary"><name>Jane Doe</name></contributor><contributor><name>Leviticus Alabaster</name></contributor></entry>'
|
16
|
+
expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?><entry><id>1</id><published>2012-01-01T16:17:00-06:00</published><updated>2012-01-01T16:17:00-06:00</updated><link href="https://example.com/blog/how-to-eat-an-airplane"/><title>How to eat an airplane</title><content type="html"><p>Airplanes are very large — this can present difficulty in digestion.</p></content><media:thumbnail url="http://www.gravatar.com/avatar/a9eb6ba22e482b71b266daadf9c9a080?s=80"/><author><name>Soul<utter</name></author><contributor type="primary"><name>Jane Doe</name></contributor><contributor><name>Leviticus Alabaster</name></contributor></entry>' # rubocop:disable Metrics/LineLength
|
16
17
|
expect(entry.to_s).to eq(expected_xml)
|
17
18
|
end
|
18
19
|
end
|
@@ -14,7 +14,7 @@ describe 'Saxerator::DSL#for_tags' do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'only selects the specified tags' do
|
17
|
-
expect(parser.for_tags(%w
|
17
|
+
expect(parser.for_tags(%w[blurb1 blurb3]).inject([], :<<)).to eq(['one', 'three'])
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'raises an ArgumentError for a non-Array argument' do
|
@@ -23,7 +23,7 @@ describe 'Saxerator::DSL#with_attributes' do
|
|
23
23
|
end
|
24
24
|
|
25
25
|
it 'matches tags which have the specified attributes' do
|
26
|
-
expect(parser.with_attributes(%w
|
26
|
+
expect(parser.with_attributes(%w[type ridiculous]).inject([], :<<))
|
27
27
|
.to eq(['Leviticus Alabaster', 'Eunice Diesel'])
|
28
28
|
end
|
29
29
|
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -10,7 +10,7 @@ RSpec.describe Saxerator do
|
|
10
10
|
let(:xml) { fixture_file('flat_blurbs.xml') }
|
11
11
|
|
12
12
|
it 'can parse it' do
|
13
|
-
expect(parser.all).to eq('blurb' => %w
|
13
|
+
expect(parser.all).to eq('blurb' => %w[one two three])
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'allows multiple operations on the same parser' do
|
@@ -19,6 +19,12 @@ RSpec.describe Saxerator do
|
|
19
19
|
expect(parser.for_tag(:blurb).first).to eq('one')
|
20
20
|
expect(parser.for_tag(:blurb).first).to eq('one')
|
21
21
|
end
|
22
|
+
|
23
|
+
it 'call each without block returns enumerator' do
|
24
|
+
enumerator = parser.for_tag(:blurb).each
|
25
|
+
expect(enumerator).to be_an(Enumerator)
|
26
|
+
expect(enumerator.to_a).to eq(%w(one two three))
|
27
|
+
end
|
22
28
|
end
|
23
29
|
|
24
30
|
context 'with a String argument' do
|
@@ -34,6 +40,41 @@ RSpec.describe Saxerator do
|
|
34
40
|
it 'can parse it' do
|
35
41
|
expect(parser.all).to eq('name' => 'Illiterates that can read', 'author' => 'Eunice Diesel')
|
36
42
|
end
|
43
|
+
|
44
|
+
it 'call each without block returns enumerator' do
|
45
|
+
enumerator = parser.for_tag(:name).each
|
46
|
+
expect(enumerator).to be_an(Enumerator)
|
47
|
+
expect(enumerator.to_a).to eq(['Illiterates that can read'])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
context 'raise exception when ' do
|
52
|
+
let(:broken_xml_1) do
|
53
|
+
<<-eos
|
54
|
+
<book>
|
55
|
+
<name>Illiterates that can read</name>
|
56
|
+
<author>Eunice Diesel</author>
|
57
|
+
eos
|
58
|
+
end
|
59
|
+
|
60
|
+
let(:broken_xml_2) do
|
61
|
+
<<-eos
|
62
|
+
<book>
|
63
|
+
<name>Illiterates that can read
|
64
|
+
<author>Eunice Diesel</author>
|
65
|
+
</book>
|
66
|
+
eos
|
67
|
+
end
|
68
|
+
|
69
|
+
unless ENV['SAXERATOR_ADAPTER'] == "rexml"
|
70
|
+
it 'ending node not found' do
|
71
|
+
expect { Saxerator.parser(broken_xml_1).all }.to raise_error(Saxerator::ParseException)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'node in the middle not closed' do
|
76
|
+
expect { Saxerator.parser(broken_xml_2).all }.to raise_error(Saxerator::ParseException)
|
77
|
+
end
|
37
78
|
end
|
38
79
|
end
|
39
80
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bradley Schaefer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.4.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: oga
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: ox
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,8 +89,8 @@ executables: []
|
|
75
89
|
extensions: []
|
76
90
|
extra_rdoc_files: []
|
77
91
|
files:
|
92
|
+
- ".circleci/config.yml"
|
78
93
|
- ".gitignore"
|
79
|
-
- ".travis.yml"
|
80
94
|
- Gemfile
|
81
95
|
- LICENSE
|
82
96
|
- README.md
|
@@ -85,11 +99,11 @@ files:
|
|
85
99
|
- benchmark/generate_sample_file.rb
|
86
100
|
- lib/saxerator.rb
|
87
101
|
- lib/saxerator/adapters/nokogiri.rb
|
102
|
+
- lib/saxerator/adapters/oga.rb
|
88
103
|
- lib/saxerator/adapters/ox.rb
|
89
104
|
- lib/saxerator/adapters/rexml.rb
|
90
105
|
- lib/saxerator/builder.rb
|
91
106
|
- lib/saxerator/builder/array_element.rb
|
92
|
-
- lib/saxerator/builder/empty_element.rb
|
93
107
|
- lib/saxerator/builder/hash_builder.rb
|
94
108
|
- lib/saxerator/builder/hash_element.rb
|
95
109
|
- lib/saxerator/builder/string_element.rb
|
@@ -111,6 +125,7 @@ files:
|
|
111
125
|
- saxerator.gemspec
|
112
126
|
- spec/examples.txt
|
113
127
|
- spec/fixtures/flat_blurbs.xml
|
128
|
+
- spec/fixtures/mixed_text_with_elements.xml
|
114
129
|
- spec/fixtures/nested_elements.xml
|
115
130
|
- spec/lib/builder/hash_builder_spec.rb
|
116
131
|
- spec/lib/builder/xml_builder_spec.rb
|
@@ -144,25 +159,25 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
144
159
|
- !ruby/object:Gem::Version
|
145
160
|
version: '0'
|
146
161
|
requirements: []
|
147
|
-
|
148
|
-
rubygems_version: 2.6.8
|
162
|
+
rubygems_version: 3.0.6
|
149
163
|
signing_key:
|
150
164
|
specification_version: 4
|
151
165
|
summary: A SAX-based XML-to-hash parser for parsing large files into manageable chunks
|
152
166
|
test_files:
|
167
|
+
- spec/spec_helper.rb
|
153
168
|
- spec/examples.txt
|
154
|
-
- spec/
|
155
|
-
- spec/
|
156
|
-
- spec/lib/builder/hash_builder_spec.rb
|
169
|
+
- spec/support/fixture_file.rb
|
170
|
+
- spec/lib/saxerator_spec.rb
|
157
171
|
- spec/lib/builder/xml_builder_spec.rb
|
172
|
+
- spec/lib/builder/hash_builder_spec.rb
|
173
|
+
- spec/lib/dsl/for_tag_spec.rb
|
174
|
+
- spec/lib/dsl/for_tags_spec.rb
|
175
|
+
- spec/lib/dsl/within_spec.rb
|
158
176
|
- spec/lib/dsl/all_spec.rb
|
159
177
|
- spec/lib/dsl/at_depth_spec.rb
|
160
178
|
- spec/lib/dsl/child_of_spec.rb
|
161
|
-
- spec/lib/dsl/for_tag_spec.rb
|
162
|
-
- spec/lib/dsl/for_tags_spec.rb
|
163
179
|
- spec/lib/dsl/with_attribute_spec.rb
|
164
180
|
- spec/lib/dsl/with_attributes_spec.rb
|
165
|
-
- spec/
|
166
|
-
- spec/
|
167
|
-
- spec/
|
168
|
-
- spec/support/fixture_file.rb
|
181
|
+
- spec/fixtures/mixed_text_with_elements.xml
|
182
|
+
- spec/fixtures/flat_blurbs.xml
|
183
|
+
- spec/fixtures/nested_elements.xml
|
data/.travis.yml
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require 'saxerator/builder/hash_element'
|
2
|
-
|
3
|
-
module Saxerator
|
4
|
-
module Builder
|
5
|
-
class EmptyElement < HashElement
|
6
|
-
def nil?; true end
|
7
|
-
|
8
|
-
def !; true end
|
9
|
-
|
10
|
-
def to_s
|
11
|
-
StringElement.new('', name, attributes)
|
12
|
-
end
|
13
|
-
|
14
|
-
def to_h
|
15
|
-
HashElement.new(name, attributes)
|
16
|
-
end
|
17
|
-
|
18
|
-
def to_a
|
19
|
-
ArrayElement.new([], name)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|