saxerator 0.9.5 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +9 -12
  4. data/Gemfile +4 -2
  5. data/README.md +23 -3
  6. data/Rakefile +12 -1
  7. data/benchmark/benchmark.rb +54 -12
  8. data/benchmark/generate_sample_file.rb +1 -1
  9. data/lib/saxerator.rb +2 -3
  10. data/lib/saxerator/adapters/nokogiri.rb +39 -0
  11. data/lib/saxerator/adapters/ox.rb +69 -0
  12. data/lib/saxerator/adapters/rexml.rb +42 -0
  13. data/lib/saxerator/builder.rb +2 -2
  14. data/lib/saxerator/builder/array_element.rb +8 -2
  15. data/lib/saxerator/builder/empty_element.rb +2 -3
  16. data/lib/saxerator/builder/hash_builder.rb +8 -7
  17. data/lib/saxerator/builder/hash_element.rb +7 -8
  18. data/lib/saxerator/builder/string_element.rb +5 -7
  19. data/lib/saxerator/builder/xml_builder.rb +11 -7
  20. data/lib/saxerator/configuration.rb +27 -7
  21. data/lib/saxerator/document_fragment.rb +3 -5
  22. data/lib/saxerator/dsl.rb +6 -5
  23. data/lib/saxerator/full_document.rb +1 -1
  24. data/lib/saxerator/latches/abstract_latch.rb +1 -1
  25. data/lib/saxerator/latches/at_depth.rb +2 -2
  26. data/lib/saxerator/latches/child_of.rb +9 -14
  27. data/lib/saxerator/latches/for_tags.rb +2 -2
  28. data/lib/saxerator/latches/with_attributes.rb +2 -2
  29. data/lib/saxerator/latches/within.rb +6 -10
  30. data/lib/saxerator/parser/accumulator.rb +6 -9
  31. data/lib/saxerator/parser/latched_accumulator.rb +4 -49
  32. data/lib/saxerator/sax_handler.rb +9 -0
  33. data/lib/saxerator/version.rb +1 -1
  34. data/saxerator.gemspec +5 -2
  35. data/spec/lib/builder/hash_builder_spec.rb +25 -19
  36. data/spec/lib/builder/xml_builder_spec.rb +7 -26
  37. data/spec/lib/dsl/all_spec.rb +11 -6
  38. data/spec/lib/dsl/at_depth_spec.rb +10 -8
  39. data/spec/lib/dsl/child_of_spec.rb +6 -6
  40. data/spec/lib/dsl/for_tag_spec.rb +3 -3
  41. data/spec/lib/dsl/for_tags_spec.rb +5 -5
  42. data/spec/lib/dsl/with_attribute_spec.rb +4 -4
  43. data/spec/lib/dsl/with_attributes_spec.rb +8 -7
  44. data/spec/lib/dsl/within_spec.rb +6 -5
  45. data/spec/lib/saxerator_spec.rb +70 -58
  46. data/spec/spec_helper.rb +24 -3
  47. data/spec/support/fixture_file.rb +1 -1
  48. metadata +39 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a9e778a67df70446a761b0c44b299ec71d17767
4
- data.tar.gz: 4dd1010e6d76ec961dfaaac08b03e5c4e9179658
3
+ metadata.gz: e54e504ee27201c8e10d314e067250c1dd779514
4
+ data.tar.gz: 3a7338b78ab7e05f316c365903c3ab625e81d62d
5
5
  SHA512:
6
- metadata.gz: f575598f8e4eb2a50a828457107084615cecb12dfc4c1c94bb0b673cf7a6cc0c75771400c38d8b9eb403bc95da1be5f9bfcd7d1ff5be85f171090ef7961a6c30
7
- data.tar.gz: b33b5a6cd650a36d77ddf69ce33380aad95cc88d9c25d9000a86d1bd88377e4d07f0c7451a592b3b3c8d377c88de2ece5a72dda3c5be4dfdecceb487ee4768b0
6
+ metadata.gz: 350eb22db29091954e33bdf251a4e5e8133c3397221f7318d66ab878366862c614096d8b55a63de7d8300070b4ef8c434e0584769e5bbd6da7bb062218a2224e
7
+ data.tar.gz: 46145d2ff6a104304e0abdf330ecca528d0e1b14bbe5ce789f1016aeb7fe51f77888cce1c101233e75ce37bc3b3af67cfd5469682697a4ec37c3e00d8a446e7e
data/.gitignore CHANGED
@@ -8,3 +8,5 @@ coverage/
8
8
  .rvmrc
9
9
  .ruby-version
10
10
  .ruby-gemset
11
+
12
+ /spec/examples.txt
data/.travis.yml CHANGED
@@ -1,14 +1,11 @@
1
- script: "rspec spec"
1
+ before_install:
2
+ - gem install bundler -v 1.12.5
3
+ script: "bundle exec rake spec:adapters"
2
4
  language: ruby
3
- bundler_args: --without coverage
5
+ bundler_args: --without coverage --binstubs
6
+ sudo: false
7
+ cache: bundler
4
8
  rvm:
5
- - 2.1.0
6
- - 2.0.0
7
- - 1.9.2
8
- - 1.9.3
9
- - jruby-19mode
10
- - rbx-2.2.1
11
- matrix:
12
- allow_failures:
13
- - rvm: jruby-19mode
14
- - rvm: rbx-2.2.1
9
+ - 2.3.1
10
+ - 2.2.2
11
+ - 2.1.6
data/Gemfile CHANGED
@@ -1,8 +1,10 @@
1
- source "http://rubygems.org"
1
+ source 'http://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in saxerator.gemspec
4
4
  gemspec
5
5
 
6
6
  group :coverage do
7
7
  gem 'simplecov'
8
- end
8
+ end
9
+
10
+ gem 'pry', platforms: [:ruby_21, :ruby_22, :ruby_23]
data/README.md CHANGED
@@ -13,6 +13,15 @@ You can parse any valid xml in 3 simple steps.
13
13
  1. Perform your work in an `each` block, or using any [Enumerable](http://apidock.com/ruby/Enumerable)
14
14
  method
15
15
 
16
+ Installation
17
+ ------------
18
+ 1. `gem install saxerator`
19
+ 1. Choose an xml parser
20
+ * (default) Use ruby's built-in REXML parser - no other dependencies necessary
21
+ * `gem install nokogiri`
22
+ * `gem install ox`
23
+ 1. If not using the default, specify your adapter in the [Saxerator configuration](#configuration)
24
+
16
25
  The DSL
17
26
  -------
18
27
  The DSL consists of predicates that may be combined to describe which elements the parser should enumerate over.
@@ -90,7 +99,8 @@ end
90
99
 
91
100
  | Setting | Default | Values | Description
92
101
  |:------------------|:--------|-----------------|------------
93
- | `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` should be self-explanatory, `:xml` generates a `Nokogiri::XML::Document`
102
+ | `adapter` | `:nokogiri` | `:nokogiri`, `:ox`, `:rexml` | The XML parser used by Saxerator |
103
+ | `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` generates a Ruby Hash, `:xml` generates a `REXML::Document`
94
104
  | `symbolize_keys!` | n/a | n/a | Call this method if you want the hash keys to be symbols rather than strings
95
105
  | `ignore_namespaces!`| n/a | n/a | Call this method if you want to treat the XML document as if it has no namespace information. It differs slightly from `strip_namespaces!` since it deals with how the XML is processed rather than how it is output
96
106
  | `strip_namespaces!`| n/a | user-specified | Called with no arguments this strips all namespaces, or you may specify an arbitrary number of namespaces to strip, i.e. `config.strip_namespaces! :rss, :soapenv`
@@ -101,6 +111,12 @@ Known Issues
101
111
  * JRuby closes the file stream at the end of parsing, therefor to perform multiple operations
102
112
  which parse a file you will need to instantiate a new parser with a new File object.
103
113
 
114
+ Other Documentation
115
+ -------------------
116
+ * [REXML](http://www.germane-software.com/software/rexml/) ([api docs](http://ruby-doc.org/stdlib-2.4.0/libdoc/rexml/rdoc/REXML/Document.html))
117
+ * [Nokogiri](http://www.nokogiri.org/) ([api docs](http://www.rubydoc.info/github/sparklemotion/nokogiri))
118
+ * [Ox](https://github.com/ohler55/ox) ([api docs](http://www.ohler.com/ox/))
119
+
104
120
  FAQ
105
121
  ---
106
122
  Why the name 'Saxerator'?
@@ -129,9 +145,13 @@ When I fetch a tag that has one or more elements, sometimes I get an `Array`, an
129
145
  > occasionally comes up is for elements that are sometimes-empty. Empty elements behave mostly like an
130
146
  > empty `Hash`, however you may convert it to a more `String`-like object via `#to_s`
131
147
 
148
+ ### Contribution ###
149
+
150
+ For running tests for all parsers run `rake spec:adapters`
151
+
132
152
  ### Acknowledgements ###
133
- Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and [Gregory Brown](http://majesticseacreature.com/)'s
153
+ Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and Gregory Brown's
134
154
  [Practicing Ruby](http://practicingruby.com/)
135
155
 
136
156
  #### Legal Stuff ####
137
- Copyright © Bradley Schaefer. MIT License (see LICENSE file).
157
+ Copyright © 2012-2017 Bradley Schaefer. MIT License (see LICENSE file).
data/Rakefile CHANGED
@@ -4,4 +4,15 @@ require 'rspec/core/rake_task'
4
4
 
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
 
7
- task :default => :spec
7
+ task default: :spec
8
+
9
+ namespace :spec do
10
+ desc 'Run specs against all available adapters'
11
+ task :adapters do |_|
12
+ %w(nokogiri ox rexml).each do |adapter|
13
+ ENV['SAXERATOR_ADAPTER'] = adapter
14
+ Rake::Task['spec'].invoke
15
+ ::Rake.application['spec'].reenable
16
+ end
17
+ end
18
+ end
data/benchmark/benchmark.rb CHANGED
@@ -1,23 +1,65 @@
1
- $:.push File.expand_path('../../lib', __FILE__)
1
+ $LOAD_PATH.push File.expand_path('../../lib', __FILE__)
2
2
  require 'saxerator'
3
3
  require 'benchmark'
4
4
 
5
5
  file = ARGV.shift
6
- if !File.exists?(file)
6
+ unless File.exist?(file)
7
7
  puts "Cannot find file #{file}"
8
8
  exit 1
9
9
  end
10
10
  file = File.new(file)
11
11
 
12
- count = count2 = count3 = count4 = 0
13
- Benchmark.bm do |x|
14
- x.report('for_tag') { Saxerator.parser(file).for_tag(:artist).each { count = count + 1 } }
15
- x.report('at_depth') { Saxerator.parser(file).at_depth(2).each { count2 = count2 + 1 } }
16
- x.report('within') { Saxerator.parser(file).within(:artists).each { count3 = count3 + 1 } }
17
- x.report('composite') { Saxerator.parser(file).for_tag(:name).within(:artist).at_depth(3).each { count4 = count4 + 1} }
12
+ ADAPTERS = [:nokogiri, :ox].freeze
13
+
14
+ class SaxeratorBenchmark
15
+ def initialize(file)
16
+ @file = file
17
+ end
18
+
19
+ def with_adapter(adapter) # rubocop:disable Metrics/MethodLength
20
+ puts '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
21
+ puts
22
+ puts "Benchmark with :#{adapter} parser"
23
+ puts
24
+ puts '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
25
+ puts
26
+
27
+ count = count2 = count3 = count4 = 0
28
+
29
+ Benchmark.bm do |x|
30
+ x.report('for_tag') do
31
+ Saxerator.parser(@file) { |confing| confing.adapter = adapter }
32
+ .for_tag(:artist).each { count += 1 }
33
+ end
34
+
35
+ x.report('at_depth') do
36
+ Saxerator.parser(@file) { |confing| confing.adapter = adapter }
37
+ .at_depth(2).each { count2 += 1 }
38
+ end
39
+
40
+ x.report('within') do
41
+ Saxerator.parser(@file) { |confing| confing.adapter = adapter }
42
+ .within(:artists).each { count3 += 1 }
43
+ end
44
+
45
+ x.report('composite') do
46
+ Saxerator.parser(@file) { |confing| confing.adapter = adapter }
47
+ .for_tag(:name)
48
+ .within(:artist).at_depth(3).each { count4 += 1 }
49
+ end
50
+ end
51
+
52
+ puts
53
+ puts '##########################################################'
54
+ puts
55
+ puts "for_tag: #{count} artist elements parsed"
56
+ puts "at_depth: #{count2} elements parsed"
57
+ puts "within: #{count3} artists children parsed"
58
+ puts "composite: #{count4} names within artist nested 3 tags deep parsed"
59
+ puts
60
+ end
18
61
  end
19
62
 
20
- puts "for_tag: #{count} artist elements parsed"
21
- puts "at_depth: #{count2} elements parsed"
22
- puts "within: #{count3} artists children parsed"
23
- puts "composite: #{count4} names within artist nested 3 tags deep parsed"
63
+ saxerator_benchmark = SaxeratorBenchmark.new(file)
64
+
65
+ ADAPTERS.each { |adapter| saxerator_benchmark.with_adapter(adapter) }
data/benchmark/generate_sample_file.rb CHANGED
@@ -28,4 +28,4 @@ File.open(filename, 'w') do |f|
28
28
  end
29
29
  f.puts '</artists>'
30
30
  end
31
- puts "DONE!"
31
+ puts 'DONE!'
data/lib/saxerator.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'saxerator/version'
2
2
 
3
- require 'nokogiri'
4
-
3
+ require 'saxerator/sax_handler'
5
4
  require 'saxerator/dsl'
6
5
  require 'saxerator/full_document'
7
6
  require 'saxerator/document_fragment'
@@ -31,6 +30,6 @@ module Saxerator
31
30
  config = Configuration.new
32
31
  yield(config) if block_given?
33
32
 
34
- Saxerator::FullDocument.new(xml, config)
33
+ FullDocument.new(xml, config)
35
34
  end
36
35
  end
data/lib/saxerator/adapters/nokogiri.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'forwardable'
2
+ require 'nokogiri'
3
+
4
+ module Saxerator
5
+ module Adapters
6
+ class Nokogiri < ::Nokogiri::XML::SAX::Document
7
+ extend Forwardable
8
+
9
+ def self.parse(source, reader)
10
+ parser = ::Nokogiri::XML::SAX::Parser.new(new(reader))
11
+ parser.parse(source)
12
+ end
13
+
14
+ def initialize(reader)
15
+ @reader = reader
16
+ @ignore_namespaces = reader.ignore_namespaces?
17
+ end
18
+
19
+ def_delegators :@reader, :start_element, :end_element, :characters
20
+ def_delegator :@reader, :characters, :cdata_block
21
+
22
+ def start_element_namespace(name, attrs = [], _prefix = nil, _uri = nil, _ns = [])
23
+ return super unless @ignore_namespaces
24
+ start_element(name, strip_namespace(attrs))
25
+ end
26
+
27
+ def end_element_namespace(name, _prefix = nil, _uri = nil)
28
+ return super unless @ignore_namespaces
29
+ end_element(name)
30
+ end
31
+
32
+ private
33
+
34
+ def strip_namespace(attrs)
35
+ attrs.map { |attr| [attr.localname, attr.value] }
36
+ end
37
+ end
38
+ end
39
+ end
data/lib/saxerator/adapters/ox.rb ADDED
@@ -0,0 +1,69 @@
1
+ require 'forwardable'
2
+ require 'ox'
3
+
4
+ module Saxerator
5
+ module Adapters
6
+ class Ox # < ::Ox::Sax
7
+ extend Forwardable
8
+
9
+ def self.parse(source, reader)
10
+ handler = new(reader)
11
+ ::Ox.sax_parse(handler, source)
12
+ end
13
+
14
+ attr_accessor :name
15
+ attr_reader :attributes
16
+ attr_reader :reader
17
+
18
+ def initialize(reader)
19
+ @reader = reader
20
+
21
+ @attributes = {}
22
+ @name = ''
23
+ end
24
+
25
+ def guard!
26
+ reader.start_element(name, attributes.to_a) unless name.empty?
27
+ reset!
28
+ end
29
+
30
+ def attr(name, value)
31
+ attributes[name.to_s] = value
32
+ end
33
+
34
+ def start_element(name)
35
+ guard!
36
+
37
+ name = name.to_s
38
+ name = strip_namespace(name) if reader.ignore_namespaces?
39
+ self.name = name
40
+ end
41
+
42
+ def end_element(name)
43
+ guard!
44
+
45
+ name = name.to_s
46
+ name = strip_namespace(name) if reader.ignore_namespaces?
47
+ reader.end_element(name)
48
+ end
49
+
50
+ def text(str)
51
+ guard!
52
+ reader.characters(str)
53
+ end
54
+
55
+ alias cdata text
56
+
57
+ private
58
+
59
+ def reset!
60
+ @attributes.clear
61
+ @name = ''
62
+ end
63
+
64
+ def strip_namespace(name)
65
+ name.split(':').last
66
+ end
67
+ end
68
+ end
69
+ end
data/lib/saxerator/adapters/rexml.rb ADDED
@@ -0,0 +1,42 @@
1
+ require 'forwardable'
2
+ require 'rexml/document'
3
+ require 'rexml/streamlistener'
4
+
5
+ module Saxerator
6
+ module Adapters
7
+ class Rexml
8
+ extend Forwardable
9
+ include REXML::StreamListener
10
+
11
+ def self.parse(source, reader)
12
+ handler = new(reader)
13
+ REXML::Document.parse_stream(source, handler)
14
+ end
15
+
16
+ def initialize(reader)
17
+ @reader = reader
18
+ @ignore_namespaces = reader.ignore_namespaces?
19
+ end
20
+
21
+ def_delegator :@reader, :characters, :text
22
+ def_delegator :@reader, :characters, :cdata
23
+
24
+ def tag_start(name, attrs)
25
+ name = strip_namespace(name) if @ignore_namespaces
26
+ @reader.start_element(name, attrs)
27
+ end
28
+
29
+ def tag_end(name)
30
+ name = strip_namespace(name) if @ignore_namespaces
31
+ @reader.end_element(name)
32
+ end
33
+
34
+ private
35
+
36
+ def strip_namespace(name)
37
+ name.split(':').last
38
+ end
39
+
40
+ end
41
+ end
42
+ end
data/lib/saxerator/builder.rb CHANGED
@@ -13,7 +13,7 @@ module Saxerator
13
13
  def camel_case(str)
14
14
  str = str.to_s
15
15
  return str if str !~ /_/ && str =~ /[A-Z]+.*/
16
- str.split('_').map{|e| e.capitalize}.join
16
+ str.split('_').map(&:capitalize).join
17
17
  end
18
18
  end
19
- end
19
+ end
data/lib/saxerator/builder/array_element.rb CHANGED
@@ -1,9 +1,15 @@
1
+ require 'delegate'
2
+
1
3
  module Saxerator
2
4
  module Builder
3
- class ArrayElement < Array
5
+ class ArrayElement < DelegateClass(Array)
4
6
  attr_accessor :name
5
7
 
6
- def to_ary; self end
8
+ def initialize(arr = [], name = nil)
9
+ @name = name
10
+ super(arr)
11
+ end
12
+
7
13
  def to_a; self end
8
14
  end
9
15
  end
data/lib/saxerator/builder/empty_element.rb CHANGED
@@ -4,6 +4,7 @@ module Saxerator
4
4
  module Builder
5
5
  class EmptyElement < HashElement
6
6
  def nil?; true end
7
+
7
8
  def !; true end
8
9
 
9
10
  def to_s
@@ -15,9 +16,7 @@ module Saxerator
15
16
  end
16
17
 
17
18
  def to_a
18
- array = ArrayElement.new
19
- array.name = name
20
- array
19
+ ArrayElement.new([], name)
21
20
  end
22
21
  end
23
22
  end
data/lib/saxerator/builder/hash_builder.rb CHANGED
@@ -45,12 +45,11 @@ module Saxerator
45
45
  hash
46
46
  end
47
47
 
48
- def add_to_hash_element( hash, name, element)
48
+ def add_to_hash_element(hash, name, element)
49
49
  name = generate_key(name)
50
- if hash[name]
51
- unless hash[name].is_a?(Array)
52
- hash[name] = ArrayElement[hash[name]]
53
- hash[name].name = name
50
+ if hash.key? name
51
+ unless hash[name].is_a?(ArrayElement)
52
+ hash[name] = ArrayElement.new([hash[name]], name)
54
53
  end
55
54
  hash[name] << element
56
55
  else
@@ -60,12 +59,14 @@ module Saxerator
60
59
 
61
60
  def block_variable
62
61
  return to_s if @text
63
- return to_hash if @children.count > 0 || (@attributes.count > 0 && @config.put_attributes_in_hash?)
62
+ if @children.count > 0 || (@attributes.count > 0 && @config.put_attributes_in_hash?)
63
+ return to_hash
64
+ end
64
65
  to_empty_element
65
66
  end
66
67
 
67
68
  def normalize_attributes(attributes)
68
- Hash[attributes.map {|key, value| [generate_key(key), value] }]
69
+ Hash[attributes.map { |key, value| [generate_key(key), value] }]
69
70
  end
70
71
 
71
72
  def generate_key(name)