weak_xml 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9de2ed9ec121336d705ad7207756be1940c05bff
4
+ data.tar.gz: e29c2675bd840107c0517acacc0dd5cb67c0f40e
5
+ SHA512:
6
+ metadata.gz: 4b304cc23f025b3e8d74d87d87b66950f945cfd73e6dfe9c9bb29f5a784d68eb9e278d3d0fcb93915da84feec2054149ad5ba4a1a0db5a0fe07797f4e62006fa
7
+ data.tar.gz: 2657121dd8b75d5ea9a2e3d402aef5ef6adf5b333bdd0046217e59229059e2cf439bdd27f250274f1fe7bb4b53ad46a1ecd38898cbbbb48e852834719407af57
data/.codeclimate.yml ADDED
@@ -0,0 +1,2 @@
1
+ exclude_paths:
2
+ - "benchmark/*"
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - 2.1.0
6
+ - 2.2.0
7
+ - jruby-1.7
8
+ - jruby-9.0.0.0.pre2
9
+ - rbx-2.5
data/Benchmark.results ADDED
@@ -0,0 +1,122 @@
1
+ processor : 0
2
+ vendor_id : GenuineIntel
3
+ cpu family : 6
4
+ model : 58
5
+ model name : Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz
6
+ stepping : 9
7
+ microcode : 0x19
8
+ cpu MHz : 3193.115
9
+ cache size : 6144 KB
10
+ physical id : 0
11
+ siblings : 1
12
+ core id : 0
13
+ cpu cores : 1
14
+ apicid : 0
15
+ initial apicid : 0
16
+ fpu : yes
17
+ fpu_exception : yes
18
+ cpuid level : 5
19
+ wp : yes
20
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx rdtscp lm constant_tsc rep_good nopl pni monitor ssse3 lahf_lm
21
+ bogomips : 6386.23
22
+ clflush size : 64
23
+ cache_alignment : 64
24
+ address sizes : 36 bits physical, 48 bits virtual
25
+ power management:
26
+
27
+ Find 'z303-id' and get content
28
+
29
+ Calculating -------------------------------------
30
+ Nokogiri 361.000 i/100ms
31
+ Oga 10.000 i/100ms
32
+ Ox 198.000 i/100ms
33
+ WeakXml 4.202k i/100ms
34
+ WeakXml (+hint) 4.556k i/100ms
35
+ WeakXml (-ML) 4.007k i/100ms
36
+ WeakXml (+hint, -ML) 4.352k i/100ms
37
+ Plain regex 6.425k i/100ms
38
+ -------------------------------------------------
39
+ Nokogiri 4.211k (±22.0%) i/s - 7.942k
40
+ Oga 107.656 (± 0.9%) i/s - 220.000
41
+ Ox 2.006k (± 1.3%) i/s - 4.158k
42
+ WeakXml 60.069k (± 2.1%) i/s - 121.858k
43
+ WeakXml (+hint) 67.484k (± 1.5%) i/s - 136.680k
44
+ WeakXml (-ML) 57.477k (± 2.0%) i/s - 116.203k
45
+ WeakXml (+hint, -ML) 64.767k (± 1.9%) i/s - 130.560k
46
+ Plain regex 107.943k (± 2.0%) i/s - 218.450k
47
+
48
+ Comparison:
49
+ Plain regex: 107943.2 i/s
50
+ WeakXml (+hint): 67484.5 i/s - 1.60x slower
51
+ WeakXml (+hint, -ML): 64766.6 i/s - 1.67x slower
52
+ WeakXml: 60069.1 i/s - 1.80x slower
53
+ WeakXml (-ML): 57477.0 i/s - 1.88x slower
54
+ Nokogiri: 4210.8 i/s - 25.63x slower
55
+ Ox: 2006.0 i/s - 53.81x slower
56
+ Oga: 107.7 i/s - 1002.67x slower
57
+
58
+ Find 'fees' and get attr total_record_count
59
+
60
+ Calculating -------------------------------------
61
+ Nokogiri 741.000 i/100ms
62
+ Oga 10.000 i/100ms
63
+ Ox 197.000 i/100ms
64
+ WeakXml 1.385k i/100ms
65
+ WeakXml (+hint) 4.568k i/100ms
66
+ WeakXml (-ML) 4.014k i/100ms
67
+ WeakXml (+hint, -ML) 5.587k i/100ms
68
+ -------------------------------------------------
69
+ Nokogiri 8.632k (±17.4%) i/s - 17.043k
70
+ Oga 106.709 (± 3.7%) i/s - 220.000
71
+ Ox 2.025k (± 1.2%) i/s - 4.137k
72
+ WeakXml 15.940k (± 2.1%) i/s - 31.855k
73
+ WeakXml (+hint) 68.124k (± 1.2%) i/s - 137.040k
74
+ WeakXml (-ML) 57.698k (± 2.5%) i/s - 116.406k
75
+ WeakXml (+hint, -ML) 90.311k (± 1.6%) i/s - 184.371k
76
+
77
+ Comparison:
78
+ WeakXml (+hint, -ML): 90311.0 i/s
79
+ WeakXml (+hint): 68123.8 i/s - 1.33x slower
80
+ WeakXml (-ML): 57698.1 i/s - 1.57x slower
81
+ WeakXml: 15940.1 i/s - 5.67x slower
82
+ Nokogiri: 8632.0 i/s - 10.46x slower
83
+ Ox: 2024.9 i/s - 44.60x slower
84
+ Oga: 106.7 i/s - 846.33x slower
85
+
86
+
87
+ Find all 'fee' and extract attribute 'link'
88
+
89
+ Calculating -------------------------------------
90
+ Nokogiri 757.000 i/100ms
91
+ Oga 40.000 i/100ms
92
+ Ox 537.000 i/100ms
93
+ WeakXml 849.000 i/100ms
94
+ WeakXml (-ML) 4.179k i/100ms
95
+ -------------------------------------------------
96
+ Nokogiri 8.113k (±14.3%) i/s - 15.897k
97
+ Oga 402.017 (± 1.0%) i/s - 840.000
98
+ Ox 5.731k (± 1.1%) i/s - 11.814k
99
+ WeakXml 9.237k (± 2.1%) i/s - 18.678k
100
+ WeakXml (-ML) 58.899k (± 1.7%) i/s - 121.191k
101
+
102
+ Comparison:
103
+ WeakXml (-ML): 58899.0 i/s
104
+ WeakXml: 9236.8 i/s - 6.38x slower
105
+ Nokogiri: 8112.8 i/s - 7.26x slower
106
+ Ox: 5731.0 i/s - 10.28x slower
107
+ Oga: 402.0 i/s - 146.51x slower
108
+
109
+
110
+ Real life example with a hand full finds on a document
111
+
112
+ Calculating -------------------------------------
113
+ Nokogiri 217.000 i/100ms
114
+ WeakXml 767.000 i/100ms
115
+ -------------------------------------------------
116
+ Nokogiri 2.314k (± 5.3%) i/s - 4.774k
117
+ WeakXml 8.371k (± 0.6%) i/s - 16.874k
118
+
119
+ Comparison:
120
+ WeakXml: 8371.4 i/s
121
+ Nokogiri: 2313.9 i/s - 3.62x slower
122
+
data/Gemfile ADDED
@@ -0,0 +1,24 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in your gemspec
4
+ gemspec
5
+
6
+ group :development do
7
+ gem "activesupport"
8
+ gem "benchmark-ips"
9
+ gem "nokogiri"
10
+ gem "oga"
11
+ gem "ox" if RUBY_ENGINE == "ruby"
12
+
13
+ if !ENV["CI"] && RUBY_ENGINE == "ruby"
14
+ gem "pry", "~> 0.9.12.6"
15
+ gem "pry-byebug", "<= 1.3.2"
16
+ gem "pry-rescue", "~> 1.4.2"
17
+ gem "pry-stack_explorer", "~> 0.4.9.1"
18
+ gem "pry-syntax-hacks", "~> 0.0.6"
19
+ end
20
+ end
21
+
22
+ group :test do
23
+ gem "codeclimate-test-reporter", require: nil
24
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Michael Sievers
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # WeakXml
2
+
3
+ [![Build Status](https://travis-ci.org/msievers/weak_xml.svg)](https://travis-ci.org/msievers/weak_xml)
4
+ [![Test Coverage](https://codeclimate.com/github/msievers/weak_xml/badges/coverage.svg)](https://codeclimate.com/github/msievers/weak_xml/coverage)
5
+ [![Code Climate](https://codeclimate.com/github/msievers/weak_xml/badges/gpa.svg)](https://codeclimate.com/github/msievers/weak_xml)
6
+ [![Dependency Status](https://gemnasium.com/msievers/weak_xml.svg)](https://gemnasium.com/msievers/weak_xml)
7
+
8
+ `WeakXml` is a non-parsing xml library which only works for certain well structured xml files.
9
+
10
+ ## Usage
11
+
12
+ ```ruby
13
+ xml = <<-EOXML
14
+ <xml>
15
+ <foo>
16
+ <bar attr1="value1">content1</bar>
17
+ <bar attr1="value2" attr2="value3">content2</bar>
18
+ <bar attr2="value4">content3</bar>
19
+ </foo>
20
+ <foo>
21
+ <muff>content4</muff>
22
+ </foo>
23
+ </xml>
24
+ EOXML
25
+
26
+ # .find gets the first node with the given tag or nil
27
+ some_node = WeakXml.find("bar", xml) # => #<Fragment ...
28
+ WeakXml.find("nope", xml) # => nil
29
+
30
+ # you can get the content
31
+ some_node.content # => "content1"
32
+
33
+ # or some (existing) attribute
34
+ some_node.attr("attr1") # => "value1"
35
+ some_node.attr("nope_attr") # => nil
36
+
37
+ # in contrast to .find, find_all gets you all nodes which match the given tag
38
+ some_nodes = WeakXml.find_all("bar", xml)
39
+ WeakXml.find_all("nope", xml) # => []
40
+
41
+ # elements of find_all behave like the ones from find
42
+ some_nodes.map(&:content) # => ["content1", "content2", "content3"]
43
+
44
+ # xml/options can be stored within an instance of WeakXml
45
+ doc = WeakXml.new(xml, disable_multiline: false)
46
+ doc.find("bar").content # => "content1"
47
+ ```
48
+
49
+ ## Why not mighty?
50
+
51
+ `WeakXml` is called weak because it probably does not work with your special fancy XML. It does not even try. Internally, regular expressions are used, and those only work with certain well-formed XML. No edge cases, no standard compliance, nothing.
52
+
53
+ ## Why might anybody like it anyway?
54
+
55
+ There are no dependencies. It's fast (a factor of 10 and up compared to Nokogiri in certain scenarios). According to the 80/20 rule, many XML documents should work without problems.
56
+
57
+ ## Development
58
+
59
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
60
+
61
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
62
+
63
+ ## Contributing
64
+
65
+ 1. Fork it ( https://github.com/msievers/weak_xml/fork )
66
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
67
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
68
+ 4. Push to the branch (`git push origin my-new-feature`)
69
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
7
+
8
+ task :benchmark do
9
+ puts File.read("/proc/cpuinfo") if File.exist?("/proc/cpuinfo")
10
+
11
+ require_relative "./benchmark/weak_xml/weak_xml_versus_others"
12
+ Benchmark::WeakXml::WeakXmlVersusOthers.new.call
13
+ end
@@ -0,0 +1,357 @@
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "nokogiri"
4
+ require "ox"
5
+ require "oga"
6
+ require "pry"
7
+ require "weak_xml"
8
+ require_relative "../weak_xml"
9
+
10
+ class Benchmark::WeakXml::WeakXmlVersusOthers
11
+ DISABLE_ALL_EXCEPT_NOKOGIRI = false
12
+
13
+ def call
14
+ puts "Find 'z303-id' and get content\n\n"
15
+
16
+ Benchmark.ips do |x|
17
+ x.config(time: 2, warmup: 1)
18
+
19
+ x.report("Nokogiri") do
20
+ Nokogiri::XML(ALEPH_BOR_INFO_XML).xpath(".//z303-id").text
21
+ end
22
+
23
+ unless DISABLE_ALL_EXCEPT_NOKOGIRI
24
+ x.report("Oga") do
25
+ Oga.parse_xml(ALEPH_BOR_INFO_XML).xpath(".//z303-id").text
26
+ end
27
+
28
+ x.report("Ox") do
29
+ Ox.parse(ALEPH_BOR_INFO_XML).locate("*/z303-id").first.text
30
+ end
31
+ end
32
+
33
+ x.report("WeakXml") do
34
+ WeakXml.find("z303-id", ALEPH_BOR_INFO_XML).content
35
+ end
36
+
37
+ x.report("WeakXml (+hint)") do
38
+ WeakXml.find("<z303-id>", ALEPH_BOR_INFO_XML).content
39
+ end
40
+
41
+ x.report("WeakXml (-ML)") do
42
+ WeakXml.find("z303-id", ALEPH_BOR_INFO_XML, disable_multiline: true).content
43
+ end
44
+
45
+ x.report("WeakXml (+hint, -ML)") do
46
+ WeakXml.find("<z303-id>", ALEPH_BOR_INFO_XML, disable_multiline: true).content
47
+ end
48
+
49
+ x.report("Plain regex") do
50
+ ALEPH_BOR_INFO_XML.match(Regexp.new("<z303-id>(.*?)<\/z303-id>")).captures.first
51
+ end
52
+
53
+ x.compare!
54
+ end
55
+
56
+ puts "Find 'fees' and get attr total_record_count\n\n"
57
+
58
+ Benchmark.ips do |x|
59
+ x.config(time: 2, warmup: 1)
60
+
61
+ x.report("Nokogiri") do
62
+ Nokogiri::XML(ALMA_FEES_XML).xpath(".//fees").attr("total_record_count").value
63
+ end
64
+
65
+ unless DISABLE_ALL_EXCEPT_NOKOGIRI
66
+ x.report("Oga") do
67
+ Oga.parse_xml(ALEPH_BOR_INFO_XML).xpath(".//z303-id").text
68
+ end
69
+
70
+ x.report("Ox") do
71
+ Ox.parse(ALEPH_BOR_INFO_XML).locate("*/z303-id").first.text
72
+ end
73
+ end
74
+
75
+ x.report("WeakXml") do
76
+ WeakXml.find("fees", ALMA_FEES_XML).attr("total_record_count")
77
+ end
78
+
79
+ x.report("WeakXml (+hint)") do
80
+ WeakXml.find("<z303-id>", ALEPH_BOR_INFO_XML).content
81
+ end
82
+
83
+ x.report("WeakXml (-ML)") do
84
+ WeakXml.find("z303-id", ALEPH_BOR_INFO_XML, disable_multiline: true).content
85
+ end
86
+
87
+ x.report("WeakXml (+hint, -ML)") do
88
+ WeakXml.find("<z303-id>", ALEPH_BOR_INFO_XML, disable_multiline: true)#.content
89
+ end
90
+
91
+ x.compare!
92
+ end
93
+
94
+ puts "\nFind all 'fee' and extract attribute 'link'\n\n"
95
+
96
+ Benchmark.ips do |x|
97
+ x.config(time: 2, warmup: 1)
98
+
99
+ x.report("Nokogiri") do
100
+ Nokogiri::XML(ALMA_FEES_XML).xpath(".//fee").map { |_fee| _fee.attr("link") }
101
+ end
102
+
103
+ unless DISABLE_ALL_EXCEPT_NOKOGIRI
104
+ x.report("Oga") do
105
+ Oga.parse_xml(ALMA_FEES_XML).xpath(".//fee").map { |_fee| _fee.attr("link").value }
106
+ end
107
+
108
+ x.report("Ox") do
109
+ Ox.parse(ALMA_FEES_XML).locate("*/fee").map { |_fee| _fee.attributes[:link] }
110
+ end
111
+ end
112
+
113
+ x.report("WeakXml") do
114
+ WeakXml.find_all("fee", ALMA_FEES_XML).map { |_fee| _fee.attr("link") }
115
+ end
116
+
117
+ x.report("WeakXml (-ML)") do
118
+ WeakXml.find_all("fee", ALMA_FEES_XML, disable_multiline: true).map { |_fee| _fee.attr("link") }
119
+ end
120
+
121
+ x.compare!
122
+ end
123
+
124
+ puts "\n Real life example with a hand full finds on a document\n\n"
125
+
126
+ Benchmark.ips do |x|
127
+ x.config(time: 2, warmup: 1)
128
+
129
+ x.report("Nokogiri") do
130
+ doc = Nokogiri::XML(ALEPH_BOR_INFO_XML)
131
+
132
+ doc.xpath(".//balance").try(:first).try(:to_f)
133
+ doc.xpath(".//current-fine").map(&:content).map(&:to_f)
134
+ doc.xpath(".//sign")
135
+ doc.xpath(".//z303-id").first.content
136
+ doc.xpath(".//z303-name").first.content
137
+ doc.xpath(".//z304-email-address").first.content
138
+ doc.xpath(".//z305-expiry-date").first.content
139
+ end
140
+
141
+ x.report("WeakXml") do
142
+ doc = WeakXml.new(ALEPH_BOR_INFO_XML)
143
+ doc.find("<balance>").try(:content).try(:to_f)
144
+ doc.find_all("<current-fine>").map! { |e| e.content.to_f }
145
+ doc.find("<sign>").try(:content)
146
+ doc.find("<z303-id>").content
147
+ doc.find("<z303-name>").content
148
+ doc.find("<z304-email-address>").content
149
+ doc.find("<z305-expiry-date>").content
150
+ end
151
+
152
+ x.compare!
153
+ end
154
+ end
155
+
156
+ ALEPH_BOR_INFO_XML = <<-EOXML.strip_heredoc
157
+ <?xml version = "1.0" encoding = "UTF-8"?>
158
+ <bor-info>
159
+ <z303>
160
+ <z303-id>PB12345</z303-id>
161
+ <z303-proxy-for-id></z303-proxy-for-id>
162
+ <z303-primary-id></z303-primary-id>
163
+ <z303-name-key>mustermann max PB12345</z303-name-key>
164
+ <z303-user-type></z303-user-type>
165
+ <z303-user-library>PAD12</z303-user-library>
166
+ <z303-open-date>01/02/1923</z303-open-date>
167
+ <z303-update-date>01/02/1923</z303-update-date>
168
+ <z303-con-lng>GER</z303-con-lng>
169
+ <z303-alpha>L</z303-alpha>
170
+ <z303-name>Mustermann, Max</z303-name>
171
+ <z303-title></z303-title>
172
+ <z303-delinq-1>00</z303-delinq-1>
173
+ <z303-delinq-n-1></z303-delinq-n-1>
174
+ <z303-delinq-1-update-date>19230201</z303-delinq-1-update-date>
175
+ <z303-delinq-1-cat-name>OL_ABC</z303-delinq-1-cat-name>
176
+ <z303-delinq-2>00</z303-delinq-2>
177
+ <z303-delinq-n-2></z303-delinq-n-2>
178
+ <z303-delinq-2-update-date>00000000</z303-delinq-2-update-date>
179
+ <z303-delinq-2-cat-name></z303-delinq-2-cat-name>
180
+ <z303-delinq-3>00</z303-delinq-3>
181
+ <z303-delinq-n-3></z303-delinq-n-3>
182
+ <z303-delinq-3-update-date>00000000</z303-delinq-3-update-date>
183
+ <z303-delinq-3-cat-name></z303-delinq-3-cat-name>
184
+ <z303-budget></z303-budget>
185
+ <z303-profile-id></z303-profile-id>
186
+ <z303-ill-library>AB</z303-ill-library>
187
+ <z303-home-library>AB</z303-home-library>
188
+ <z303-field-1></z303-field-1>
189
+ <z303-field-2></z303-field-2>
190
+ <z303-field-3></z303-field-3>
191
+ <z303-note-1></z303-note-1>
192
+ <z303-note-2></z303-note-2>
193
+ <z303-salutation></z303-salutation>
194
+ <z303-ill-total-limit>9999</z303-ill-total-limit>
195
+ <z303-ill-active-limit>9999</z303-ill-active-limit>
196
+ <z303-dispatch-library></z303-dispatch-library>
197
+ <z303-birth-date>01/02/1923</z303-birth-date>
198
+ <z303-export-consent>Y</z303-export-consent>
199
+ <z303-proxy-id-type>00</z303-proxy-id-type>
200
+ <z303-send-all-letters>Y</z303-send-all-letters>
201
+ <z303-plain-html></z303-plain-html>
202
+ <z303-want-sms>N</z303-want-sms>
203
+ <z303-plif-modification></z303-plif-modification>
204
+ <z303-title-req-limit>0000</z303-title-req-limit>
205
+ <z303-gender></z303-gender>
206
+ <z303-birthplace></z303-birthplace>
207
+ <z303-upd-time-stamp>123456789012345</z303-upd-time-stamp>
208
+ <z303-last-name></z303-last-name>
209
+ <z303-first-name></z303-first-name>
210
+ </z303>
211
+ <z304>
212
+ <z304-id>PB12345</z304-id>
213
+ <z304-sequence>01</z304-sequence>
214
+ <z304-address-0>Herr Max Mustermann</z304-address-0>
215
+ <z304-address-1>Musterplatz 1</z304-address-1>
216
+ <z304-zip></z304-zip>
217
+ <z304-email-address>max@mustermann.org</z304-email-address>
218
+ <z304-telephone>12345678</z304-telephone>
219
+ <z304-date-from>19230201</z304-date-from>
220
+ <z304-date-to>00000000</z304-date-to>
221
+ <z304-address-type>02</z304-address-type>
222
+ <z304-telephone-2></z304-telephone-2>
223
+ <z304-telephone-3></z304-telephone-3>
224
+ <z304-telephone-4></z304-telephone-4>
225
+ <z304-sms-number></z304-sms-number>
226
+ <z304-update-date>19240201</z304-update-date>
227
+ <z304-cat-name>OL_ABC</z304-cat-name>
228
+ <z304-upd-time-stamp>192402011418275</z304-upd-time-stamp>
229
+ </z304>
230
+ <z305>
231
+ <z305-id>PB12345</z305-id>
232
+ <z305-sub-library>PAD12</z305-sub-library>
233
+ <z305-open-date>01/02/1923</z305-open-date>
234
+ <z305-update-date>01/02/1923</z305-update-date>
235
+ <z305-bor-type></z305-bor-type>
236
+ <z305-bor-status>FOO - BAR</z305-bor-status>
237
+ <z305-registration-date>00000000</z305-registration-date>
238
+ <z305-expiry-date>01/02/2020</z305-expiry-date>
239
+ <z305-note></z305-note>
240
+ <z305-loan-permission>Y</z305-loan-permission>
241
+ <z305-photo-permission>Y</z305-photo-permission>
242
+ <z305-over-permission>Y</z305-over-permission>
243
+ <z305-multi-hold>Y</z305-multi-hold>
244
+ <z305-loan-check>Y</z305-loan-check>
245
+ <z305-hold-permission>Y</z305-hold-permission>
246
+ <z305-renew-permission>Y</z305-renew-permission>
247
+ <z305-rr-permission>Y</z305-rr-permission>
248
+ <z305-ignore-late-return>N</z305-ignore-late-return>
249
+ <z305-last-activity-date></z305-last-activity-date>
250
+ <z305-photo-charge>F</z305-photo-charge>
251
+ <z305-no-loan>0001</z305-no-loan>
252
+ <z305-no-hold>0000</z305-no-hold>
253
+ <z305-no-photo>0000</z305-no-photo>
254
+ <z305-no-cash>0000</z305-no-cash>
255
+ <z305-cash-limit>25.00</z305-cash-limit>
256
+ <z305-credit-debit></z305-credit-debit>
257
+ <z305-sum>0.00</z305-sum>
258
+ <z305-delinq-1>00</z305-delinq-1>
259
+ <z305-delinq-n-1></z305-delinq-n-1>
260
+ <z305-delinq-1-update-date>00000000</z305-delinq-1-update-date>
261
+ <z305-delinq-1-cat-name></z305-delinq-1-cat-name>
262
+ <z305-delinq-2>00</z305-delinq-2>
263
+ <z305-delinq-n-2></z305-delinq-n-2>
264
+ <z305-delinq-2-update-date>00000000</z305-delinq-2-update-date>
265
+ <z305-delinq-2-cat-name></z305-delinq-2-cat-name>
266
+ <z305-delinq-3>00</z305-delinq-3>
267
+ <z305-delinq-n-3></z305-delinq-n-3>
268
+ <z305-delinq-3-update-date>00000000</z305-delinq-3-update-date>
269
+ <z305-delinq-3-cat-name></z305-delinq-3-cat-name>
270
+ <z305-field-1></z305-field-1>
271
+ <z305-field-2></z305-field-2>
272
+ <z305-field-3></z305-field-3>
273
+ <z305-hold-on-shelf>Y</z305-hold-on-shelf>
274
+ <z305-end-block-date></z305-end-block-date>
275
+ <z305-booking-permission>N</z305-booking-permission>
276
+ <z305-booking-ignore-hours>N</z305-booking-ignore-hours>
277
+ <z305-rush-cat-request>N</z305-rush-cat-request>
278
+ <z305-upd-time-stamp>192301021200000</z305-upd-time-stamp>
279
+ </z305>
280
+ <item-l>
281
+ <z13>
282
+ <z13-doc-number>123456</z13-doc-number>
283
+ <z13-year>1950</z13-year>
284
+ <z13-open-date>01/02/2003</z13-open-date>
285
+ <z13-update-date>01/02/2003</z13-update-date>
286
+ <z13-call-no-key></z13-call-no-key>
287
+ <z13-call-no-code>AB01</z13-call-no-code>
288
+ <z13-call-no>ABC1234+1</z13-call-no>
289
+ <z13-author-code>123a1</z13-author-code>
290
+ <z13-author>Mustermann, Max 1923- (DE-123)123456789</z13-author>
291
+ <z13-title-code>123-1</z13-title-code>
292
+ <z13-title>Some title</z13-title>
293
+ <z13-imprint-code>123-1</z13-imprint-code>
294
+ <z13-imprint>Musterstadt [u.a.]</z13-imprint>
295
+ <z13-isbn-issn-code>123a1</z13-isbn-issn-code>
296
+ <z13-isbn-issn>0-123-12345-1</z13-isbn-issn>
297
+ <z13-upd-time-stamp>192301021200000</z13-upd-time-stamp>
298
+ </z13>
299
+ <current-fine> 20.00</current-fine>
300
+ <due-date>01/02/2000</due-date>
301
+ <due-hour>23:59</due-hour>
302
+ </item-l>
303
+ <session-id>00000000000000000000000000000000000000000000000000</session-id>
304
+ </bor-info>
305
+ EOXML
306
+
307
+ ALMA_FEES_XML = <<-EOXML.strip_heredoc
308
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?><fees total_record_count="4" total_sum="415.0" currency="USD">
309
+ <fee link="/almaws/v1/users/johns/fees/950645520000121">
310
+ <id>950645520000121</id>
311
+ <type desc="Overdue fine">OVERDUEFINE</type>
312
+ <status desc="Active">ACTIVE</status>
313
+ <balance>400.0</balance>
314
+ <original_amount>400.0</original_amount>
315
+ <creation_time>2014-10-21T08:17:12.450Z</creation_time>
316
+ <comment>Date generated: 10/21/2014, Due: 06/23/2014, Fine Policy: Overdue Fine for All Hours, Action: Renewed</comment>
317
+ <title>History</title>
318
+ </fee>
319
+ <fee link="/almaws/v1/users/johns/fees/950645580000121">
320
+ <id>950645580000121</id>
321
+ <type desc="Renew fee">RENEWFEE</type>
322
+ <status desc="Active">ACTIVE</status>
323
+ <balance>10.0</balance>
324
+ <original_amount>10.0</original_amount>
325
+ <creation_time>2014-10-21T08:17:12.709Z</creation_time>
326
+ <title>History</title>
327
+ </fee>
328
+ <fee link="/almaws/v1/users/johns/fees/711924990000121">
329
+ <id>711924990000121</id>
330
+ <type desc="Overdue fine">OVERDUEFINE</type>
331
+ <status desc="Active">ACTIVE</status>
332
+ <balance>55.0</balance>
333
+ <original_amount>77.0</original_amount>
334
+ <creation_time>2014-07-15T08:40:32.630Z</creation_time>
335
+ <comment>Date generated: 15/07/2014, Due: 23/06/2014, Fine Policy: Overdue Fine for All Hours, Action: Lost with overdue charge</comment>
336
+ <title>History</title>
337
+ <transactions>
338
+ <transaction>
339
+ <type desc="Payment">PAYMENT</type>
340
+ <amount>22.0</amount>
341
+ <created_by>Ex Libris</created_by>
342
+ <transaction_time>2014-10-20T13:26:54.144Z</transaction_time>
343
+ </transaction>
344
+ </transactions>
345
+ </fee>
346
+ <fee link="/almaws/v1/users/johns/fees/950644660000121">
347
+ <id>950644660000121</id>
348
+ <type desc="Credit">CREDIT</type>
349
+ <status desc="Active">ACTIVE</status>
350
+ <balance>-50.0</balance>
351
+ <original_amount>-50.0</original_amount>
352
+ <creation_time>2014-10-21T07:45:03.188Z</creation_time>
353
+ <title>History</title>
354
+ </fee>
355
+ </fees>
356
+ EOXML
357
+ end
@@ -0,0 +1,4 @@
1
+ require "benchmark/ips"
2
+
3
+ class Benchmark::WeakXml
4
+ end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "weak_xml"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
# Start an interactive console, preferring Pry and falling back to IRB.
#
# A missing gem makes `require` raise LoadError, which descends from
# ScriptError rather than StandardError. A bare `rescue` therefore never
# reached the IRB fallback when Pry was not installed, so LoadError is named
# explicitly. StandardError is still listed to keep the previous behaviour of
# dropping into IRB when Pry itself fails to start.
begin
  require "pry"
  Pry.start
rescue LoadError, StandardError
  require "irb"
  IRB.start
end
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,26 @@
1
# A slice of XML text that starts at an opening tag and ends at the matching
# closing tag. It offers regex-based access to the attributes of the opening
# tag and to the text between the outermost tags; nothing is parsed.
class Fragment
  # Compiling regular expressions is expensive, especially if they contain
  # variable parts. So, in order to achieve the best performance, these are
  # compiled upfront without any "runtime dependencies".

  # Captures the raw attribute list of the opening tag. The tag name is
  # matched with [^\s>]+ instead of \w+ so that names containing "-", ":" or
  # "." (e.g. "z303-id") are supported as well.
  ATTRIBUTES_REGEXP = /\A<[^\s>]+\s+([^>]+)/m

  # Captures everything between the first ">" and the last tag of the string.
  CONTENT_REGEXP = /.*?>(.*?)<[^>]+>\Z/m

  # @param tag [String] the tag (or tag hint) this fragment was found with
  # @param xml [String] the matched xml text, starting with the opening tag
  def initialize(tag, xml)
    @tag = tag
    @xml = xml
  end

  # Returns the value of an attribute of the opening tag.
  #
  # @param key [String, Symbol] attribute name
  # @return [String, nil] the attribute value (possibly empty) or nil if the
  #   opening tag has no such attribute
  def attr(key)
    attributes = @xml[ATTRIBUTES_REGEXP, 1]
    return nil unless attributes

    # The name is escaped and must start the list or follow whitespace, so
    # "link" does not match inside "hyperlink". The value runs up to the next
    # occurrence of the quote character that opened it, which also makes
    # empty values (attr="") work.
    attributes[/(?:\A|\s)#{Regexp.escape(key.to_s)}\s*=\s*(["'])(.*?)\1/m, 2]
  end

  # Returns the text between the opening and the closing tag with surrounding
  # whitespace removed.
  #
  # @return [String, nil] the stripped content or nil if the fragment does
  #   not look like "<tag ...>...</tag>"
  def content
    inner = @xml[CONTENT_REGEXP, 1]
    inner && inner.strip
  end
end
@@ -0,0 +1,3 @@
1
+ class WeakXml
2
+ VERSION = "0.1.0"
3
+ end
data/lib/weak_xml.rb ADDED
@@ -0,0 +1,49 @@
1
+ require "weak_xml/version"
2
+
3
# Regex-based, non-parsing lookup of elements in xml strings.
#
# A tag can be given as a plain name ("bar") or as a hint that spells out the
# complete opening tag ("<bar>"). A hint only matches elements whose opening
# tag is exactly that text, i.e. elements without attributes.
#
# Supported option:
#   :disable_multiline - when truthy, "." does not match newlines, so only
#                        elements that open and close on one line are found.
class WeakXml
  # NOTE: the required file defines Fragment at the top level, not inside
  # this class; requiring it here only controls load order.
  require_relative "./weak_xml/fragment"

  #
  # class methods
  #

  # Finds the first element with the given tag.
  #
  # @param tag [String] tag name or "<tag>" hint
  # @param xml [String] the xml text to search
  # @param options [Hash, nil] see class documentation
  # @return [Fragment, nil] the first match or nil
  def self.find(tag, xml, options = {})
    matched_string = xml[regex_factory(tag, options)]
    Fragment.new(tag, matched_string) if matched_string
  end

  # Finds all elements with the given tag.
  #
  # @param tag [String] tag name or "<tag>" hint
  # @param xml [String] the xml text to search
  # @param options [Hash, nil] see class documentation
  # @return [Array<Fragment>] all matches, empty if there are none
  def self.find_all(tag, xml, options = {})
    xml.scan(regex_factory(tag, options)).map! do |matched_string|
      Fragment.new(tag, matched_string)
    end
  end

  # Builds the regular expression that matches one complete element.
  #
  # @param tag [String] tag name or "<tag>" hint
  # @param options [Hash, nil] see class documentation
  # @return [Regexp]
  def self.regex_factory(tag, options = {})
    # Tolerate an explicit nil so callers forwarding unset options do not fail.
    options ||= {}
    flags = options[:disable_multiline] ? 0 : Regexp::MULTILINE

    # The tag is escaped so that characters like "." or "+" are taken
    # literally instead of being interpreted as regexp syntax.
    pattern =
      if tag.start_with?("<") && tag.end_with?(">")
        "#{Regexp.escape(tag)}.*?</#{Regexp.escape(tag[1..-2])}>"
      else
        name = Regexp.escape(tag)
        # "\\s" keeps the regexp class \s. A plain "\s" in a double-quoted
        # string is a literal space, which missed a tab or newline following
        # the tag name.
        "<#{name}[>\\s].*?</#{name}>"
      end

    Regexp.new(pattern, flags)
  end

  #
  # instance methods
  #

  # @param xml [String] the xml text subsequent lookups operate on
  # @param options [Hash] default options for #find and #find_all
  def initialize(xml, options = {})
    @options = options
    @xml = xml
  end

  # Like WeakXml.find, using the stored xml. Options given here replace the
  # instance options entirely; they are not merged.
  def find(tag, options = nil)
    self.class.find(tag, @xml, (options || @options))
  end

  # Like WeakXml.find_all, using the stored xml. Options given here replace
  # the instance options entirely; they are not merged.
  def find_all(tag, options = nil)
    self.class.find_all(tag, @xml, (options || @options))
  end
end
data/weak_xml.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "weak_xml/version"
5
+
6
# Gem specification for weak_xml. The version comes from WeakXml::VERSION and
# the packaged files are whatever git tracks, minus test directories.
Gem::Specification.new do |spec|
  spec.name          = "weak_xml"
  spec.version       = WeakXml::VERSION
  spec.authors       = ["Michael Sievers"]
  spec.summary       = %q{A none parsing xml library}
  spec.homepage      = "https://github.com/msievers/weak_xml"
  # The project ships an MIT LICENSE.txt; declaring it here puts the license
  # into the published gem metadata instead of leaving it empty.
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler",   ">= 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec",     ">= 3.0.0", "< 4.0.0"
  spec.add_development_dependency "simplecov", ">= 0.8.0"
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: weak_xml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Sievers
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-05-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: 4.0.0
51
+ type: :development
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 3.0.0
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: 4.0.0
61
+ - !ruby/object:Gem::Dependency
62
+ name: simplecov
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 0.8.0
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.8.0
75
+ description:
76
+ email:
77
+ executables: []
78
+ extensions: []
79
+ extra_rdoc_files: []
80
+ files:
81
+ - ".codeclimate.yml"
82
+ - ".gitignore"
83
+ - ".rspec"
84
+ - ".travis.yml"
85
+ - Benchmark.results
86
+ - Gemfile
87
+ - LICENSE.txt
88
+ - README.md
89
+ - Rakefile
90
+ - benchmark/weak_xml.rb
91
+ - benchmark/weak_xml/weak_xml_versus_others.rb
92
+ - bin/console
93
+ - bin/setup
94
+ - lib/weak_xml.rb
95
+ - lib/weak_xml/fragment.rb
96
+ - lib/weak_xml/version.rb
97
+ - weak_xml.gemspec
98
+ homepage: https://github.com/msievers/weak_xml
99
+ licenses: []
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.4.7
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: A none parsing xml library
121
+ test_files: []
122
+ has_rdoc: