jrexml 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ = 0.5
2
+
3
+ * Birthday!
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ # (c) Copyright 2007 Nick Sieger <nicksieger@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person
4
+ # obtaining a copy of this software and associated documentation files
5
+ # (the "Software"), to deal in the Software without restriction,
6
+ # including without limitation the rights to use, copy, modify, merge,
7
+ # publish, distribute, sublicense, and/or sell copies of the Software,
8
+ # and to permit persons to whom the Software is furnished to do so,
9
+ # subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be
12
+ # included in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ LICENSE.txt
5
+ Rakefile
6
+ lib/jrexml/java_pull_parser.rb
7
+ lib/jrexml.rb
8
+ lib/xpp3-1.1.4.jar
9
+ lib/xpp3.LICENSE.txt
10
+ spec/java_pull_parser_spec.rb
11
+ spec/spec_helper.rb
data/README.txt ADDED
@@ -0,0 +1,20 @@
1
+ JREXML is an add-on for JRuby that uses a Java pull parser library to speed up REXML.
2
+
3
+ REXML is, unfortunately, painfully slow running under JRuby at the moment due to the slowness of regular expression parsing. JREXML shoves a small wrapper around XPP3/MXP1 (http://www.extreme.indiana.edu/xgws/xsoap/xpp/mxp1/) into the guts of REXML, disabling the regular expression parser and providing close to a 10x speedup.
4
+
5
+ = Install
6
+
7
+ Simply install the gem under JRuby:
8
+
9
+ jruby -S gem install jrexml
10
+
11
+ And require 'jrexml' to speed up REXML.
12
+
13
+ gem 'jrexml'
14
+ require 'jrexml'
15
+
16
+ = License
17
+
18
+ This software is released under an MIT license. For details, see the LICENSE.txt file included with the distribution. The software is copyright (c) 2007 Nick Sieger <nicksieger@gmail.com>.
19
+
20
+ This product includes software developed by the Indiana University Extreme! Lab (http://www.extreme.indiana.edu/). See the license in the file lib/xpp3.LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,64 @@
1
+ require 'spec/rake/spectask'
2
+
3
# Files shipped in the gem. The same list is written to Manifest.txt by the
# :manifest task below and handed to the gem spec.
# NOTE(review): the :benchmark task and one spec read spec/atom_feed.xml,
# which none of these globs match -- confirm whether it should be packaged.
MANIFEST = FileList["History.txt", "Manifest.txt", "README.txt", "LICENSE.txt", "Rakefile",
  "lib/**/*.rb", "lib/xpp*", "spec/**/*.rb"]

# Packaging is optional: without Hoe the remaining tasks (spec, benchmark,
# manifest) must still load, so everything that depends on Hoe lives inside
# this begin/rescue.
begin
  require 'hoe'
  hoe = Hoe.new("jrexml", "0.5") do |p|
    p.rubyforge_name = "caldersphere"
    p.url = "http://caldersphere.rubyforge.org/jrexml"
    p.author = "Nick Sieger"
    p.email = "nick@nicksieger.com"
    p.summary = "JREXML speeds up REXML under JRuby by using a Java pull parser."
    p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
    p.description = p.paragraphs_of('README.txt', 0...1).join("\n\n")
    p.extra_deps.reject!{|d| d.first == "hoe"}
    p.test_globs = ["spec/**/*_spec.rb"]
  end
  hoe.spec.files = MANIFEST
  hoe.spec.dependencies.delete_if { |dep| dep.name == "hoe" }

  # Hoe insists on setting task :default => :test
  # !@#$ no easy way to empty the default list of prerequisites
  # Kept inside the begin block: when Hoe is missing there is no 'default'
  # task yet, and looking it up would abort the whole Rakefile.
  Rake::Task['default'].send :instance_variable_set, "@prerequisites", FileList[]
rescue LoadError
  puts "You really need Hoe installed to be able to package this gem"
end

file "Manifest.txt" => :manifest
task :manifest do
  File.open("Manifest.txt", "w") {|f| MANIFEST.each {|n| f << "#{n}\n"} }
end
Rake::Task['manifest'].invoke # Always regen manifest, so Hoe has up-to-date list of files

task :default => :spec

Spec::Rake::SpecTask.new do |t|
  t.libs << "lib"
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Parses the sample feed ten times with stock REXML and, under JRuby, ten
# more times after JREXML has been loaded.
task :benchmark do
  $LOAD_PATH.unshift "lib"
  require 'benchmark'
  require 'rexml/document'

  xml = File.open(File.dirname(__FILE__) + "/spec/atom_feed.xml") {|f| f.read }

  Benchmark.bm(7) do |x|
    x.report("REXML") do
      10.times do
        REXML::Document.new xml
      end
    end
    if RUBY_PLATFORM =~ /java/
      # Load JREXML only after the plain REXML run (requiring it patches
      # BaseParser.new), but outside the timed block so that library load
      # time is not counted as parse time.
      require 'jrexml'
      x.report("JREXML") do
        10.times do
          REXML::Document.new xml
        end
      end
    end
  end
end
data/lib/jrexml.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'rexml/parsers/baseparser'
2
+ require 'jrexml/java_pull_parser'
3
+
4
class REXML::Parsers::BaseParser #:nodoc:
  # Replacement constructor: every REXML base parser (and subclass) instance
  # is extended with JREXML::JavaPullParser *before* +initialize+ runs, so the
  # module's methods take precedence over the parser's own from the very
  # first call made during initialization.
  #
  # All arguments, and a block if one is given, are handed to +initialize+
  # unchanged, as Class#new would do.
  #
  # Returns the newly built, extended parser.
  def self.new(*args, &block)
    obj = allocate
    obj.extend(JREXML::JavaPullParser)
    obj.send :initialize, *args, &block
    obj
  end
end
@@ -0,0 +1,171 @@
1
+ module JREXML
2
# Version of the bundled XPP3 jar; it names the jar file that is required
# when the XmlPullParser class is not already available.
XPP_VERSION = "1.1.4"

# Resolve the Java XmlPullParser interface. If the first lookup fails, the
# bundled jar is required and the lookup is retried exactly once. A second
# failure, or a jar that cannot be required at all, ends in a LoadError that
# says what is missing instead of falling through to a NameError on the
# constants below.
begin
  XmlPullParser = Java::org.xmlpull.v1.XmlPullParser
rescue
  raise LoadError, "JREXML is only for JRuby" if RUBY_PLATFORM !~ /java/
  xpp_missing = "Unable to load XmlPullParser java class; " +
    "you need to include xpp3-#{XPP_VERSION}.jar on the classpath"
  @triedxpp ||= 0
  @triedxpp += 1
  # The jar was already required on the previous pass and the class is still
  # missing: give up rather than loop.
  raise LoadError, xpp_missing if @triedxpp > 1
  begin
    require "xpp3-#{XPP_VERSION}.jar"
  rescue LoadError
    raise LoadError, xpp_missing
  end
  retry
end

# Event type codes re-exported from XmlPullParser so the parser code can
# refer to them unqualified.
START_DOCUMENT = XmlPullParser::START_DOCUMENT
END_DOCUMENT = XmlPullParser::END_DOCUMENT
START_TAG = XmlPullParser::START_TAG
END_TAG = XmlPullParser::END_TAG
TEXT = XmlPullParser::TEXT
CDSECT = XmlPullParser::CDSECT
COMMENT = XmlPullParser::COMMENT
ENTITY_REF = XmlPullParser::ENTITY_REF
IGNORABLE_WHITESPACE = XmlPullParser::IGNORABLE_WHITESPACE
PROCESSING_INSTRUCTION = XmlPullParser::PROCESSING_INSTRUCTION

# Event types that are merged into a single :text event when they occur
# back to back (see convert_event).
ADJACENT_EVENTS = [TEXT, ENTITY_REF].freeze

# Raised in place of the NativeException thrown by the underlying pull
# parser while fetching the next token.
class XmlParsingError < StandardError; end
32
+
33
+ module JavaPullParser
34
# Lazily builds and caches the XmlPullParserFactory used to create pull
# parsers. Namespace awareness and validation are both switched off on it.
def self.factory
  @factory ||= begin
    pull_factory = org.xmlpull.v1.XmlPullParserFactory.newInstance
    pull_factory.set_namespace_aware false
    pull_factory.set_validating false
    pull_factory
  end
end
42
+
43
# Points the parser at a new document.
#
# +source+ is anything get_bytes accepts: an object responding to +read+,
# or any other object, which is converted with +to_s+. A fresh pull parser
# is created from the shared factory and fed the resulting bytes; nil is
# passed as the input encoding.
#
# All per-document state is cleared first. Without this a parser that had
# already been pulled from would keep buffered events, skip the new
# document's xmldecl, or (after :end_document) never read the new input.
def stream=(source)
  @event_stack = []
  @first_event_seen = false
  @document_ended = false
  @source = JavaPullParser.factory.newPullParser
  @source.setInput java.io.ByteArrayInputStream.new(get_bytes(source)), nil
end
47
+
48
# True when no further events are available. Asking goes through
# event_stack, which fetches the next token from the source when nothing is
# buffered and the document has not ended yet.
def empty?
  event_stack.size == 0
end
52
+
53
# True while at least one more event can be pulled; the exact opposite of
# empty?.
def has_next?
  empty? ? false : true
end
57
+
58
# Puts +event+ back at the front of the pending events so that it is the
# next one handed out. May be called any number of times; the pending list
# is created on first use. Returns the pending list.
def unshift(event)
  (@event_stack ||= []).unshift(event)
end
64
+
65
# Look-ahead is not supported by this parser: always raises a RuntimeError,
# whatever +depth+ is.
def peek(depth = 0)
  raise RuntimeError, "not implemented"
end
68
+
69
# Returns the next event as an array whose first element is the event name,
# or nil once nothing is left.
#
# On the very first call the source is asked for the XML declaration
# version. When one is present, the token just taken is pushed back and an
# [:xmldecl, version, encoding, standalone] event is returned instead; the
# pushed-back token is then delivered by the following call.
def pull
  token = event_stack.shift
  return convert_event(token) if @first_event_seen

  @first_event_seen = true
  version = @source.getProperty("http://xmlpull.org/v1/doc/properties.html#xmldecl-version")
  return convert_event(token) unless version

  standalone = @source.getProperty("http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone")
  encoding = @source.getInputEncoding
  unshift token
  [:xmldecl, version, encoding, standalone]
end
83
+
84
# Drains the parser: pulls until pull returns nil (or false) and returns
# everything pulled, in order.
def all_events
  collected = []
  current = pull
  while current
    collected << current
    current = pull
  end
  collected
end
91
+
92
+ private
93
# Turns a raw pull-parser event code into a REXML-style event array.
#
# A run of consecutive TEXT / ENTITY_REF events is folded into a single
# [:text, string] event: text is appended as-is, an entity reference is
# written back in "&name;" form. The first event that does not belong to
# the run is pushed back so the next pull sees it. Any other event code
# (including nil) is delegated to convert_event_without_text_or_entityref.
#
# If the event supply runs out while a run is being collected, the text
# gathered so far is still returned rather than being dropped.
def convert_event(event)
  return convert_event_without_text_or_entityref(event) unless ADJACENT_EVENTS.include?(event)

  text = ""
  while ADJACENT_EVENTS.include?(event)
    case event
    when TEXT
      text << @source.text
    when ENTITY_REF
      text << "&#{@source.name};"
    end
    # Read the payload above before advancing: shifting may move the
    # underlying parser on to the next token.
    event = event_stack.shift
  end
  unshift event if event
  [:text, text]
end
113
+
114
# Maps every event code except TEXT / ENTITY_REF to its event array, reading
# names, text and attributes from the current state of @source.
#
#   START_DOCUMENT          -> [:start_document]
#   END_DOCUMENT            -> [:end_document] (also marks the document ended,
#                              which stops event_stack from fetching tokens)
#   START_TAG               -> [:start_element, name, {attr_name => value}]
#   END_TAG                 -> [:end_element, name]
#   IGNORABLE_WHITESPACE    -> [:text, text]
#   CDSECT                  -> [:cdata, text]
#   COMMENT                 -> [:comment, text]
#   PROCESSING_INSTRUCTION  -> [:processing_instruction, target, content]
#   nil                     -> nil
#   anything else           -> [:unknown, description]
def convert_event_without_text_or_entityref(event)
  case event
  when START_DOCUMENT
    [:start_document]
  when END_DOCUMENT
    @document_ended = true
    [:end_document]
  when START_TAG
    attributes = {}
    @source.attribute_count.times do |i|
      attributes[@source.getAttributeName(i)] = @source.getAttributeValue(i)
    end
    [:start_element, @source.name, attributes]
  when END_TAG
    [:end_element, @source.name]
  when IGNORABLE_WHITESPACE
    [:text, @source.text]
  when CDSECT
    [:cdata, @source.text]
  when COMMENT
    [:comment, @source.text]
  when PROCESSING_INSTRUCTION
    # Split "target<whitespace>data" at the first whitespace of any kind.
    # The content keeps its leading whitespace (REXML likes it there) and is
    # nil when the instruction carries no data at all.
    # NOTE(review): nil for a data-less PI is meant to match what REXML's
    # own parser yields -- confirm against the REXML version in use.
    target, content = @source.text.match(/\A(\S*)(\s.*)?\z/m).captures
    [:processing_instruction, target, content]
  when nil
    nil
  else
    [:unknown, debug_event(event)]
  end
end
145
+
146
# Returns the list of pending raw event codes, creating it on first use.
# When the list is empty and the end of the document has not been reached,
# one more token is read from the source and appended first. A
# NativeException raised by that read is re-raised as XmlParsingError with
# the same message.
def event_stack
  @event_stack ||= []
  return @event_stack if @document_ended || !@event_stack.empty?

  begin
    @event_stack << @source.nextToken
  rescue NativeException => e
    raise XmlParsingError, e.message
  end
  @event_stack
end
157
+
158
# Returns the content of +src+ as Java bytes. Objects that respond to
# +read+ are read; everything else is converted with +to_s+.
def get_bytes(src)
  content = src.respond_to?(:read) ? src.read : src.to_s
  content.to_java_bytes
end
166
+
167
# Human-readable label for a raw event code, looked up in
# XmlPullParser::TYPES; nil when no event is given.
def debug_event(event)
  return nil unless event

  "XmlPullParser::#{XmlPullParser::TYPES[event]}"
end
170
+ end
171
+ end
Binary file
@@ -0,0 +1,47 @@
1
+ # Indiana University Extreme! Lab Software License
2
+ #
3
+ # Version 1.1.1
4
+ #
5
+ # Copyright (c) 2002 Extreme! Lab, Indiana University. All rights reserved.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in
16
+ # the documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. The end-user documentation included with the redistribution, if any,
19
+ # must include the following acknowledgment:
20
+ #
21
+ # "This product includes software developed by the Indiana University
22
+ # Extreme! Lab (http://www.extreme.indiana.edu/)."
23
+ #
24
+ # Alternately, this acknowledgment may appear in the software itself,
25
+ # if and wherever such third-party acknowledgments normally appear.
26
+ #
27
+ # 4. The names "Indiana Univeristy" and "Indiana Univeristy Extreme! Lab"
28
+ # must not be used to endorse or promote products derived from this
29
+ # software without prior written permission. For written permission,
30
+ # please contact http://www.extreme.indiana.edu/.
31
+ #
32
+ # 5. Products derived from this software may not use "Indiana Univeristy"
33
+ # name nor may "Indiana Univeristy" appear in their name, without prior
34
+ # written permission of the Indiana University.
35
+ #
36
+ # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
37
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
38
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39
+ # IN NO EVENT SHALL THE AUTHORS, COPYRIGHT HOLDERS OR ITS CONTRIBUTORS
40
+ # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43
+ # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45
+ # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47
+ #
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe JREXML::JavaPullParser do
4
# Builds the parser under test for +source+ and stores it in @parser.
#
# The parser also gets a singleton +base_events+ method that parses the same
# source with the stock REXML base parser and returns every event up to and
# including :end_document, to serve as the expected result.
def parse(source)
  @parser = REXML::Parsers::BaseParser.new(source)
  @parser.extend(JREXML::JavaPullParser)
  @parser.stream = source
  (class << @parser; self; end).send :define_method, "base_events" do
    events = []
    # Requiring 'jrexml' replaces BaseParser.new with a version that extends
    # every instance with JavaPullParser. Build the reference parser with
    # allocate + initialize so that it really is the unmodified REXML parser;
    # otherwise the spec would compare JREXML against itself.
    baseparser = REXML::Parsers::BaseParser.allocate
    baseparser.send :initialize, source
    loop do
      event = baseparser.pull
      events << event
      break if event.first == :end_document
    end
    events
  end
  @parser
end
20
+
21
# Checks that @parser yields exactly the events listed by its base_events,
# in order, and has nothing left afterwards.
def verify_events
  expected = @parser.base_events
  expected.each { |evt| @parser.pull.should == evt }
  @parser.should be_empty
end
27
+
28
# Convenience for the examples: set up the parser for +source+, then check
# its events against the reference events.
def parse_and_verify(source)
  parse(source)
  verify_events
end
32
+
33
+ it "should parse a document consisting of a single empty element" do
34
+ parse_and_verify %q(<document/>)
35
+ end
36
+
37
+ it "should allow calling empty? or has_next? in between pulls" do
38
+ @parser = parse %q(<document/>)
39
+ @parser.pull.should == [:start_element, "document", {}]
40
+ @parser.should_not be_empty
41
+ @parser.pull.should == [:end_element, "document"]
42
+ @parser.has_next?.should == true
43
+ @parser.pull.should == [:end_document]
44
+ @parser.has_next?.should == false
45
+ @parser.should be_empty
46
+ end
47
+
48
+ it "should parse text between elements" do
49
+ parse_and_verify %q(<document>This is the body</document>)
50
+ end
51
+
52
+ it "should parse multiple texts" do
53
+ parse_and_verify <<-XML
54
+ <document>
55
+ some text
56
+ <a-tag/>
57
+ some other text
58
+ </document>
59
+ XML
60
+ end
61
+
62
+ it "should parse attributes" do
63
+ parse_and_verify %q(<document attr1="value" attr2='value2'/>)
64
+ end
65
+
66
+ it "should handle namespaces in the same way as the base parser (which is to be ignorant of them)" do
67
+ parse_and_verify %q(<d:document xmlns:d="urn:example.com" d:attr="value"/>)
68
+ end
69
+
70
+ it "should handle the xml processing instruction" do
71
+ parse_and_verify <<-XML
72
+ <?xml version="1.0" encoding="utf-8"?>
73
+ <document/>
74
+ XML
75
+ end
76
+
77
+ it "should handle CDATA" do
78
+ parse_and_verify %q(<document><![CDATA[some cdata]]></document>)
79
+ end
80
+
81
+ it "should handle comments" do
82
+ parse_and_verify %q(<document><!-- some comment --></document>)
83
+ end
84
+
85
+ it "should handle processing instructions" do
86
+ parse_and_verify %q(<?xml version="1.0"?>
87
+ <?xml-stylesheet href="hello-page-html.xsl" type="text/xsl"?><document/>)
88
+ end
89
+
90
+ it "should handle simple entity refs" do
91
+ parse_and_verify %q(<document>text &lt; other &gt;&#x20;text</document>)
92
+ end
93
+
94
+ it "should handle a longer, more complex document (50+K atom feed)" do
95
+ File.open(File.dirname(__FILE__) + "/atom_feed.xml") do |f|
96
+ parse_and_verify f.read
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,8 @@
1
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
2
+ require 'rexml/parsers/baseparser'
3
+ require 'jrexml'
4
+
5
+ Spec::Runner.configure do |config|
6
+ config.before :all do
7
+ end
8
+ end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: jrexml
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.5"
7
+ date: 2007-08-07 00:00:00 -07:00
8
+ summary: JREXML speeds up REXML under JRuby by using a Java pull parser.
9
+ require_paths:
10
+ - lib
11
+ email: nick@nicksieger.com
12
+ homepage: http://caldersphere.rubyforge.org/jrexml
13
+ rubyforge_project: caldersphere
14
+ description: JREXML is an add-on for JRuby that uses a Java pull parser library to speed up REXML.
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Nick Sieger
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - LICENSE.txt
36
+ - Rakefile
37
+ - lib/jrexml/java_pull_parser.rb
38
+ - lib/jrexml.rb
39
+ - lib/xpp3-1.1.4.jar
40
+ - lib/xpp3.LICENSE.txt
41
+ - spec/java_pull_parser_spec.rb
42
+ - spec/spec_helper.rb
43
+ test_files:
44
+ - spec/java_pull_parser_spec.rb
45
+ rdoc_options:
46
+ - --main
47
+ - README.txt
48
+ extra_rdoc_files:
49
+ - History.txt
50
+ - Manifest.txt
51
+ - README.txt
52
+ - LICENSE.txt
53
+ - lib/xpp3.LICENSE.txt
54
+ executables: []
55
+
56
+ extensions: []
57
+
58
+ requirements: []
59
+
60
+ dependencies: []
61
+