jrexml 0.5

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ = 0.5
2
+
3
+ * Birthday!
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ # (c) Copyright 2007 Nick Sieger <nicksieger@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person
4
+ # obtaining a copy of this software and associated documentation files
5
+ # (the "Software"), to deal in the Software without restriction,
6
+ # including without limitation the rights to use, copy, modify, merge,
7
+ # publish, distribute, sublicense, and/or sell copies of the Software,
8
+ # and to permit persons to whom the Software is furnished to do so,
9
+ # subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be
12
+ # included in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ LICENSE.txt
5
+ Rakefile
6
+ lib/jrexml/java_pull_parser.rb
7
+ lib/jrexml.rb
8
+ lib/xpp3-1.1.4.jar
9
+ lib/xpp3.LICENSE.txt
10
+ spec/java_pull_parser_spec.rb
11
+ spec/spec_helper.rb
data/README.txt ADDED
@@ -0,0 +1,20 @@
1
+ JREXML is an add-on for JRuby that uses a Java pull parser library to speed up REXML.
2
+
3
+ REXML is, unfortunately, painfully slow running under JRuby at the moment due to the slowness of regular expression parsing. JREXML shoves a small wrapper around XPP3/MXP1 (http://www.extreme.indiana.edu/xgws/xsoap/xpp/mxp1/) into the guts of REXML, disabling the regular expression parser and providing close to a 10x speedup.
4
+
5
+ = Install
6
+
7
+ Simply install the gem under JRuby:
8
+
9
+ jruby -S gem install jrexml
10
+
11
+ And require 'jrexml' to speed up REXML.
12
+
13
+ gem 'jrexml'
14
+ require 'jrexml'
15
+
16
+ = License
17
+
18
+ This software is released under an MIT license. For details, see the LICENSE.txt file included with the distribution. The software is copyright (c) 2007 Nick Sieger <nicksieger@gmail.com>.
19
+
20
+ This product includes software developed by the Indiana University Extreme! Lab (http://www.extreme.indiana.edu/). See the license in the file lib/xpp3.LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,64 @@
1
require 'spec/rake/spectask'

# Files that make up the gem. The same list is written to Manifest.txt by the
# :manifest task and assigned to the gem specification when Hoe is available.
MANIFEST = FileList["History.txt", "Manifest.txt", "README.txt", "LICENSE.txt", "Rakefile",
  "lib/**/*.rb", "lib/xpp*", "spec/**/*.rb"]

# Hoe provides the packaging tasks. A missing Hoe is reported but is not
# fatal, so the remaining tasks in this file must not assume it was loaded.
begin
  require 'hoe'
  hoe = Hoe.new("jrexml", "0.5") do |p|
    p.rubyforge_name = "caldersphere"
    p.url = "http://caldersphere.rubyforge.org/jrexml"
    p.author = "Nick Sieger"
    p.email = "nick@nicksieger.com"
    p.summary = "JREXML speeds up REXML under JRuby by using a Java pull parser."
    p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
    p.description = p.paragraphs_of('README.txt', 0...1).join("\n\n")
    p.extra_deps.reject!{|d| d.first == "hoe"}
    p.test_globs = ["spec/**/*_spec.rb"]
  end
  hoe.spec.files = MANIFEST
  hoe.spec.dependencies.delete_if { |dep| dep.name == "hoe" }
rescue LoadError
  puts "You really need Hoe installed to be able to package this gem"
end

# Hoe insists on setting task :default => :test
# !@#$ no easy way to empty the default list of prerequisites
# Only touch the task when it exists: if Hoe could not be loaded above, no
# :default task has been defined yet and looking it up would raise.
if Rake::Task.task_defined?('default')
  Rake::Task['default'].send :instance_variable_set, "@prerequisites", FileList[]
end

file "Manifest.txt" => :manifest
task :manifest do
  File.open("Manifest.txt", "w") {|f| MANIFEST.each {|n| f << "#{n}\n"} }
end
Rake::Task['manifest'].invoke # Always regen manifest, so Hoe has up-to-date list of files

task :default => :spec

Spec::Rake::SpecTask.new do |t|
  t.libs << "lib"
  t.spec_files = FileList['spec/**/*_spec.rb']
end
41
+
42
# Parses spec/atom_feed.xml ten times with REXML as loaded from
# 'rexml/document' and, on a Java platform, ten more times after requiring
# 'jrexml'. The JREXML measurement includes the time spent in that require.
task :benchmark do
  $LOAD_PATH.unshift "lib"
  require 'benchmark'
  require 'rexml/document'

  feed_path = File.dirname(__FILE__) + "/spec/atom_feed.xml"
  xml = File.open(feed_path) { |io| io.read }
  parse_feed = lambda { 10.times { REXML::Document.new xml } }

  Benchmark.bm(7) do |x|
    x.report("REXML") { parse_feed.call }
    if RUBY_PLATFORM =~ /java/
      x.report("JREXML") do
        require 'jrexml'
        parse_feed.call
      end
    end
  end
end
data/lib/jrexml.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'rexml/parsers/baseparser'
2
+ require 'jrexml/java_pull_parser'
3
+
4
class REXML::Parsers::BaseParser #:nodoc:
  class << self
    # Replacement constructor: the instance is allocated and extended with
    # JREXML::JavaPullParser *before* initialize runs, so any method that
    # initialize calls on the instance already resolves to the Java-backed
    # version.
    def new(*args)
      parser = allocate
      parser.extend(JREXML::JavaPullParser)
      parser.__send__(:initialize, *args)
      parser
    end
  end
end
@@ -0,0 +1,171 @@
1
# JREXML drives REXML from an XmlPull (XPP3/MXP1) parser instead of REXML's
# own tokenizer. JavaPullParser is mixed into REXML::Parsers::BaseParser
# instances and translates XmlPull tokens into REXML pull events.
module JREXML
  # Version of the xpp3 jar that is required when the XmlPull classes are not
  # already on the classpath.
  XPP_VERSION = "1.1.4"

  # Resolve the XmlPull API. If the class is not visible, load the jar and
  # retry the lookup once.
  begin
    XmlPullParser = Java::org.xmlpull.v1.XmlPullParser
  rescue
    raise LoadError, "JREXML is only for JRuby" if RUBY_PLATFORM !~ /java/
    load_failure = "Unable to load XmlPullParser java class; " +
      "you need to include xpp3-#{XPP_VERSION}.jar on the classpath"
    begin
      require "xpp3-#{XPP_VERSION}.jar"
    rescue LoadError
      raise LoadError, load_failure
    end
    @triedxpp ||= 0
    @triedxpp += 1
    retry unless @triedxpp > 1
    # The jar was found but the class still cannot be resolved: fail here with
    # a descriptive error instead of a NameError on the constants below.
    raise LoadError, load_failure
  end

  START_DOCUMENT = XmlPullParser::START_DOCUMENT
  END_DOCUMENT = XmlPullParser::END_DOCUMENT
  START_TAG = XmlPullParser::START_TAG
  END_TAG = XmlPullParser::END_TAG
  TEXT = XmlPullParser::TEXT
  CDSECT = XmlPullParser::CDSECT
  COMMENT = XmlPullParser::COMMENT
  ENTITY_REF = XmlPullParser::ENTITY_REF
  IGNORABLE_WHITESPACE = XmlPullParser::IGNORABLE_WHITESPACE
  PROCESSING_INSTRUCTION = XmlPullParser::PROCESSING_INSTRUCTION

  # Token types that are merged into a single :text event when they occur
  # back to back.
  ADJACENT_EVENTS = [TEXT, ENTITY_REF]

  # Raised when the underlying Java parser reports an error.
  class XmlParsingError < StandardError; end

  module JavaPullParser
    # Returns the shared XmlPullParserFactory, created on first use with
    # namespace awareness and validation switched off.
    def self.factory
      @factory ||= begin
        fact = org.xmlpull.v1.XmlPullParserFactory.newInstance
        fact.set_namespace_aware false
        fact.set_validating false
        fact
      end
    end

    # Points the parser at a new source. +source+ is read completely (via
    # #read when it responds to it, otherwise via #to_s) and handed to a fresh
    # XmlPull parser as a byte stream.
    def stream=(source)
      @source = JavaPullParser.factory.newPullParser
      @source.setInput java.io.ByteArrayInputStream.new(get_bytes(source)), nil
    end

    # Returns true if there are no more events
    def empty?
      event_stack.empty?
    end

    # Returns true if there are more events. Synonymous with !empty?
    def has_next?
      !empty?
    end

    # Push an event back on the head of the stream. This method
    # has (theoretically) infinite depth.
    def unshift(event)
      @event_stack ||= []
      @event_stack.unshift event
    end

    # Look-ahead is not supported by this parser; always raises.
    def peek(depth = 0)
      raise "not implemented"
    end

    # Returns the next event as a REXML-style array, or nil once the document
    # has ended. If the document carries an XML declaration, the very first
    # call returns an :xmldecl event and the token that was read is pushed
    # back for the following call.
    def pull
      event = event_stack.shift
      unless @first_event_seen
        @first_event_seen = true
        version = @source.getProperty("http://xmlpull.org/v1/doc/properties.html#xmldecl-version")
        if version
          standalone = @source.getProperty("http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone")
          encoding = @source.getInputEncoding
          unshift event
          return [:xmldecl, version, encoding, standalone]
        end
      end
      convert_event(event)
    end

    # Pulls until the stream is exhausted and returns every event in order.
    def all_events
      events = []
      while event = pull
        events << event
      end
      events
    end

    private

    # Converts a token type into an event. Runs of TEXT and ENTITY_REF tokens
    # are collapsed into one :text event, with entity references written back
    # in their "&name;" form.
    def convert_event(event)
      return convert_event_without_text_or_entityref(event) unless ADJACENT_EVENTS.include?(event)

      text = ""
      while ADJACENT_EVENTS.include?(event)
        # Read from @source before fetching the next token: event_stack.shift
        # may advance the parser past the current one.
        case event
        when TEXT
          text << @source.text
        when ENTITY_REF
          text << "&#{@source.name};"
        end
        event = event_stack.shift
      end
      # Hand the look-ahead token back. If the stream ran dry instead, still
      # return the text collected so far rather than dropping it.
      unshift event if event
      [:text, text]
    end

    # Converts every token type other than TEXT and ENTITY_REF.
    def convert_event_without_text_or_entityref(event)
      case event
      when START_DOCUMENT
        [:start_document]
      when END_DOCUMENT
        # Stops event_stack from asking the parser for further tokens
        @document_ended = true
        [:end_document]
      when START_TAG
        attributes = {}
        @source.attribute_count.times do |i|
          attributes[@source.getAttributeName(i)] = @source.getAttributeValue(i)
        end
        [:start_element, @source.name, attributes]
      when END_TAG
        [:end_element, @source.name]
      when IGNORABLE_WHITESPACE
        [:text, @source.text]
      when CDSECT
        [:cdata, @source.text]
      when COMMENT
        [:comment, @source.text]
      when PROCESSING_INSTRUCTION
        # REXML reports the target and then the data *including* the
        # whitespace that precedes it, or nil when the instruction has no data.
        pi_info = /\A(.*?)(\s+.*)?\z/m.match(@source.text)
        [:processing_instruction, pi_info[1], pi_info[2]]
      when nil
        nil
      else
        [:unknown, debug_event(event)]
      end
    end

    # Returns the queue of pending token types, fetching the next token from
    # the Java parser when the queue is empty and the document has not ended.
    # Java-side failures are re-raised as XmlParsingError.
    def event_stack
      @event_stack ||= []
      if @event_stack.empty? && !@document_ended
        begin
          @event_stack << @source.nextToken
        rescue NativeException => e
          raise XmlParsingError, e.message
        end
      end
      @event_stack
    end

    # Reads +src+ fully and returns its contents as a Java byte array.
    def get_bytes(src)
      string = src.respond_to?(:read) ? src.read : src.to_s
      string.to_java_bytes
    end

    # Human-readable name for a token type, used for :unknown events.
    def debug_event(event)
      "XmlPullParser::#{XmlPullParser::TYPES[event]}" if event
    end
  end
end
Binary file
@@ -0,0 +1,47 @@
1
+ # Indiana University Extreme! Lab Software License
2
+ #
3
+ # Version 1.1.1
4
+ #
5
+ # Copyright (c) 2002 Extreme! Lab, Indiana University. All rights reserved.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in
16
+ # the documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. The end-user documentation included with the redistribution, if any,
19
+ # must include the following acknowledgment:
20
+ #
21
+ # "This product includes software developed by the Indiana University
22
+ # Extreme! Lab (http://www.extreme.indiana.edu/)."
23
+ #
24
+ # Alternately, this acknowledgment may appear in the software itself,
25
+ # if and wherever such third-party acknowledgments normally appear.
26
+ #
27
+ # 4. The names "Indiana Univeristy" and "Indiana Univeristy Extreme! Lab"
28
+ # must not be used to endorse or promote products derived from this
29
+ # software without prior written permission. For written permission,
30
+ # please contact http://www.extreme.indiana.edu/.
31
+ #
32
+ # 5. Products derived from this software may not use "Indiana Univeristy"
33
+ # name nor may "Indiana Univeristy" appear in their name, without prior
34
+ # written permission of the Indiana University.
35
+ #
36
+ # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
37
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
38
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39
+ # IN NO EVENT SHALL THE AUTHORS, COPYRIGHT HOLDERS OR ITS CONTRIBUTORS
40
+ # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43
+ # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45
+ # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47
+ #
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe JREXML::JavaPullParser do
  # Builds a Java-backed parser over +source+ and gives it a +base_events+
  # singleton method that returns the events stock REXML produces for the
  # same source, up to and including :end_document.
  def parse(source)
    @parser = REXML::Parsers::BaseParser.new(source)
    @parser.extend(JREXML::JavaPullParser)
    @parser.stream = source
    (class << @parser; self; end).send :define_method, "base_events" do
      # jrexml overrides BaseParser.new so that every new instance is extended
      # with JavaPullParser. Go through allocate/initialize instead, so the
      # reference events really come from the unmodified REXML parser rather
      # than from the parser under test.
      baseparser = REXML::Parsers::BaseParser.allocate
      baseparser.send :initialize, source
      events = []
      loop do
        event = baseparser.pull
        events << event
        break if event.first == :end_document
      end
      events
    end
    @parser
  end

  # Pulls from the Java-backed parser once per reference event and expects
  # each pair to be equal, then expects the parser to be exhausted.
  def verify_events
    @parser.base_events.each do |evt|
      @parser.pull.should == evt
    end
    @parser.should be_empty
  end

  def parse_and_verify(source)
    parse source
    verify_events
  end

  it "should parse a document consisting of a single empty element" do
    parse_and_verify %q(<document/>)
  end

  it "should allow calling empty? or has_next? in between pulls" do
    @parser = parse %q(<document/>)
    @parser.pull.should == [:start_element, "document", {}]
    @parser.should_not be_empty
    @parser.pull.should == [:end_element, "document"]
    @parser.has_next?.should == true
    @parser.pull.should == [:end_document]
    @parser.has_next?.should == false
    @parser.should be_empty
  end

  it "should parse text between elements" do
    parse_and_verify %q(<document>This is the body</document>)
  end

  it "should parse multiple texts" do
    parse_and_verify <<-XML
<document>
some text
<a-tag/>
some other text
</document>
    XML
  end

  it "should parse attributes" do
    parse_and_verify %q(<document attr1="value" attr2='value2'/>)
  end

  it "should handle namespaces in the same way as the base parser (which is to be ignorant of them)" do
    parse_and_verify %q(<d:document xmlns:d="urn:example.com" d:attr="value"/>)
  end

  it "should handle the xml processing instruction" do
    parse_and_verify <<-XML
<?xml version="1.0" encoding="utf-8"?>
<document/>
    XML
  end

  it "should handle CDATA" do
    parse_and_verify %q(<document><![CDATA[some cdata]]></document>)
  end

  it "should handle comments" do
    parse_and_verify %q(<document><!-- some comment --></document>)
  end

  it "should handle processing instructions" do
    parse_and_verify %q(<?xml version="1.0"?>
<?xml-stylesheet href="hello-page-html.xsl" type="text/xsl"?><document/>)
  end

  it "should handle simple entity refs" do
    parse_and_verify %q(<document>text &lt; other &gt;&#x20;text</document>)
  end

  it "should handle a longer, more complex document (50+K atom feed)" do
    File.open(File.dirname(__FILE__) + "/atom_feed.xml") do |f|
      parse_and_verify f.read
    end
  end
end
@@ -0,0 +1,8 @@
1
# Put the sibling lib directory at the front of the load path, then load
# REXML's base parser followed by jrexml.
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
require 'rexml/parsers/baseparser'
require 'jrexml'

# Registers a before(:all) hook that currently does nothing.
Spec::Runner.configure { |config| config.before(:all) { } }
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: jrexml
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.5"
7
+ date: 2007-08-07 00:00:00 -07:00
8
+ summary: JREXML speeds up REXML under JRuby by using a Java pull parser.
9
+ require_paths:
10
+ - lib
11
+ email: nick@nicksieger.com
12
+ homepage: http://caldersphere.rubyforge.org/jrexml
13
+ rubyforge_project: caldersphere
14
+ description: JREXML is an add-on for JRuby that uses a Java pull parser library to speed up REXML.
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Nick Sieger
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - LICENSE.txt
36
+ - Rakefile
37
+ - lib/jrexml/java_pull_parser.rb
38
+ - lib/jrexml.rb
39
+ - lib/xpp3-1.1.4.jar
40
+ - lib/xpp3.LICENSE.txt
41
+ - spec/java_pull_parser_spec.rb
42
+ - spec/spec_helper.rb
43
+ test_files:
44
+ - spec/java_pull_parser_spec.rb
45
+ rdoc_options:
46
+ - --main
47
+ - README.txt
48
+ extra_rdoc_files:
49
+ - History.txt
50
+ - Manifest.txt
51
+ - README.txt
52
+ - LICENSE.txt
53
+ - lib/xpp3.LICENSE.txt
54
+ executables: []
55
+
56
+ extensions: []
57
+
58
+ requirements: []
59
+
60
+ dependencies: []
61
+