hpricot 0.6-jruby → 0.6.161-jruby

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,9 @@ import org.jruby.RubyClass;
6
6
  import org.jruby.RubyHash;
7
7
  import org.jruby.RubyModule;
8
8
  import org.jruby.RubyNumeric;
9
+ import org.jruby.RubyObjectAdapter;
9
10
  import org.jruby.RubyString;
11
+ import org.jruby.javasupport.JavaEmbedUtils;
10
12
  import org.jruby.runtime.Block;
11
13
  import org.jruby.runtime.CallbackFactory;
12
14
  import org.jruby.runtime.builtin.IRubyObject;
@@ -15,6 +17,7 @@ import org.jruby.runtime.load.BasicLibraryService;
15
17
 
16
18
  public class HpricotScanService implements BasicLibraryService {
17
19
  public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
20
+ private static RubyObjectAdapter rubyApi;
18
21
 
19
22
  public void ELE(IRubyObject N) {
20
23
  if (tokend > tokstart || text) {
@@ -239,8 +242,8 @@ IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
239
242
  }
240
243
 
241
244
  buffer_size = BUFSIZE;
242
- if (recv.getInstanceVariable("@buffer_size") != null) {
243
- bufsize = recv.getInstanceVariable("@buffer_size");
245
+ if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
246
+ bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
244
247
  if (!bufsize.isNil()) {
245
248
  buffer_size = RubyNumeric.fix2int(bufsize);
246
249
  }
@@ -359,5 +362,6 @@ public static void Init_hpricot_scan(Ruby runtime) {
359
362
  CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
360
363
  mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
361
364
  mHpricot.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator());
365
+ rubyApi = JavaEmbedUtils.newObjectAdapter();
362
366
  }
363
367
  }
@@ -8,6 +8,12 @@
8
8
  */
9
9
  #include <ruby.h>
10
10
 
11
+ #ifndef RARRAY_LEN
12
+ #define RARRAY_LEN(arr) RARRAY(arr)->len
13
+ #define RSTRING_LEN(str) RSTRING(str)->len
14
+ #define RSTRING_PTR(str) RSTRING(str)->ptr
15
+ #endif
16
+
11
17
  #define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
12
18
 
13
19
  static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
@@ -169,7 +175,7 @@ VALUE hpricot_scan(VALUE self, VALUE port)
169
175
  /* We've used up the entire buffer storing an already-parsed token
170
176
  * prefix that must be preserved. Likely caused by super-long attributes.
171
177
  * See ticket #13. */
172
- rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <%s>, starting on line %d.", RSTRING(tag)->ptr, curline);
178
+ rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <%s>, starting on line %d.", RSTRING_PTR(tag), curline);
173
179
  }
174
180
 
175
181
  if ( rb_respond_to( port, s_read ) )
@@ -182,8 +188,8 @@ VALUE hpricot_scan(VALUE self, VALUE port)
182
188
  }
183
189
 
184
190
  StringValue(str);
185
- memcpy( p, RSTRING(str)->ptr, RSTRING(str)->len );
186
- len = RSTRING(str)->len;
191
+ memcpy( p, RSTRING_PTR(str), RSTRING_LEN(str) );
192
+ len = RSTRING_LEN(str);
187
193
  nread += len;
188
194
 
189
195
  /* If this is the last buffer, tack on an EOF. */
@@ -199,7 +205,7 @@ VALUE hpricot_scan(VALUE self, VALUE port)
199
205
  free(buf);
200
206
  if ( !NIL_P(tag) )
201
207
  {
202
- rb_raise(rb_eHpricotParseError, "parse error on element <%s>, starting on line %d.\n" NO_WAY_SERIOUSLY, RSTRING(tag)->ptr, curline);
208
+ rb_raise(rb_eHpricotParseError, "parse error on element <%s>, starting on line %d.\n" NO_WAY_SERIOUSLY, RSTRING_PTR(tag), curline);
203
209
  }
204
210
  else
205
211
  {
@@ -1,8 +1,23 @@
1
1
  require 'hpricot/tags'
2
- require 'hpricot/xchar'
2
+ require 'fast_xs'
3
3
  require 'hpricot/blankslate'
4
4
 
5
5
  module Hpricot
6
+ PREDEFINED = {
7
+ 34 => '&quot;', # quotation mark
8
+ 38 => '&amp;', # ampersand
9
+ 60 => '&lt;', # left angle bracket
10
+ 62 => '&gt;' # right angle bracket
11
+ }
12
+ PREDEFINED_U = PREDEFINED.inject({}) { |hsh, (k, v)| hsh[v] = k; hsh }
13
+
14
+ # XML unescape
15
+ def self.uxs(str)
16
+ str.to_s.
17
+ gsub(/\&\w+;/) { |x| (PREDEFINED_U[x] || ??).chr }.
18
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
19
+ end
20
+
6
21
  def self.build(ele = Doc.new, assigns = {}, &blk)
7
22
  ele.extend Builder
8
23
  assigns.each do |k, v|
@@ -32,7 +47,7 @@ module Hpricot
32
47
 
33
48
  # Write a +string+ to the HTML stream, making sure to escape it.
34
49
  def text!(string)
35
- @children << Text.new(Hpricot.xs(string))
50
+ @children << Text.new(string.fast_xs)
36
51
  end
37
52
 
38
53
  # Write a +string+ to the HTML stream without escaping it.
@@ -75,16 +90,16 @@ module Hpricot
75
90
  # turn arguments into children or attributes
76
91
  childs = []
77
92
  attrs = args.grep(Hash)
78
- childs.concat((args - attrs).map do |x|
93
+ childs.concat((args - attrs).flatten.map do |x|
79
94
  if x.respond_to? :to_html
80
95
  Hpricot.make(x.to_html)
81
96
  elsif x
82
- Text.new(Hpricot.xs(x))
97
+ Text.new(x.fast_xs)
83
98
  end
84
99
  end.flatten)
85
100
  attrs = attrs.inject({}) do |hsh, ath|
86
101
  ath.each do |k, v|
87
- hsh[k] = Hpricot.xs(v.to_s) if v
102
+ hsh[k] = v.to_s.fast_xs if v
88
103
  end
89
104
  hsh
90
105
  end
@@ -130,25 +130,25 @@ module Hpricot
130
130
  # Add to the end of the contents inside each element in this list.
131
131
  # Pass in an HTML +str+, which is turned into Hpricot elements.
132
132
  def append(str = nil, &blk)
133
- each { |x| x.html(x.children + Hpricot.make(str, &blk)) }
133
+ each { |x| x.html(x.children + x.make(str, &blk)) }
134
134
  end
135
135
 
136
136
  # Add to the start of the contents inside each element in this list.
137
137
  # Pass in an HTML +str+, which is turned into Hpricot elements.
138
138
  def prepend(str = nil, &blk)
139
- each { |x| x.html(Hpricot.make(str, &blk) + x.children) }
139
+ each { |x| x.html(x.make(str, &blk) + x.children) }
140
140
  end
141
141
 
142
142
  # Add some HTML just previous to each element in this list.
143
143
  # Pass in an HTML +str+, which is turned into Hpricot elements.
144
144
  def before(str = nil, &blk)
145
- each { |x| x.parent.insert_before Hpricot.make(str, &blk), x }
145
+ each { |x| x.parent.insert_before x.make(str, &blk), x }
146
146
  end
147
147
 
148
148
  # Just after each element in this list, add some HTML.
149
149
  # Pass in an HTML +str+, which is turned into Hpricot elements.
150
150
  def after(str = nil, &blk)
151
- each { |x| x.parent.insert_after Hpricot.make(str, &blk), x }
151
+ each { |x| x.parent.insert_after x.make(str, &blk), x }
152
152
  end
153
153
 
154
154
  # Wraps each element in the list inside the element created by HTML +str+.
@@ -161,7 +161,7 @@ module Hpricot
161
161
  # This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest.
162
162
  def wrap(str = nil, &blk)
163
163
  each do |x|
164
- wrap = Hpricot.make(str, &blk)
164
+ wrap = x.make(str, &blk)
165
165
  nest = wrap.detect { |w| w.respond_to? :children }
166
166
  unless nest
167
167
  raise Exception, "No wrapping element found."
@@ -261,7 +261,7 @@ module Hpricot
261
261
  self
262
262
  end
263
263
 
264
- ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^\]'"]*)'?"? *\]!i
264
+ ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i
265
265
  BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
266
266
  FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
267
267
  CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
@@ -12,13 +12,14 @@ module Hpricot
12
12
  # Hpricot.parse parses <i>input</i> and return a document tree.
13
13
  # represented by Hpricot::Doc.
14
14
  def Hpricot.parse(input = nil, opts = {}, &blk)
15
- Doc.new(make(input, opts, &blk))
15
+ Doc.new(make(input, opts, &blk), opts)
16
16
  end
17
17
 
18
18
  # Hpricot::XML parses <i>input</i>, disregarding all the HTML rules
19
19
  # and returning a document tree.
20
- def Hpricot.XML(input, opts = {})
21
- Doc.new(make(input, opts.merge(:xml => true)))
20
+ def Hpricot.XML(input = nil, opts = {}, &blk)
21
+ opts.merge! :xml => true
22
+ Doc.new(make(input, opts, &blk), opts)
22
23
  end
23
24
 
24
25
  # :stopdoc:
@@ -3,8 +3,9 @@ module Hpricot
3
3
 
4
4
  class Doc
5
5
  attr_accessor :children
6
- def initialize(children = [])
6
+ def initialize(children = [], options = {})
7
7
  @children = children ? children.each { |c| c.parent = self } : []
8
+ @options = options
8
9
  end
9
10
  def output(out, opts = {})
10
11
  @children.each do |n|
@@ -12,6 +13,9 @@ module Hpricot
12
13
  end
13
14
  out
14
15
  end
16
+ def make(input = nil, &blk)
17
+ Hpricot.make(input, @options, &blk)
18
+ end
15
19
  def altered!; end
16
20
  end
17
21
 
@@ -100,7 +104,7 @@ module Hpricot
100
104
  if @raw_attributes
101
105
  @raw_attributes.map do |aname, aval|
102
106
  " #{aname}" +
103
- (aval ? "=\"#{aval}\"" : "")
107
+ (aval ? "=#{html_quote aval}" : "")
104
108
  end.join
105
109
  end
106
110
  end
@@ -20,6 +20,16 @@ module Hpricot
20
20
  # Is this object a stranded end tag?
21
21
  def bogusetag?() BogusETag::Trav === self end
22
22
 
23
+ # Parses an HTML string, making an HTML fragment based on
24
+ # the options used to create the container document.
25
+ def make(input = nil, &blk)
26
+ if parent and parent.respond_to? :make
27
+ parent.make(input, &blk)
28
+ else
29
+ Hpricot.make(input, &blk)
30
+ end
31
+ end
32
+
23
33
  # Builds an HTML string from this node and its contents.
24
34
  # If you need to write to a stream, try calling <tt>output(io)</tt>
25
35
  # as a method on this object.
@@ -109,12 +119,12 @@ module Hpricot
109
119
 
110
120
  # Adds elements immediately after this element, contained in the +html+ string.
111
121
  def after(html = nil, &blk)
112
- parent.insert_after(Hpricot.make(html, &blk), self)
122
+ parent.insert_after(make(html, &blk), self)
113
123
  end
114
124
 
115
125
  # Adds elements immediately before this element, contained in the +html+ string.
116
126
  def before(html = nil, &blk)
117
- parent.insert_before(Hpricot.make(html, &blk), self)
127
+ parent.insert_before(make(html, &blk), self)
118
128
  end
119
129
 
120
130
 
@@ -122,7 +132,7 @@ module Hpricot
122
132
  # in the +html+ string.
123
133
  def swap(html = nil, &blk)
124
134
  parent.altered!
125
- parent.replace_child(self, Hpricot.make(html, &blk))
135
+ parent.replace_child(self, make(html, &blk))
126
136
  end
127
137
 
128
138
  def get_subnode(*indexes)
@@ -158,7 +168,7 @@ module Hpricot
158
168
  when Array
159
169
  self.children = inner
160
170
  else
161
- self.children = Hpricot.make(inner, &blk)
171
+ self.children = make(inner, &blk)
162
172
  end
163
173
  reparent self.children
164
174
  else
@@ -513,8 +523,9 @@ module Hpricot
513
523
 
514
524
  def get_elements_by_tag_name(*a)
515
525
  list = Elements[]
526
+ a.delete("*")
516
527
  traverse_element(*a.map { |tag| [tag, "{http://www.w3.org/1999/xhtml}#{tag}"] }.flatten) do |e|
517
- list << e
528
+ list << e if e.elem?
518
529
  end
519
530
  list
520
531
  end
@@ -806,7 +817,7 @@ module Hpricot
806
817
  def set_attribute(name, val)
807
818
  altered!
808
819
  self.raw_attributes ||= {}
809
- self.raw_attributes[name.to_s] = Hpricot.xs(val)
820
+ self.raw_attributes[name.to_s] = val.fast_xs
810
821
  end
811
822
  alias_method :[]=, :set_attribute
812
823
  def remove_attribute(name)
@@ -58,6 +58,18 @@ class TestAlter < Test::Unit::TestCase
58
58
  assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
59
59
  end
60
60
 
61
+ def test_xml_casing
62
+ doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
63
+ (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
64
+ assert_equal doc.to_s, "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>"
65
+
66
+ frag = Hpricot.XML do
67
+ b { i "A bit of HTML" }
68
+ end
69
+ (frag/:b).after("<beanPole>gravity</beanPole>")
70
+ assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
71
+ end
72
+
61
73
  def assert_changed original, selector, set, &block
62
74
  assert set.all?(&block)
63
75
  assert Hpricot(original.to_html).search(selector).all?(&block)
@@ -21,4 +21,17 @@ class TestBuilder < Test::Unit::TestCase
21
21
  assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
22
22
  assert_equal "\342\202\254\342\200\242", doc.at("text()").to_s
23
23
  end
24
+
25
+ def test_escaping_attrs
26
+ text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
27
+ assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
28
+ Hpricot(text).to_html
29
+ end
30
+
31
+ def test_korean_utf8_entities
32
+ # a = '한글'
33
+ a = "\xed\x95\x9c\xea\xb8\x80"
34
+ doc = Hpricot() { b a }
35
+ assert_equal "<b>&#54620;&#44544;</b>", doc.to_html
36
+ end
24
37
  end
@@ -47,6 +47,13 @@ class TestParser < Test::Unit::TestCase
47
47
  assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
48
48
  assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
49
49
  end
50
+
51
+ def test_get_elements_by_tag_name_star
52
+ simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
53
+ assert_equal 3, simple.get_elements_by_tag_name("*").size
54
+ assert_equal 1, simple.get_elements_by_tag_name("div").size
55
+ assert_equal 2, simple.get_elements_by_tag_name("p").size
56
+ end
50
57
 
51
58
  def test_output_basic
52
59
  @basic = Hpricot.parse(TestFiles::BASIC)
@@ -13,4 +13,13 @@ class TestParser < Test::Unit::TestCase
13
13
  assert_equal ele, @basic.at(ele.xpath)
14
14
  end
15
15
  end
16
+ def test_attr_brackets
17
+ doc = Hpricot('<input name="vendor[porkpies]"/>')
18
+ assert_equal 1, (doc/'input[@name^="vendor[porkpies]"]').length
19
+ assert_equal 1, (doc/'input[@name="vendor[porkpies]"]').length
20
+ assert_equal 0, (doc/'input[@name$="]]]]]"]').length
21
+
22
+ doc = Hpricot('<input name="vendor[porkpies][meaty]"/>')
23
+ assert_equal 1, (doc/'input[@name^="vendor[porkpies][meaty]"]').length
24
+ end
16
25
  end
metadata CHANGED
@@ -1,82 +1,62 @@
1
- --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
3
- specification_version: 1
4
- name: hpricot
5
- version: !ruby/object:Gem::Version
6
- version: "0.6"
7
- date: 2007-06-15 00:00:00 -07:00
8
- summary: a swift, liberal HTML parser with a fantastic library
9
- require_paths:
10
- - lib/i686-linux
11
- - lib
12
- email: why@ruby-lang.org
1
+ --- !ruby/object:Gem::Specification
2
+ extensions: []
13
3
  homepage: http://code.whytheluckystiff.net/hpricot/
14
- rubyforge_project:
15
- description: a swift, liberal HTML parser with a fantastic library
16
- autorequire:
17
- default_executable:
18
- bindir: bin
19
- has_rdoc: true
20
- required_ruby_version: !ruby/object:Gem::Version::Requirement
21
- requirements:
22
- - - ">"
23
- - !ruby/object:Gem::Version
24
- version: 0.0.0
25
- version:
26
- platform: jruby
27
- signing_key:
28
- cert_chain:
4
+ executables: []
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.6.161
29
7
  post_install_message:
30
- authors:
31
- - why the lucky stiff
8
+ date: 2008-03-19 05:00:00 +00:00
32
9
  files:
33
10
  - CHANGELOG
34
11
  - COPYING
35
12
  - README
36
13
  - Rakefile
37
14
  - test/files
38
- - test/test_preserved.rb
39
- - test/test_paths.rb
40
15
  - test/load_files.rb
41
- - test/test_xml.rb
42
- - test/test_parser.rb
43
16
  - test/test_alter.rb
44
17
  - test/test_builder.rb
45
- - test/files/why.xml
46
- - test/files/boingboing.html
47
- - test/files/uswebgen.html
48
- - test/files/immob.html
49
- - test/files/week9.html
50
- - test/files/utf8.html
18
+ - test/test_parser.rb
19
+ - test/test_paths.rb
20
+ - test/test_preserved.rb
21
+ - test/test_xml.rb
51
22
  - test/files/basic.xhtml
23
+ - test/files/boingboing.html
52
24
  - test/files/cy0.html
53
- - test/files/tenderlove.html
25
+ - test/files/immob.html
54
26
  - test/files/pace_application.html
27
+ - test/files/tenderlove.html
28
+ - test/files/uswebgen.html
29
+ - test/files/utf8.html
30
+ - test/files/week9.html
31
+ - test/files/why.xml
55
32
  - lib/hpricot
56
33
  - lib/hpricot.rb
57
- - lib/i686-linux
34
+ - lib/hpricot/blankslate.rb
58
35
  - lib/hpricot/builder.rb
36
+ - lib/hpricot/elements.rb
59
37
  - lib/hpricot/htmlinfo.rb
60
- - lib/hpricot/xchar.rb
61
38
  - lib/hpricot/inspect.rb
62
39
  - lib/hpricot/modules.rb
63
40
  - lib/hpricot/parse.rb
64
41
  - lib/hpricot/tag.rb
65
- - lib/hpricot/traverse.rb
66
- - lib/hpricot/elements.rb
67
42
  - lib/hpricot/tags.rb
68
- - lib/hpricot/blankslate.rb
43
+ - lib/hpricot/traverse.rb
44
+ - lib/hpricot/xchar.rb
69
45
  - extras/mingw-rbconfig.rb
70
46
  - ext/hpricot_scan/hpricot_scan.h
47
+ - ext/fast_xs/FastXsService.java
48
+ - ext/hpricot_scan/hpricot_scan.java.java
71
49
  - ext/hpricot_scan/HpricotScanService.java
50
+ - ext/fast_xs/fast_xs.c
72
51
  - ext/hpricot_scan/hpricot_scan.c
52
+ - ext/fast_xs/extconf.rb
73
53
  - ext/hpricot_scan/extconf.rb
74
54
  - ext/hpricot_scan/hpricot_common.rl
75
- - ext/hpricot_scan/hpricot_scan.rl
76
55
  - ext/hpricot_scan/hpricot_scan.java.rl
77
- - lib/i686-linux/hpricot_scan.jar
78
- test_files: []
79
-
56
+ - ext/hpricot_scan/hpricot_scan.rl
57
+ - lib/universal-java1.5/hpricot_scan.jar
58
+ - lib/universal-java1.5/fast_xs.jar
59
+ rubygems_version: 1.0.1
80
60
  rdoc_options:
81
61
  - --quiet
82
62
  - --title
@@ -84,15 +64,40 @@ rdoc_options:
84
64
  - --main
85
65
  - README
86
66
  - --inline-source
67
+ signing_key:
68
+ cert_chain: []
69
+ name: hpricot
70
+ has_rdoc: true
71
+ platform: jruby
72
+ summary: a swift, liberal HTML parser with a fantastic library
73
+ default_executable:
74
+ bindir: bin
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ version:
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
80
+ version: !str 0
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ version:
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: !str 0
87
+ require_paths:
88
+ - lib/universal-java1.5
89
+ - lib
90
+ specification_version: 2
91
+ test_files: []
92
+ dependencies: []
93
+ description: a swift, liberal HTML parser with a fantastic library
94
+ email: why@ruby-lang.org
95
+ authors:
96
+ - why the lucky stiff
87
97
  extra_rdoc_files:
88
98
  - README
89
99
  - CHANGELOG
90
100
  - COPYING
91
- executables: []
92
-
93
- extensions: []
94
-
95
101
  requirements: []
96
-
97
- dependencies: []
98
-
102
+ rubyforge_project:
103
+ autorequire: