why-hpricot 0.6.210 → 0.7.229
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/Rakefile +14 -9
- data/ext/fast_xs/fast_xs.c +2 -1
- data/ext/hpricot_scan/HpricotScanService.java +1122 -342
- data/ext/hpricot_scan/hpricot_css.c +2112 -2116
- data/ext/hpricot_scan/hpricot_scan.c +1169 -923
- data/ext/hpricot_scan/hpricot_scan.java.rl +1078 -299
- data/ext/hpricot_scan/hpricot_scan.rl +327 -237
- data/lib/hpricot/elements.rb +1 -1
- data/lib/hpricot/inspect.rb +2 -2
- data/lib/hpricot/modules.rb +2 -0
- data/lib/hpricot/tag.rb +43 -22
- data/lib/hpricot/traverse.rb +1 -0
- data/test/test_alter.rb +20 -2
- data/test/test_parser.rb +19 -0
- data/test/test_preserved.rb +9 -0
- metadata +6 -6
data/lib/hpricot/elements.rb
CHANGED
@@ -422,7 +422,7 @@ module Hpricot
|
|
422
422
|
case arg
|
423
423
|
when 'even'; (parent.containers.index(self) + 1) % 2 == 0
|
424
424
|
when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
|
425
|
-
else self == (parent.containers[arg.to_i
|
425
|
+
else self == (parent.containers[arg.to_i - 1])
|
426
426
|
end
|
427
427
|
end
|
428
428
|
|
data/lib/hpricot/inspect.rb
CHANGED
@@ -11,7 +11,7 @@ module Hpricot
|
|
11
11
|
|
12
12
|
class Doc
|
13
13
|
def pretty_print(q)
|
14
|
-
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } }
|
14
|
+
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } if children }
|
15
15
|
end
|
16
16
|
alias inspect pretty_print_inspect
|
17
17
|
end
|
@@ -47,7 +47,7 @@ module Hpricot
|
|
47
47
|
children.each {|elt| q.breakable; q.pp elt }
|
48
48
|
end
|
49
49
|
if etag
|
50
|
-
q.breakable; q.
|
50
|
+
q.breakable; q.text etag
|
51
51
|
end
|
52
52
|
}
|
53
53
|
end
|
data/lib/hpricot/modules.rb
CHANGED
@@ -8,11 +8,13 @@ module Hpricot
|
|
8
8
|
# :startdoc:
|
9
9
|
|
10
10
|
module Node; include Hpricot end
|
11
|
+
class ETag; include Node end
|
11
12
|
module Container; include Node end
|
12
13
|
class Doc; include Container end
|
13
14
|
class Elem; include Container end
|
14
15
|
|
15
16
|
module Leaf; include Node end
|
17
|
+
class CData; include Leaf end
|
16
18
|
class Text; include Leaf end
|
17
19
|
class XMLDecl; include Leaf end
|
18
20
|
class DocType; include Leaf end
|
data/lib/hpricot/tag.rb
CHANGED
@@ -17,10 +17,11 @@ module Hpricot
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
|
20
|
+
module Node
|
21
21
|
def html_quote(str)
|
22
22
|
"\"" + str.gsub('"', '\\"') + "\""
|
23
23
|
end
|
24
|
+
def clear_raw; end
|
24
25
|
def if_output(opts)
|
25
26
|
if opts[:preserve] and not raw_string.nil?
|
26
27
|
raw_string
|
@@ -37,15 +38,20 @@ module Hpricot
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
class
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
class Attributes
|
42
|
+
attr_accessor :element
|
43
|
+
def initialize e
|
44
|
+
@element = e
|
44
45
|
end
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
def [] k
|
47
|
+
Hpricot.uxs((@element.raw_attributes || {})[k])
|
48
|
+
end
|
49
|
+
def []= k, v
|
50
|
+
(@element.raw_attributes ||= {})[k] = v.fast_xs
|
51
|
+
end
|
52
|
+
def to_hash
|
53
|
+
if @element.raw_attributes
|
54
|
+
@element.raw_attributes.inject({}) do |hsh, (k, v)|
|
49
55
|
hsh[k] = Hpricot.uxs(v)
|
50
56
|
hsh
|
51
57
|
end
|
@@ -53,6 +59,23 @@ module Hpricot
|
|
53
59
|
{}
|
54
60
|
end
|
55
61
|
end
|
62
|
+
def to_s
|
63
|
+
to_hash.to_s
|
64
|
+
end
|
65
|
+
def inspect
|
66
|
+
to_hash.inspect
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class Elem
|
71
|
+
def initialize tag, attrs = nil, children = nil, etag = nil
|
72
|
+
self.name, self.raw_attributes, self.children, self.etag =
|
73
|
+
tag, attrs, children, etag
|
74
|
+
end
|
75
|
+
def empty?; children.nil? or children.empty? end
|
76
|
+
def attributes
|
77
|
+
Attributes.new self
|
78
|
+
end
|
56
79
|
def to_plain_text
|
57
80
|
if self.name == 'br'
|
58
81
|
"\n"
|
@@ -77,13 +100,10 @@ module Hpricot
|
|
77
100
|
if children
|
78
101
|
children.each { |n| n.output(out, opts) }
|
79
102
|
end
|
80
|
-
if
|
81
|
-
|
82
|
-
elsif
|
83
|
-
out <<
|
84
|
-
if_output(opts) do
|
85
|
-
"</#{name}>"
|
86
|
-
end
|
103
|
+
if opts[:preserve]
|
104
|
+
out << etag if etag
|
105
|
+
elsif etag or !empty?
|
106
|
+
out << "</#{name}>"
|
87
107
|
end
|
88
108
|
out
|
89
109
|
end
|
@@ -101,17 +121,14 @@ module Hpricot
|
|
101
121
|
end
|
102
122
|
end
|
103
123
|
|
104
|
-
class
|
124
|
+
class BogusETag
|
105
125
|
def initialize name; self.name = name end
|
106
126
|
def output(out, opts = {})
|
107
|
-
out <<
|
108
|
-
if_output(opts) do
|
109
|
-
"</#{name}>"
|
110
|
-
end
|
127
|
+
out << if_output(opts) { "" }
|
111
128
|
end
|
112
129
|
end
|
113
130
|
|
114
|
-
class BogusETag
|
131
|
+
class ETag < BogusETag
|
115
132
|
def output(out, opts = {}); out << if_output(opts) { '' }; end
|
116
133
|
end
|
117
134
|
|
@@ -136,6 +153,8 @@ module Hpricot
|
|
136
153
|
def initialize content; self.content = content end
|
137
154
|
alias_method :to_s, :content
|
138
155
|
alias_method :to_plain_text, :content
|
156
|
+
alias_method :inner_text, :content
|
157
|
+
def raw_string; "<![CDATA[#{content}]]>" end
|
139
158
|
def output(out, opts = {})
|
140
159
|
out <<
|
141
160
|
if_output(opts) do
|
@@ -174,6 +193,7 @@ module Hpricot
|
|
174
193
|
|
175
194
|
class ProcIns
|
176
195
|
def pathname; "procins()" end
|
196
|
+
def raw_string; output("") end
|
177
197
|
def output(out, opts = {})
|
178
198
|
out <<
|
179
199
|
if_output(opts) do
|
@@ -186,6 +206,7 @@ module Hpricot
|
|
186
206
|
|
187
207
|
class Comment
|
188
208
|
def pathname; "comment()" end
|
209
|
+
def raw_string; "<!--#{content}-->" end
|
189
210
|
def output(out, opts = {})
|
190
211
|
out <<
|
191
212
|
if_output(opts) do
|
data/lib/hpricot/traverse.rb
CHANGED
data/test/test_alter.rb
CHANGED
@@ -35,14 +35,22 @@ class TestAlter < Test::Unit::TestCase
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def test_change_attributes
|
38
|
-
all_ps = (@basic/"p").attr("title", "Some Title")
|
38
|
+
all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
|
39
39
|
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
40
40
|
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
41
|
-
assert_changed(@basic, "p", all_ps) {|p| p.
|
41
|
+
assert_changed(@basic, "p", all_ps) {|p| p.raw_attributes["title"] == "Some Title & Etc…"}
|
42
42
|
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
43
43
|
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
44
44
|
end
|
45
45
|
|
46
|
+
def test_change_attributes2
|
47
|
+
all_as = (@basic%"a").attributes["href"] = "http://my_new_href.com"
|
48
|
+
all_ps = (@basic%"p").attributes["title"] = "Some Title & Etc…"
|
49
|
+
assert_equal (@basic%"a").raw_attributes["href"], "http://my_new_href.com"
|
50
|
+
assert_equal (@basic%"p").raw_attributes["title"], "Some Title & Etc…"
|
51
|
+
assert_equal (@basic%"p").attributes["title"], "Some Title & Etc…"
|
52
|
+
end
|
53
|
+
|
46
54
|
def test_remove_attr
|
47
55
|
all_rl = (@basic/"link").remove_attr("href")
|
48
56
|
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
@@ -70,6 +78,16 @@ class TestAlter < Test::Unit::TestCase
|
|
70
78
|
assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
|
71
79
|
end
|
72
80
|
|
81
|
+
def test_reparent_empty_nodes
|
82
|
+
doc = Hpricot("<div/>")
|
83
|
+
doc.root.inner_html = "foo"
|
84
|
+
assert_equal doc.root.inner_html, "foo"
|
85
|
+
doc.root.inner_html = ""
|
86
|
+
assert_equal doc.root.inner_html, ""
|
87
|
+
doc.root.swap { b "test" }
|
88
|
+
assert_equal doc.root.inner_html, "test"
|
89
|
+
end
|
90
|
+
|
73
91
|
def assert_changed original, selector, set, &block
|
74
92
|
assert set.all?(&block)
|
75
93
|
assert Hpricot(original.to_html).search(selector).all?(&block)
|
data/test/test_parser.rb
CHANGED
@@ -227,6 +227,14 @@ class TestParser < Test::Unit::TestCase
|
|
227
227
|
assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
228
228
|
end
|
229
229
|
|
230
|
+
def test_attr_casing
|
231
|
+
doc = Hpricot("<a HREF='a'>A simple <b>test</b> string.</a>")
|
232
|
+
assert_equal (doc % :a)[:href], "a"
|
233
|
+
assert_equal (doc % :a)[:HREF], nil
|
234
|
+
assert_equal (doc % :a)['href'], "a"
|
235
|
+
assert_equal (doc % :a)['HREF'], nil
|
236
|
+
end
|
237
|
+
|
230
238
|
def test_class_search
|
231
239
|
# test case sent by Chih-Chao Lam
|
232
240
|
doc = Hpricot("<div class=xyz'>abc</div>")
|
@@ -406,4 +414,15 @@ class TestParser < Test::Unit::TestCase
|
|
406
414
|
assert_equal "\303\251", Hpricot.uxs('é')
|
407
415
|
end
|
408
416
|
end
|
417
|
+
|
418
|
+
def test_cdata_inner_text
|
419
|
+
xml = Hpricot.XML(%{
|
420
|
+
<peon>
|
421
|
+
<id>96586</id>
|
422
|
+
<stdout><![CDATA[This is STDOUT]]></stdout>
|
423
|
+
<stderr><!-- IGNORE --><![CDATA[This is]]> STDERR</stderr>
|
424
|
+
</peon>})
|
425
|
+
assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
|
426
|
+
assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
|
427
|
+
end
|
409
428
|
end
|
data/test/test_preserved.rb
CHANGED
@@ -53,6 +53,15 @@ class TestPreserved < Test::Unit::TestCase
|
|
53
53
|
assert_roundtrip TestFiles::CY0
|
54
54
|
end
|
55
55
|
|
56
|
+
def test_fixup_link
|
57
|
+
doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
58
|
+
assert_roundtrip doc
|
59
|
+
assert_equal Hpricot(doc).to_s,
|
60
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>}
|
61
|
+
assert_equal Hpricot.XML(doc).to_s,
|
62
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
63
|
+
end
|
64
|
+
|
56
65
|
def test_escaping_of_attrs
|
57
66
|
# ampersands in URLs
|
58
67
|
str = %{<a href="http://google.com/search?q=hpricot&l=en">Google</a>}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: why-hpricot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.229
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- why the lucky stiff
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-03-23 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -29,7 +29,6 @@ files:
|
|
29
29
|
- COPYING
|
30
30
|
- README
|
31
31
|
- Rakefile
|
32
|
-
- test/files
|
33
32
|
- test/files/basic.xhtml
|
34
33
|
- test/files/boingboing.html
|
35
34
|
- test/files/cy0.html
|
@@ -47,7 +46,6 @@ files:
|
|
47
46
|
- test/test_paths.rb
|
48
47
|
- test/test_preserved.rb
|
49
48
|
- test/test_xml.rb
|
50
|
-
- lib/hpricot
|
51
49
|
- lib/hpricot/blankslate.rb
|
52
50
|
- lib/hpricot/builder.rb
|
53
51
|
- lib/hpricot/elements.rb
|
@@ -74,6 +72,8 @@ files:
|
|
74
72
|
- ext/hpricot_scan/hpricot_scan.rl
|
75
73
|
has_rdoc: true
|
76
74
|
homepage: http://code.whytheluckystiff.net/hpricot/
|
75
|
+
licenses: []
|
76
|
+
|
77
77
|
post_install_message:
|
78
78
|
rdoc_options:
|
79
79
|
- --quiet
|
@@ -99,9 +99,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
requirements: []
|
100
100
|
|
101
101
|
rubyforge_project:
|
102
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.3.5
|
103
103
|
signing_key:
|
104
|
-
specification_version:
|
104
|
+
specification_version: 3
|
105
105
|
summary: a swift, liberal HTML parser with a fantastic library
|
106
106
|
test_files: []
|
107
107
|
|