adamh-hpricot 0.6.211 → 0.7.229
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/Rakefile +3 -3
- data/ext/fast_xs/fast_xs.c +1 -0
- data/ext/hpricot_scan/hpricot_css.c +107 -103
- data/ext/hpricot_scan/hpricot_scan.c +1159 -923
- data/ext/hpricot_scan/hpricot_scan.rl +312 -237
- data/lib/hpricot/elements.rb +1 -1
- data/lib/hpricot/inspect.rb +2 -2
- data/lib/hpricot/modules.rb +2 -0
- data/lib/hpricot/tag.rb +43 -22
- data/lib/hpricot/traverse.rb +1 -0
- data/test/test_alter.rb +20 -2
- data/test/test_parser.rb +19 -0
- data/test/test_preserved.rb +9 -0
- metadata +2 -2
data/lib/hpricot/elements.rb
CHANGED
@@ -422,7 +422,7 @@ module Hpricot
|
|
422
422
|
case arg
|
423
423
|
when 'even'; (parent.containers.index(self) + 1) % 2 == 0
|
424
424
|
when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
|
425
|
-
else self == (parent.containers[arg.to_i
|
425
|
+
else self == (parent.containers[arg.to_i - 1])
|
426
426
|
end
|
427
427
|
end
|
428
428
|
|
data/lib/hpricot/inspect.rb
CHANGED
@@ -11,7 +11,7 @@ module Hpricot
|
|
11
11
|
|
12
12
|
class Doc
|
13
13
|
def pretty_print(q)
|
14
|
-
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } }
|
14
|
+
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } if children }
|
15
15
|
end
|
16
16
|
alias inspect pretty_print_inspect
|
17
17
|
end
|
@@ -47,7 +47,7 @@ module Hpricot
|
|
47
47
|
children.each {|elt| q.breakable; q.pp elt }
|
48
48
|
end
|
49
49
|
if etag
|
50
|
-
q.breakable; q.
|
50
|
+
q.breakable; q.text etag
|
51
51
|
end
|
52
52
|
}
|
53
53
|
end
|
data/lib/hpricot/modules.rb
CHANGED
@@ -8,11 +8,13 @@ module Hpricot
|
|
8
8
|
# :startdoc:
|
9
9
|
|
10
10
|
module Node; include Hpricot end
|
11
|
+
class ETag; include Node end
|
11
12
|
module Container; include Node end
|
12
13
|
class Doc; include Container end
|
13
14
|
class Elem; include Container end
|
14
15
|
|
15
16
|
module Leaf; include Node end
|
17
|
+
class CData; include Leaf end
|
16
18
|
class Text; include Leaf end
|
17
19
|
class XMLDecl; include Leaf end
|
18
20
|
class DocType; include Leaf end
|
data/lib/hpricot/tag.rb
CHANGED
@@ -17,10 +17,11 @@ module Hpricot
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
|
20
|
+
module Node
|
21
21
|
def html_quote(str)
|
22
22
|
"\"" + str.gsub('"', '\\"') + "\""
|
23
23
|
end
|
24
|
+
def clear_raw; end
|
24
25
|
def if_output(opts)
|
25
26
|
if opts[:preserve] and not raw_string.nil?
|
26
27
|
raw_string
|
@@ -37,15 +38,20 @@ module Hpricot
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
class
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
class Attributes
|
42
|
+
attr_accessor :element
|
43
|
+
def initialize e
|
44
|
+
@element = e
|
44
45
|
end
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
def [] k
|
47
|
+
Hpricot.uxs((@element.raw_attributes || {})[k])
|
48
|
+
end
|
49
|
+
def []= k, v
|
50
|
+
(@element.raw_attributes ||= {})[k] = v.fast_xs
|
51
|
+
end
|
52
|
+
def to_hash
|
53
|
+
if @element.raw_attributes
|
54
|
+
@element.raw_attributes.inject({}) do |hsh, (k, v)|
|
49
55
|
hsh[k] = Hpricot.uxs(v)
|
50
56
|
hsh
|
51
57
|
end
|
@@ -53,6 +59,23 @@ module Hpricot
|
|
53
59
|
{}
|
54
60
|
end
|
55
61
|
end
|
62
|
+
def to_s
|
63
|
+
to_hash.to_s
|
64
|
+
end
|
65
|
+
def inspect
|
66
|
+
to_hash.inspect
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class Elem
|
71
|
+
def initialize tag, attrs = nil, children = nil, etag = nil
|
72
|
+
self.name, self.raw_attributes, self.children, self.etag =
|
73
|
+
tag, attrs, children, etag
|
74
|
+
end
|
75
|
+
def empty?; children.nil? or children.empty? end
|
76
|
+
def attributes
|
77
|
+
Attributes.new self
|
78
|
+
end
|
56
79
|
def to_plain_text
|
57
80
|
if self.name == 'br'
|
58
81
|
"\n"
|
@@ -77,13 +100,10 @@ module Hpricot
|
|
77
100
|
if children
|
78
101
|
children.each { |n| n.output(out, opts) }
|
79
102
|
end
|
80
|
-
if
|
81
|
-
|
82
|
-
elsif
|
83
|
-
out <<
|
84
|
-
if_output(opts) do
|
85
|
-
"</#{name}>"
|
86
|
-
end
|
103
|
+
if opts[:preserve]
|
104
|
+
out << etag if etag
|
105
|
+
elsif etag or !empty?
|
106
|
+
out << "</#{name}>"
|
87
107
|
end
|
88
108
|
out
|
89
109
|
end
|
@@ -101,17 +121,14 @@ module Hpricot
|
|
101
121
|
end
|
102
122
|
end
|
103
123
|
|
104
|
-
class
|
124
|
+
class BogusETag
|
105
125
|
def initialize name; self.name = name end
|
106
126
|
def output(out, opts = {})
|
107
|
-
out <<
|
108
|
-
if_output(opts) do
|
109
|
-
"</#{name}>"
|
110
|
-
end
|
127
|
+
out << if_output(opts) { "" }
|
111
128
|
end
|
112
129
|
end
|
113
130
|
|
114
|
-
class BogusETag
|
131
|
+
class ETag < BogusETag
|
115
132
|
def output(out, opts = {}); out << if_output(opts) { '' }; end
|
116
133
|
end
|
117
134
|
|
@@ -137,6 +154,8 @@ module Hpricot
|
|
137
154
|
def initialize content; self.content = content end
|
138
155
|
alias_method :to_s, :content
|
139
156
|
alias_method :to_plain_text, :content
|
157
|
+
alias_method :inner_text, :content
|
158
|
+
def raw_string; "<![CDATA[#{content}]]>" end
|
140
159
|
def output(out, opts = {})
|
141
160
|
out <<
|
142
161
|
if_output(opts) do
|
@@ -175,6 +194,7 @@ module Hpricot
|
|
175
194
|
|
176
195
|
class ProcIns
|
177
196
|
def pathname; "procins()" end
|
197
|
+
def raw_string; output("") end
|
178
198
|
def output(out, opts = {})
|
179
199
|
out <<
|
180
200
|
if_output(opts) do
|
@@ -187,6 +207,7 @@ module Hpricot
|
|
187
207
|
|
188
208
|
class Comment
|
189
209
|
def pathname; "comment()" end
|
210
|
+
def raw_string; "<!--#{content}-->" end
|
190
211
|
def output(out, opts = {})
|
191
212
|
out <<
|
192
213
|
if_output(opts) do
|
data/lib/hpricot/traverse.rb
CHANGED
data/test/test_alter.rb
CHANGED
@@ -35,14 +35,22 @@ class TestAlter < Test::Unit::TestCase
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def test_change_attributes
|
38
|
-
all_ps = (@basic/"p").attr("title", "Some Title")
|
38
|
+
all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
|
39
39
|
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
40
40
|
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
41
|
-
assert_changed(@basic, "p", all_ps) {|p| p.
|
41
|
+
assert_changed(@basic, "p", all_ps) {|p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;"}
|
42
42
|
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
43
43
|
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
44
44
|
end
|
45
45
|
|
46
|
+
def test_change_attributes2
|
47
|
+
all_as = (@basic%"a").attributes["href"] = "http://my_new_href.com"
|
48
|
+
all_ps = (@basic%"p").attributes["title"] = "Some Title & Etc…"
|
49
|
+
assert_equal (@basic%"a").raw_attributes["href"], "http://my_new_href.com"
|
50
|
+
assert_equal (@basic%"p").raw_attributes["title"], "Some Title &amp; Etc&#8230;"
|
51
|
+
assert_equal (@basic%"p").attributes["title"], "Some Title & Etc…"
|
52
|
+
end
|
53
|
+
|
46
54
|
def test_remove_attr
|
47
55
|
all_rl = (@basic/"link").remove_attr("href")
|
48
56
|
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
@@ -70,6 +78,16 @@ class TestAlter < Test::Unit::TestCase
|
|
70
78
|
assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
|
71
79
|
end
|
72
80
|
|
81
|
+
def test_reparent_empty_nodes
|
82
|
+
doc = Hpricot("<div/>")
|
83
|
+
doc.root.inner_html = "foo"
|
84
|
+
assert_equal doc.root.inner_html, "foo"
|
85
|
+
doc.root.inner_html = ""
|
86
|
+
assert_equal doc.root.inner_html, ""
|
87
|
+
doc.root.swap { b "test" }
|
88
|
+
assert_equal doc.root.inner_html, "test"
|
89
|
+
end
|
90
|
+
|
73
91
|
def assert_changed original, selector, set, &block
|
74
92
|
assert set.all?(&block)
|
75
93
|
assert Hpricot(original.to_html).search(selector).all?(&block)
|
data/test/test_parser.rb
CHANGED
@@ -227,6 +227,14 @@ class TestParser < Test::Unit::TestCase
|
|
227
227
|
assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
228
228
|
end
|
229
229
|
|
230
|
+
def test_attr_casing
|
231
|
+
doc = Hpricot("<a HREF='a'>A simple <b>test</b> string.</a>")
|
232
|
+
assert_equal (doc % :a)[:href], "a"
|
233
|
+
assert_equal (doc % :a)[:HREF], nil
|
234
|
+
assert_equal (doc % :a)['href'], "a"
|
235
|
+
assert_equal (doc % :a)['HREF'], nil
|
236
|
+
end
|
237
|
+
|
230
238
|
def test_class_search
|
231
239
|
# test case sent by Chih-Chao Lam
|
232
240
|
doc = Hpricot("<div class=xyz'>abc</div>")
|
@@ -406,4 +414,15 @@ class TestParser < Test::Unit::TestCase
|
|
406
414
|
assert_equal "\303\251", Hpricot.uxs('&eacute;')
|
407
415
|
end
|
408
416
|
end
|
417
|
+
|
418
|
+
def test_cdata_inner_text
|
419
|
+
xml = Hpricot.XML(%{
|
420
|
+
<peon>
|
421
|
+
<id>96586</id>
|
422
|
+
<stdout><![CDATA[This is STDOUT]]></stdout>
|
423
|
+
<stderr><!-- IGNORE --><![CDATA[This is]]> STDERR</stderr>
|
424
|
+
</peon>})
|
425
|
+
assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
|
426
|
+
assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
|
427
|
+
end
|
409
428
|
end
|
data/test/test_preserved.rb
CHANGED
@@ -53,6 +53,15 @@ class TestPreserved < Test::Unit::TestCase
|
|
53
53
|
assert_roundtrip TestFiles::CY0
|
54
54
|
end
|
55
55
|
|
56
|
+
def test_fixup_link
|
57
|
+
doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
58
|
+
assert_roundtrip doc
|
59
|
+
assert_equal Hpricot(doc).to_s,
|
60
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>}
|
61
|
+
assert_equal Hpricot.XML(doc).to_s,
|
62
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
63
|
+
end
|
64
|
+
|
56
65
|
def test_escaping_of_attrs
|
57
66
|
# ampersands in URLs
|
58
67
|
str = %{<a href="http://google.com/search?q=hpricot&l=en">Google</a>}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adamh-hpricot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.229
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- why the lucky stiff
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-03-23 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|