why-hpricot 0.6.210 → 0.7.229
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +13 -0
- data/Rakefile +14 -9
- data/ext/fast_xs/fast_xs.c +2 -1
- data/ext/hpricot_scan/HpricotScanService.java +1122 -342
- data/ext/hpricot_scan/hpricot_css.c +2112 -2116
- data/ext/hpricot_scan/hpricot_scan.c +1169 -923
- data/ext/hpricot_scan/hpricot_scan.java.rl +1078 -299
- data/ext/hpricot_scan/hpricot_scan.rl +327 -237
- data/lib/hpricot/elements.rb +1 -1
- data/lib/hpricot/inspect.rb +2 -2
- data/lib/hpricot/modules.rb +2 -0
- data/lib/hpricot/tag.rb +43 -22
- data/lib/hpricot/traverse.rb +1 -0
- data/test/test_alter.rb +20 -2
- data/test/test_parser.rb +19 -0
- data/test/test_preserved.rb +9 -0
- metadata +6 -6
data/lib/hpricot/elements.rb
CHANGED
@@ -422,7 +422,7 @@ module Hpricot
|
|
422
422
|
case arg
|
423
423
|
when 'even'; (parent.containers.index(self) + 1) % 2 == 0
|
424
424
|
when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
|
425
|
-
else self == (parent.containers[arg.to_i
|
425
|
+
else self == (parent.containers[arg.to_i - 1])
|
426
426
|
end
|
427
427
|
end
|
428
428
|
|
data/lib/hpricot/inspect.rb
CHANGED
@@ -11,7 +11,7 @@ module Hpricot
|
|
11
11
|
|
12
12
|
class Doc
|
13
13
|
def pretty_print(q)
|
14
|
-
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } }
|
14
|
+
q.object_group(self) { children.each {|elt| q.breakable; q.pp elt } if children }
|
15
15
|
end
|
16
16
|
alias inspect pretty_print_inspect
|
17
17
|
end
|
@@ -47,7 +47,7 @@ module Hpricot
|
|
47
47
|
children.each {|elt| q.breakable; q.pp elt }
|
48
48
|
end
|
49
49
|
if etag
|
50
|
-
q.breakable; q.
|
50
|
+
q.breakable; q.text etag
|
51
51
|
end
|
52
52
|
}
|
53
53
|
end
|
data/lib/hpricot/modules.rb
CHANGED
@@ -8,11 +8,13 @@ module Hpricot
|
|
8
8
|
# :startdoc:
|
9
9
|
|
10
10
|
module Node; include Hpricot end
|
11
|
+
class ETag; include Node end
|
11
12
|
module Container; include Node end
|
12
13
|
class Doc; include Container end
|
13
14
|
class Elem; include Container end
|
14
15
|
|
15
16
|
module Leaf; include Node end
|
17
|
+
class CData; include Leaf end
|
16
18
|
class Text; include Leaf end
|
17
19
|
class XMLDecl; include Leaf end
|
18
20
|
class DocType; include Leaf end
|
data/lib/hpricot/tag.rb
CHANGED
@@ -17,10 +17,11 @@ module Hpricot
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
|
20
|
+
module Node
|
21
21
|
def html_quote(str)
|
22
22
|
"\"" + str.gsub('"', '\\"') + "\""
|
23
23
|
end
|
24
|
+
def clear_raw; end
|
24
25
|
def if_output(opts)
|
25
26
|
if opts[:preserve] and not raw_string.nil?
|
26
27
|
raw_string
|
@@ -37,15 +38,20 @@ module Hpricot
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
class
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
class Attributes
|
42
|
+
attr_accessor :element
|
43
|
+
def initialize e
|
44
|
+
@element = e
|
44
45
|
end
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
def [] k
|
47
|
+
Hpricot.uxs((@element.raw_attributes || {})[k])
|
48
|
+
end
|
49
|
+
def []= k, v
|
50
|
+
(@element.raw_attributes ||= {})[k] = v.fast_xs
|
51
|
+
end
|
52
|
+
def to_hash
|
53
|
+
if @element.raw_attributes
|
54
|
+
@element.raw_attributes.inject({}) do |hsh, (k, v)|
|
49
55
|
hsh[k] = Hpricot.uxs(v)
|
50
56
|
hsh
|
51
57
|
end
|
@@ -53,6 +59,23 @@ module Hpricot
|
|
53
59
|
{}
|
54
60
|
end
|
55
61
|
end
|
62
|
+
def to_s
|
63
|
+
to_hash.to_s
|
64
|
+
end
|
65
|
+
def inspect
|
66
|
+
to_hash.inspect
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class Elem
|
71
|
+
def initialize tag, attrs = nil, children = nil, etag = nil
|
72
|
+
self.name, self.raw_attributes, self.children, self.etag =
|
73
|
+
tag, attrs, children, etag
|
74
|
+
end
|
75
|
+
def empty?; children.nil? or children.empty? end
|
76
|
+
def attributes
|
77
|
+
Attributes.new self
|
78
|
+
end
|
56
79
|
def to_plain_text
|
57
80
|
if self.name == 'br'
|
58
81
|
"\n"
|
@@ -77,13 +100,10 @@ module Hpricot
|
|
77
100
|
if children
|
78
101
|
children.each { |n| n.output(out, opts) }
|
79
102
|
end
|
80
|
-
if
|
81
|
-
|
82
|
-
elsif
|
83
|
-
out <<
|
84
|
-
if_output(opts) do
|
85
|
-
"</#{name}>"
|
86
|
-
end
|
103
|
+
if opts[:preserve]
|
104
|
+
out << etag if etag
|
105
|
+
elsif etag or !empty?
|
106
|
+
out << "</#{name}>"
|
87
107
|
end
|
88
108
|
out
|
89
109
|
end
|
@@ -101,17 +121,14 @@ module Hpricot
|
|
101
121
|
end
|
102
122
|
end
|
103
123
|
|
104
|
-
class
|
124
|
+
class BogusETag
|
105
125
|
def initialize name; self.name = name end
|
106
126
|
def output(out, opts = {})
|
107
|
-
out <<
|
108
|
-
if_output(opts) do
|
109
|
-
"</#{name}>"
|
110
|
-
end
|
127
|
+
out << if_output(opts) { "" }
|
111
128
|
end
|
112
129
|
end
|
113
130
|
|
114
|
-
class BogusETag
|
131
|
+
class ETag < BogusETag
|
115
132
|
def output(out, opts = {}); out << if_output(opts) { '' }; end
|
116
133
|
end
|
117
134
|
|
@@ -136,6 +153,8 @@ module Hpricot
|
|
136
153
|
def initialize content; self.content = content end
|
137
154
|
alias_method :to_s, :content
|
138
155
|
alias_method :to_plain_text, :content
|
156
|
+
alias_method :inner_text, :content
|
157
|
+
def raw_string; "<![CDATA[#{content}]]>" end
|
139
158
|
def output(out, opts = {})
|
140
159
|
out <<
|
141
160
|
if_output(opts) do
|
@@ -174,6 +193,7 @@ module Hpricot
|
|
174
193
|
|
175
194
|
class ProcIns
|
176
195
|
def pathname; "procins()" end
|
196
|
+
def raw_string; output("") end
|
177
197
|
def output(out, opts = {})
|
178
198
|
out <<
|
179
199
|
if_output(opts) do
|
@@ -186,6 +206,7 @@ module Hpricot
|
|
186
206
|
|
187
207
|
class Comment
|
188
208
|
def pathname; "comment()" end
|
209
|
+
def raw_string; "<!--#{content}-->" end
|
189
210
|
def output(out, opts = {})
|
190
211
|
out <<
|
191
212
|
if_output(opts) do
|
data/lib/hpricot/traverse.rb
CHANGED
data/test/test_alter.rb
CHANGED
@@ -35,14 +35,22 @@ class TestAlter < Test::Unit::TestCase
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def test_change_attributes
|
38
|
-
all_ps = (@basic/"p").attr("title", "Some Title")
|
38
|
+
all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
|
39
39
|
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
40
40
|
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
41
|
-
assert_changed(@basic, "p", all_ps) {|p| p.
|
41
|
+
assert_changed(@basic, "p", all_ps) {|p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;"}
|
42
42
|
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
43
43
|
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
44
44
|
end
|
45
45
|
|
46
|
+
def test_change_attributes2
|
47
|
+
all_as = (@basic%"a").attributes["href"] = "http://my_new_href.com"
|
48
|
+
all_ps = (@basic%"p").attributes["title"] = "Some Title & Etc…"
|
49
|
+
assert_equal (@basic%"a").raw_attributes["href"], "http://my_new_href.com"
|
50
|
+
assert_equal (@basic%"p").raw_attributes["title"], "Some Title &amp; Etc&#8230;"
|
51
|
+
assert_equal (@basic%"p").attributes["title"], "Some Title & Etc…"
|
52
|
+
end
|
53
|
+
|
46
54
|
def test_remove_attr
|
47
55
|
all_rl = (@basic/"link").remove_attr("href")
|
48
56
|
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
@@ -70,6 +78,16 @@ class TestAlter < Test::Unit::TestCase
|
|
70
78
|
assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
|
71
79
|
end
|
72
80
|
|
81
|
+
def test_reparent_empty_nodes
|
82
|
+
doc = Hpricot("<div/>")
|
83
|
+
doc.root.inner_html = "foo"
|
84
|
+
assert_equal doc.root.inner_html, "foo"
|
85
|
+
doc.root.inner_html = ""
|
86
|
+
assert_equal doc.root.inner_html, ""
|
87
|
+
doc.root.swap { b "test" }
|
88
|
+
assert_equal doc.root.inner_html, "test"
|
89
|
+
end
|
90
|
+
|
73
91
|
def assert_changed original, selector, set, &block
|
74
92
|
assert set.all?(&block)
|
75
93
|
assert Hpricot(original.to_html).search(selector).all?(&block)
|
data/test/test_parser.rb
CHANGED
@@ -227,6 +227,14 @@ class TestParser < Test::Unit::TestCase
|
|
227
227
|
assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
228
228
|
end
|
229
229
|
|
230
|
+
def test_attr_casing
|
231
|
+
doc = Hpricot("<a HREF='a'>A simple <b>test</b> string.</a>")
|
232
|
+
assert_equal (doc % :a)[:href], "a"
|
233
|
+
assert_equal (doc % :a)[:HREF], nil
|
234
|
+
assert_equal (doc % :a)['href'], "a"
|
235
|
+
assert_equal (doc % :a)['HREF'], nil
|
236
|
+
end
|
237
|
+
|
230
238
|
def test_class_search
|
231
239
|
# test case sent by Chih-Chao Lam
|
232
240
|
doc = Hpricot("<div class=xyz'>abc</div>")
|
@@ -406,4 +414,15 @@ class TestParser < Test::Unit::TestCase
|
|
406
414
|
assert_equal "\303\251", Hpricot.uxs('é')
|
407
415
|
end
|
408
416
|
end
|
417
|
+
|
418
|
+
def test_cdata_inner_text
|
419
|
+
xml = Hpricot.XML(%{
|
420
|
+
<peon>
|
421
|
+
<id>96586</id>
|
422
|
+
<stdout><![CDATA[This is STDOUT]]></stdout>
|
423
|
+
<stderr><!-- IGNORE --><![CDATA[This is]]> STDERR</stderr>
|
424
|
+
</peon>})
|
425
|
+
assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
|
426
|
+
assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
|
427
|
+
end
|
409
428
|
end
|
data/test/test_preserved.rb
CHANGED
@@ -53,6 +53,15 @@ class TestPreserved < Test::Unit::TestCase
|
|
53
53
|
assert_roundtrip TestFiles::CY0
|
54
54
|
end
|
55
55
|
|
56
|
+
def test_fixup_link
|
57
|
+
doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
58
|
+
assert_roundtrip doc
|
59
|
+
assert_equal Hpricot(doc).to_s,
|
60
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>}
|
61
|
+
assert_equal Hpricot.XML(doc).to_s,
|
62
|
+
%{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
|
63
|
+
end
|
64
|
+
|
56
65
|
def test_escaping_of_attrs
|
57
66
|
# ampersands in URLs
|
58
67
|
str = %{<a href="http://google.com/search?q=hpricot&l=en">Google</a>}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: why-hpricot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.229
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- why the lucky stiff
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-03-23 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -29,7 +29,6 @@ files:
|
|
29
29
|
- COPYING
|
30
30
|
- README
|
31
31
|
- Rakefile
|
32
|
-
- test/files
|
33
32
|
- test/files/basic.xhtml
|
34
33
|
- test/files/boingboing.html
|
35
34
|
- test/files/cy0.html
|
@@ -47,7 +46,6 @@ files:
|
|
47
46
|
- test/test_paths.rb
|
48
47
|
- test/test_preserved.rb
|
49
48
|
- test/test_xml.rb
|
50
|
-
- lib/hpricot
|
51
49
|
- lib/hpricot/blankslate.rb
|
52
50
|
- lib/hpricot/builder.rb
|
53
51
|
- lib/hpricot/elements.rb
|
@@ -74,6 +72,8 @@ files:
|
|
74
72
|
- ext/hpricot_scan/hpricot_scan.rl
|
75
73
|
has_rdoc: true
|
76
74
|
homepage: http://code.whytheluckystiff.net/hpricot/
|
75
|
+
licenses: []
|
76
|
+
|
77
77
|
post_install_message:
|
78
78
|
rdoc_options:
|
79
79
|
- --quiet
|
@@ -99,9 +99,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
requirements: []
|
100
100
|
|
101
101
|
rubyforge_project:
|
102
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.3.5
|
103
103
|
signing_key:
|
104
|
-
specification_version:
|
104
|
+
specification_version: 3
|
105
105
|
summary: a swift, liberal HTML parser with a fantastic library
|
106
106
|
test_files: []
|
107
107
|
|