hpricot 0.8.3-i386-mswin32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/CHANGELOG +104 -0
  2. data/COPYING +18 -0
  3. data/README.md +276 -0
  4. data/Rakefile +234 -0
  5. data/ext/fast_xs/FastXsService.java +1123 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +210 -0
  8. data/ext/hpricot_scan/HpricotCss.java +850 -0
  9. data/ext/hpricot_scan/HpricotScanService.java +2099 -0
  10. data/ext/hpricot_scan/extconf.rb +9 -0
  11. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  12. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  13. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  14. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  15. data/ext/hpricot_scan/hpricot_scan.c +7039 -0
  16. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  17. data/ext/hpricot_scan/hpricot_scan.java.rl +1161 -0
  18. data/ext/hpricot_scan/hpricot_scan.rl +896 -0
  19. data/extras/hpricot.png +0 -0
  20. data/lib/fast_xs.rb +1 -0
  21. data/lib/fast_xs/1.8/fast_xs.so +0 -0
  22. data/lib/fast_xs/1.9/fast_xs.so +0 -0
  23. data/lib/hpricot.rb +26 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +216 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +94 -0
  35. data/lib/hpricot_scan.rb +1 -0
  36. data/lib/hpricot_scan/1.8/hpricot_scan.so +0 -0
  37. data/lib/hpricot_scan/1.9/hpricot_scan.so +0 -0
  38. data/test/files/basic.xhtml +17 -0
  39. data/test/files/boingboing.html +2266 -0
  40. data/test/files/cy0.html +3653 -0
  41. data/test/files/immob.html +400 -0
  42. data/test/files/pace_application.html +1320 -0
  43. data/test/files/tenderlove.html +16 -0
  44. data/test/files/uswebgen.html +220 -0
  45. data/test/files/utf8.html +1054 -0
  46. data/test/files/week9.html +1723 -0
  47. data/test/files/why.xml +19 -0
  48. data/test/load_files.rb +7 -0
  49. data/test/nokogiri-bench.rb +64 -0
  50. data/test/test_alter.rb +96 -0
  51. data/test/test_builder.rb +37 -0
  52. data/test/test_parser.rb +457 -0
  53. data/test/test_paths.rb +25 -0
  54. data/test/test_preserved.rb +88 -0
  55. data/test/test_xml.rb +28 -0
  56. metadata +128 -0
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Path tests: the css_path and xpath that Hpricot reports for an element
# must locate that same element again, and attribute selectors must accept
# square brackets inside quoted values.
class TestParser < Test::Unit::TestCase
  # For each selector, find an element and look it up again through each of
  # its own generated locators.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    selectors = ['link', 'link[2]', 'body', '#link1', 'a', 'p.ohmy']
    selectors.each do |selector|
      found = @basic.at(selector)
      [:css_path, :xpath].each do |locator|
        assert_equal found, @basic.at(found.send(locator))
      end
    end
  end

  # Brackets inside a quoted attribute value are part of the value and must
  # not terminate the attribute selector.
  def test_attr_brackets
    doc = Hpricot('<input name="vendor[porkpies]"/>')
    [
      ['input[@name^="vendor[porkpies]"]', 1],
      ['input[@name="vendor[porkpies]"]', 1],
      ['input[@name$="]]]]]"]', 0]
    ].each do |selector, count|
      assert_equal count, (doc / selector).length
    end

    doc = Hpricot('<input name="vendor[porkpies][meaty]"/>')
    matches = doc / 'input[@name^="vendor[porkpies][meaty]"]'
    assert_equal 1, matches.length
  end
end
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Backport of String#lines for Ruby versions whose String does not provide
# it; on Rubies that already define the method this block does nothing.
unless String.public_method_defined?(:lines)
  require 'enumerator'

  class String
    # Returns an enumerator that walks the receiver via each_line.
    def lines
      enum_for(:each_line)
    end
  end
end
16
+
17
# Checks that Hpricot can regenerate the markup it parsed (through
# to_original_html / to_html) and that entities in text and attribute values
# are escaped and unescaped as the assertions below spell out.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, optionally yields the document so the caller can touch it,
  # then asserts that to_original_html reproduces +str+ line for line.
  def assert_roundtrip(str)
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Tag the output as UTF-8 on every Ruby that has string encodings, not
    # only on 1.9, so the comparison below is made in a single encoding.
    str2.force_encoding('UTF-8') if str2.respond_to?(:force_encoding)
    expected_lines = str.lines.to_a
    actual_lines = str2.lines.to_a
    expected_lines.zip(actual_lines).each do |s1, s2|
      assert_equal s1, s2
    end
    # zip stops at the end of the expected lines, so extra trailing output
    # has to be caught separately.
    assert_equal expected_lines.length, actual_lines.length
  end

  # Parses +str2+, optionally yields the document for modification, and
  # asserts that to_original_html equals +str1+.
  def assert_html(str1, str2)
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Badly nested markup is preserved as written, also after an attribute
  # has been set on the paragraph.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements leaves the untouched
  # markup around them unchanged.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # The text node following the image keeps its typographic apostrophe.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files survive a parse / to_original_html round trip.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # The HTML parser serializes <link> as an empty tag, while the XML parser
  # keeps its content; the original text round-trips in both cases.
  def test_fixup_link
    doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
    assert_roundtrip doc
    # Expected value first, so a failure message labels the two sides correctly.
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>},
                 Hpricot(doc).to_s
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>},
                 Hpricot.XML(doc).to_s
  end

  # Attribute readers return unescaped values, raw_attributes keeps the
  # source form, and values assigned later are escaped on output.
  def test_escaping_of_attrs
    # ampersands in URLs
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    doc = Hpricot(str)
    link = doc.at(:a)
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# XML-mode tests: Hpricot.XML must not apply HTML-specific handling to
# tag names or their contents.
class TestParser < Test::Unit::TestCase
  # normally, the link tags are empty HTML tags; in XML mode they keep
  # their text.
  # contributed by laudney.
  def test_normally_empty
    feed = Hpricot.XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
    [
      [:title, "this is title"],
      [:link, "http://fake.com"]
    ].each do |tag, expected|
      assert_equal expected, (feed / :rss / :channel / tag).text
    end
  end

  # make sure XML doesn't get downcased
  def test_casing
    feed = Hpricot.XML(TestFiles::WHY)
    period = feed.at("sy:updatePeriod")
    assert_equal "hourly", period.inner_html
    permalinks = feed / "guid[@isPermaLink]"
    assert_equal 1, permalinks.length
  end

  # be sure tags named "text" are ok
  def test_text_tags
    feed = Hpricot.XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
    titles = feed / "title"
    assert_equal "City Poisoned", titles.text
  end
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hpricot
3
+ version: !ruby/object:Gem::Version
4
+ hash: 57
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 8
9
+ - 3
10
+ version: 0.8.3
11
+ platform: i386-mswin32
12
+ authors:
13
+ - why the lucky stiff
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-03 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: a swift, liberal HTML parser with a fantastic library
23
+ email: why@ruby-lang.org
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - README.md
30
+ - CHANGELOG
31
+ - COPYING
32
+ files:
33
+ - CHANGELOG
34
+ - COPYING
35
+ - README.md
36
+ - Rakefile
37
+ - test/files/basic.xhtml
38
+ - test/files/boingboing.html
39
+ - test/files/cy0.html
40
+ - test/files/immob.html
41
+ - test/files/pace_application.html
42
+ - test/files/tenderlove.html
43
+ - test/files/uswebgen.html
44
+ - test/files/utf8.html
45
+ - test/files/week9.html
46
+ - test/files/why.xml
47
+ - test/load_files.rb
48
+ - test/nokogiri-bench.rb
49
+ - test/test_alter.rb
50
+ - test/test_builder.rb
51
+ - test/test_parser.rb
52
+ - test/test_paths.rb
53
+ - test/test_preserved.rb
54
+ - test/test_xml.rb
55
+ - extras/hpricot.png
56
+ - lib/hpricot/blankslate.rb
57
+ - lib/hpricot/builder.rb
58
+ - lib/hpricot/elements.rb
59
+ - lib/hpricot/htmlinfo.rb
60
+ - lib/hpricot/inspect.rb
61
+ - lib/hpricot/modules.rb
62
+ - lib/hpricot/parse.rb
63
+ - lib/hpricot/tag.rb
64
+ - lib/hpricot/tags.rb
65
+ - lib/hpricot/traverse.rb
66
+ - lib/hpricot/xchar.rb
67
+ - lib/hpricot.rb
68
+ - ext/hpricot_scan/hpricot_scan.h
69
+ - ext/fast_xs/FastXsService.java
70
+ - ext/hpricot_scan/HpricotCss.java
71
+ - ext/hpricot_scan/HpricotScanService.java
72
+ - ext/fast_xs/fast_xs.c
73
+ - ext/hpricot_scan/hpricot_css.c
74
+ - ext/hpricot_scan/hpricot_scan.c
75
+ - ext/fast_xs/extconf.rb
76
+ - ext/hpricot_scan/extconf.rb
77
+ - ext/hpricot_scan/hpricot_common.rl
78
+ - ext/hpricot_scan/hpricot_css.java.rl
79
+ - ext/hpricot_scan/hpricot_css.rl
80
+ - ext/hpricot_scan/hpricot_scan.java.rl
81
+ - ext/hpricot_scan/hpricot_scan.rl
82
+ - lib/hpricot_scan.rb
83
+ - lib/hpricot_scan/1.8/hpricot_scan.so
84
+ - lib/hpricot_scan/1.9/hpricot_scan.so
85
+ - lib/fast_xs.rb
86
+ - lib/fast_xs/1.8/fast_xs.so
87
+ - lib/fast_xs/1.9/fast_xs.so
88
+ has_rdoc: true
89
+ homepage: http://code.whytheluckystiff.net/hpricot/
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options:
94
+ - --quiet
95
+ - --title
96
+ - The Hpricot Reference
97
+ - --main
98
+ - README.md
99
+ - --inline-source
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ hash: 3
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ hash: 3
117
+ segments:
118
+ - 0
119
+ version: "0"
120
+ requirements: []
121
+
122
+ rubyforge_project: hobix
123
+ rubygems_version: 1.3.7
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: a swift, liberal HTML parser with a fantastic library
127
+ test_files: []
128
+