hpricot 0.8.3-i386-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/CHANGELOG +104 -0
  2. data/COPYING +18 -0
  3. data/README.md +276 -0
  4. data/Rakefile +234 -0
  5. data/ext/fast_xs/FastXsService.java +1123 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +210 -0
  8. data/ext/hpricot_scan/HpricotCss.java +850 -0
  9. data/ext/hpricot_scan/HpricotScanService.java +2099 -0
  10. data/ext/hpricot_scan/extconf.rb +9 -0
  11. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  12. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  13. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  14. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  15. data/ext/hpricot_scan/hpricot_scan.c +7039 -0
  16. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  17. data/ext/hpricot_scan/hpricot_scan.java.rl +1161 -0
  18. data/ext/hpricot_scan/hpricot_scan.rl +896 -0
  19. data/extras/hpricot.png +0 -0
  20. data/lib/fast_xs.rb +1 -0
  21. data/lib/fast_xs/1.8/fast_xs.so +0 -0
  22. data/lib/fast_xs/1.9/fast_xs.so +0 -0
  23. data/lib/hpricot.rb +26 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +216 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +94 -0
  35. data/lib/hpricot_scan.rb +1 -0
  36. data/lib/hpricot_scan/1.8/hpricot_scan.so +0 -0
  37. data/lib/hpricot_scan/1.9/hpricot_scan.so +0 -0
  38. data/test/files/basic.xhtml +17 -0
  39. data/test/files/boingboing.html +2266 -0
  40. data/test/files/cy0.html +3653 -0
  41. data/test/files/immob.html +400 -0
  42. data/test/files/pace_application.html +1320 -0
  43. data/test/files/tenderlove.html +16 -0
  44. data/test/files/uswebgen.html +220 -0
  45. data/test/files/utf8.html +1054 -0
  46. data/test/files/week9.html +1723 -0
  47. data/test/files/why.xml +19 -0
  48. data/test/load_files.rb +7 -0
  49. data/test/nokogiri-bench.rb +64 -0
  50. data/test/test_alter.rb +96 -0
  51. data/test/test_builder.rb +37 -0
  52. data/test/test_parser.rb +457 -0
  53. data/test/test_paths.rb +25 -0
  54. data/test/test_preserved.rb +88 -0
  55. data/test/test_xml.rb +28 -0
  56. metadata +128 -0
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Tests for element path generation and for attribute selectors whose
# values contain square brackets.
class TestParser < Test::Unit::TestCase
  # An element found through a CSS selector must be found again through
  # the CSS path and the XPath that the element reports for itself.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    selectors = %w[link link[2] body #link1 a p.ohmy]
    selectors.each do |selector|
      found = @basic.at(selector)
      assert_equal(found, @basic.at(found.css_path))
      assert_equal(found, @basic.at(found.xpath))
    end
  end

  # Square brackets inside a quoted attribute value must not be mistaken
  # for the end of the attribute selector.
  def test_attr_brackets
    single = Hpricot('<input name="vendor[porkpies]"/>')
    assert_equal(1, (single / 'input[@name^="vendor[porkpies]"]').length)
    assert_equal(1, (single / 'input[@name="vendor[porkpies]"]').length)
    assert_equal(0, (single / 'input[@name$="]]]]]"]').length)

    nested = Hpricot('<input name="vendor[porkpies][meaty]"/>')
    assert_equal(1, (nested / 'input[@name^="vendor[porkpies][meaty]"]').length)
  end
end
@@ -0,0 +1,88 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Compatibility shim: when String has no #lines method, define one that
# returns an enumerator driven by #each_line.
unless "".respond_to?(:lines)
  require 'enumerator'

  class String
    # Enumerator over the receiver's lines, as produced by #each_line.
    def lines
      enum_for(:each_line)
    end
  end
end
16
+
17
# Checks that Hpricot reproduces the source markup when a parsed document is
# serialized again, both untouched and after the document has been altered.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, optionally yields the document to the caller's block, and
  # asserts that #to_original_html gives back +str+ line for line.
  def assert_roundtrip(str)
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Feature-detect instead of matching RUBY_VERSION against /^1.9/: that
    # pattern left the dot unescaped and skipped every Ruby newer than 1.9.
    str2.force_encoding('UTF-8') if str2.respond_to?(:force_encoding)

    expected_lines = str.lines.to_a
    actual_lines = str2.lines.to_a
    # Compare line by line so a failure points at the offending line.
    expected_lines.zip(actual_lines).each do |s1, s2|
      assert_equal s1, s2
    end
    # zip stops at the receiver's length, so extra trailing output lines
    # would otherwise go unnoticed.
    assert_equal expected_lines.length, actual_lines.length
  end

  # Parses +str2+, optionally yields the document to the caller's block, and
  # asserts that #to_original_html equals +str1+.
  def assert_html(str1, str2)
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Mis-nested markup is preserved as written; a changed attribute shows up
  # in the output while the rest stays as it was.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements is reflected in the output.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # Text following the image keeps its non-ASCII apostrophe intact.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files must survive a parse/serialize round trip.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # The HTML parser serializes <link> as an empty tag followed by its text,
  # while the XML parser keeps the text inside the element.
  def test_fixup_link
    xml = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
    assert_roundtrip xml
    # assert_equal takes (expected, actual); keeping that order makes
    # failure messages label the two values correctly.
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>},
                 Hpricot(xml).to_s
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>},
                 Hpricot.XML(xml).to_s
  end

  # Attribute readers return unescaped values, raw_attributes keeps the
  # source escaping, and assigned values are escaped on output.
  def test_escaping_of_attrs
    # ampersands in URLs
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    doc = Hpricot(str)
    link = doc.at(:a)
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Tests for Hpricot's XML parsing mode.
class TestParser < Test::Unit::TestCase
  # In HTML the link tag is normally empty; under XML parsing its text
  # content must still be readable.
  # Contributed by laudney.
  def test_normally_empty
    feed = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
    assert_equal("this is title", (feed / :rss / :channel / :title).text)
    assert_equal("http://fake.com", (feed / :rss / :channel / :link).text)
  end

  # XML element and attribute names must keep their original casing.
  def test_casing
    feed = Hpricot::XML(TestFiles::WHY)
    update_period = feed.at("sy:updatePeriod")
    assert_equal("hourly", update_period.inner_html)
    permalink_guids = feed / "guid[@isPermaLink]"
    assert_equal(1, permalink_guids.length)
  end

  # A document containing a tag named "text" must still parse and be
  # searchable.
  def test_text_tags
    feed = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
    titles = feed / "title"
    assert_equal("City Poisoned", titles.text)
  end
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hpricot
3
+ version: !ruby/object:Gem::Version
4
+ hash: 57
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 8
9
+ - 3
10
+ version: 0.8.3
11
+ platform: i386-mswin32
12
+ authors:
13
+ - why the lucky stiff
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-03 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: a swift, liberal HTML parser with a fantastic library
23
+ email: why@ruby-lang.org
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - README.md
30
+ - CHANGELOG
31
+ - COPYING
32
+ files:
33
+ - CHANGELOG
34
+ - COPYING
35
+ - README.md
36
+ - Rakefile
37
+ - test/files/basic.xhtml
38
+ - test/files/boingboing.html
39
+ - test/files/cy0.html
40
+ - test/files/immob.html
41
+ - test/files/pace_application.html
42
+ - test/files/tenderlove.html
43
+ - test/files/uswebgen.html
44
+ - test/files/utf8.html
45
+ - test/files/week9.html
46
+ - test/files/why.xml
47
+ - test/load_files.rb
48
+ - test/nokogiri-bench.rb
49
+ - test/test_alter.rb
50
+ - test/test_builder.rb
51
+ - test/test_parser.rb
52
+ - test/test_paths.rb
53
+ - test/test_preserved.rb
54
+ - test/test_xml.rb
55
+ - extras/hpricot.png
56
+ - lib/hpricot/blankslate.rb
57
+ - lib/hpricot/builder.rb
58
+ - lib/hpricot/elements.rb
59
+ - lib/hpricot/htmlinfo.rb
60
+ - lib/hpricot/inspect.rb
61
+ - lib/hpricot/modules.rb
62
+ - lib/hpricot/parse.rb
63
+ - lib/hpricot/tag.rb
64
+ - lib/hpricot/tags.rb
65
+ - lib/hpricot/traverse.rb
66
+ - lib/hpricot/xchar.rb
67
+ - lib/hpricot.rb
68
+ - ext/hpricot_scan/hpricot_scan.h
69
+ - ext/fast_xs/FastXsService.java
70
+ - ext/hpricot_scan/HpricotCss.java
71
+ - ext/hpricot_scan/HpricotScanService.java
72
+ - ext/fast_xs/fast_xs.c
73
+ - ext/hpricot_scan/hpricot_css.c
74
+ - ext/hpricot_scan/hpricot_scan.c
75
+ - ext/fast_xs/extconf.rb
76
+ - ext/hpricot_scan/extconf.rb
77
+ - ext/hpricot_scan/hpricot_common.rl
78
+ - ext/hpricot_scan/hpricot_css.java.rl
79
+ - ext/hpricot_scan/hpricot_css.rl
80
+ - ext/hpricot_scan/hpricot_scan.java.rl
81
+ - ext/hpricot_scan/hpricot_scan.rl
82
+ - lib/hpricot_scan.rb
83
+ - lib/hpricot_scan/1.8/hpricot_scan.so
84
+ - lib/hpricot_scan/1.9/hpricot_scan.so
85
+ - lib/fast_xs.rb
86
+ - lib/fast_xs/1.8/fast_xs.so
87
+ - lib/fast_xs/1.9/fast_xs.so
88
+ has_rdoc: true
89
+ homepage: http://code.whytheluckystiff.net/hpricot/
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options:
94
+ - --quiet
95
+ - --title
96
+ - The Hpricot Reference
97
+ - --main
98
+ - README.md
99
+ - --inline-source
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ hash: 3
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ hash: 3
117
+ segments:
118
+ - 0
119
+ version: "0"
120
+ requirements: []
121
+
122
+ rubyforge_project: hobix
123
+ rubygems_version: 1.3.7
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: a swift, liberal HTML parser with a fantastic library
127
+ test_files: []
128
+