martiantim-hpricot 0.8.236

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/CHANGELOG +75 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +260 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +201 -0
  8. data/ext/hpricot_scan/HpricotScanService.java +1305 -0
  9. data/ext/hpricot_scan/extconf.rb +6 -0
  10. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  11. data/ext/hpricot_scan/hpricot_css.c +3502 -0
  12. data/ext/hpricot_scan/hpricot_scan.c +6776 -0
  13. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  14. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  15. data/ext/hpricot_scan/hpricot_scan.rl +793 -0
  16. data/extras/mingw-rbconfig.rb +176 -0
  17. data/lib/hpricot.rb +26 -0
  18. data/lib/hpricot/blankslate.rb +63 -0
  19. data/lib/hpricot/builder.rb +216 -0
  20. data/lib/hpricot/elements.rb +510 -0
  21. data/lib/hpricot/htmlinfo.rb +691 -0
  22. data/lib/hpricot/inspect.rb +103 -0
  23. data/lib/hpricot/modules.rb +40 -0
  24. data/lib/hpricot/parse.rb +38 -0
  25. data/lib/hpricot/tag.rb +200 -0
  26. data/lib/hpricot/tags.rb +164 -0
  27. data/lib/hpricot/traverse.rb +838 -0
  28. data/lib/hpricot/xchar.rb +94 -0
  29. data/test/files/basic.xhtml +17 -0
  30. data/test/files/boingboing.html +2266 -0
  31. data/test/files/cy0.html +3653 -0
  32. data/test/files/immob.html +400 -0
  33. data/test/files/pace_application.html +1320 -0
  34. data/test/files/tenderlove.html +16 -0
  35. data/test/files/uswebgen.html +220 -0
  36. data/test/files/utf8.html +1054 -0
  37. data/test/files/week9.html +1723 -0
  38. data/test/files/why.xml +19 -0
  39. data/test/load_files.rb +7 -0
  40. data/test/test_alter.rb +77 -0
  41. data/test/test_builder.rb +37 -0
  42. data/test/test_parser.rb +420 -0
  43. data/test/test_paths.rb +25 -0
  44. data/test/test_preserved.rb +70 -0
  45. data/test/test_xml.rb +28 -0
  46. metadata +107 -0
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_roundtrip
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
11
+ ele = @basic.at(css_sel)
12
+ assert_equal ele, @basic.at(ele.css_path)
13
+ assert_equal ele, @basic.at(ele.xpath)
14
+ end
15
+ end
16
+ def test_attr_brackets
17
+ doc = Hpricot('<input name="vendor[porkpies]"/>')
18
+ assert_equal 1, (doc/'input[@name^="vendor[porkpies]"]').length
19
+ assert_equal 1, (doc/'input[@name="vendor[porkpies]"]').length
20
+ assert_equal 0, (doc/'input[@name$="]]]]]"]').length
21
+
22
+ doc = Hpricot('<input name="vendor[porkpies][meaty]"/>')
23
+ assert_equal 1, (doc/'input[@name^="vendor[porkpies][meaty]"]').length
24
+ end
25
+ end
@@ -0,0 +1,70 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
+ class TestPreserved < Test::Unit::TestCase
9
+ def assert_roundtrip str
10
+ doc = Hpricot(str)
11
+ yield doc if block_given?
12
+ str2 = doc.to_original_html
13
+ if RUBY_VERSION =~ /^1.9/
14
+ str2.force_encoding('UTF-8')
15
+ end
16
+ str.lines.zip(str2.lines).each do |s1, s2|
17
+ assert_equal s1, s2
18
+ end
19
+ end
20
+
21
+ def assert_html str1, str2
22
+ doc = Hpricot(str2)
23
+ yield doc if block_given?
24
+ assert_equal str1, doc.to_original_html
25
+ end
26
+
27
+ def test_simple
28
+ str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
29
+ assert_html str, str
30
+ assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
31
+ (doc/:p).set('class', 'new')
32
+ end
33
+ end
34
+
35
+ def test_parent
36
+ str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
37
+ assert_html str, str
38
+ assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
39
+ (doc/:head).remove
40
+ (doc/:div).set('id', 'all')
41
+ (doc/:p).wrap('<div></div>')
42
+ end
43
+ end
44
+
45
+ def test_escaping_of_contents
46
+ doc = Hpricot(TestFiles::BOINGBOING)
47
+ assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
48
+ end
49
+
50
+ def test_files
51
+ assert_roundtrip TestFiles::BASIC
52
+ assert_roundtrip TestFiles::BOINGBOING
53
+ assert_roundtrip TestFiles::CY0
54
+ end
55
+
56
+ def test_escaping_of_attrs
57
+ # ampersands in URLs
58
+ str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
59
+ link = (doc = Hpricot(str)).at(:a)
60
+ assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
61
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
62
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
63
+ assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
64
+ assert_equal str, doc.to_html
65
+
66
+ # alter the url
67
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
68
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
69
+ end
70
+ end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ # normally, the link tags are empty HTML tags.
9
+ # contributed by laudney.
10
+ def test_normally_empty
11
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
12
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
13
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
14
+ end
15
+
16
+ # make sure XML doesn't get downcased
17
+ def test_casing
18
+ doc = Hpricot::XML(TestFiles::WHY)
19
+ assert_equal "hourly", (doc.at "sy:updatePeriod").inner_html
20
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
21
+ end
22
+
23
+ # be sure tags named "text" are ok
24
+ def test_text_tags
25
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
26
+ assert_equal "City Poisoned", (doc/"title").text
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: martiantim-hpricot
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.236
5
+ platform: ruby
6
+ authors:
7
+ - why the lucky stiff
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-23 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: a swift, liberal HTML parser with a fantastic library
17
+ email: why@ruby-lang.org
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/fast_xs/extconf.rb
22
+ - ext/hpricot_scan/extconf.rb
23
+ extra_rdoc_files:
24
+ - README
25
+ - CHANGELOG
26
+ - COPYING
27
+ files:
28
+ - CHANGELOG
29
+ - COPYING
30
+ - README
31
+ - Rakefile
32
+ - test/files
33
+ - test/files/basic.xhtml
34
+ - test/files/boingboing.html
35
+ - test/files/cy0.html
36
+ - test/files/immob.html
37
+ - test/files/pace_application.html
38
+ - test/files/tenderlove.html
39
+ - test/files/uswebgen.html
40
+ - test/files/utf8.html
41
+ - test/files/week9.html
42
+ - test/files/why.xml
43
+ - test/load_files.rb
44
+ - test/test_alter.rb
45
+ - test/test_builder.rb
46
+ - test/test_parser.rb
47
+ - test/test_paths.rb
48
+ - test/test_preserved.rb
49
+ - test/test_xml.rb
50
+ - lib/hpricot
51
+ - lib/hpricot/blankslate.rb
52
+ - lib/hpricot/builder.rb
53
+ - lib/hpricot/elements.rb
54
+ - lib/hpricot/htmlinfo.rb
55
+ - lib/hpricot/inspect.rb
56
+ - lib/hpricot/modules.rb
57
+ - lib/hpricot/parse.rb
58
+ - lib/hpricot/tag.rb
59
+ - lib/hpricot/tags.rb
60
+ - lib/hpricot/traverse.rb
61
+ - lib/hpricot/xchar.rb
62
+ - lib/hpricot.rb
63
+ - extras/mingw-rbconfig.rb
64
+ - ext/hpricot_scan/hpricot_scan.h
65
+ - ext/fast_xs/FastXsService.java
66
+ - ext/hpricot_scan/HpricotScanService.java
67
+ - ext/fast_xs/fast_xs.c
68
+ - ext/hpricot_scan/hpricot_scan.c
69
+ - ext/hpricot_scan/hpricot_css.c
70
+ - ext/fast_xs/extconf.rb
71
+ - ext/hpricot_scan/extconf.rb
72
+ - ext/hpricot_scan/hpricot_common.rl
73
+ - ext/hpricot_scan/hpricot_scan.java.rl
74
+ - ext/hpricot_scan/hpricot_scan.rl
75
+ has_rdoc: true
76
+ homepage: http://code.whytheluckystiff.net/hpricot/
77
+ post_install_message:
78
+ rdoc_options:
79
+ - --quiet
80
+ - --title
81
+ - The Hpricot Reference
82
+ - --main
83
+ - README
84
+ - --inline-source
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: "0"
92
+ version:
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: "0"
98
+ version:
99
+ requirements: []
100
+
101
+ rubyforge_project:
102
+ rubygems_version: 1.2.0
103
+ signing_key:
104
+ specification_version: 2
105
+ summary: a swift, liberal HTML parser with a fantastic library
106
+ test_files: []
107
+