hpricot 0.6-jruby

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +211 -0
  5. data/ext/hpricot_scan/HpricotScanService.java +1340 -0
  6. data/ext/hpricot_scan/extconf.rb +6 -0
  7. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  8. data/ext/hpricot_scan/hpricot_scan.c +5976 -0
  9. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  10. data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
  11. data/ext/hpricot_scan/hpricot_scan.rl +273 -0
  12. data/extras/mingw-rbconfig.rb +176 -0
  13. data/lib/hpricot.rb +26 -0
  14. data/lib/hpricot/blankslate.rb +63 -0
  15. data/lib/hpricot/builder.rb +200 -0
  16. data/lib/hpricot/elements.rb +510 -0
  17. data/lib/hpricot/htmlinfo.rb +672 -0
  18. data/lib/hpricot/inspect.rb +107 -0
  19. data/lib/hpricot/modules.rb +37 -0
  20. data/lib/hpricot/parse.rb +297 -0
  21. data/lib/hpricot/tag.rb +228 -0
  22. data/lib/hpricot/tags.rb +164 -0
  23. data/lib/hpricot/traverse.rb +821 -0
  24. data/lib/hpricot/xchar.rb +94 -0
  25. data/lib/i686-linux/hpricot_scan.jar +0 -0
  26. data/test/files/basic.xhtml +17 -0
  27. data/test/files/boingboing.html +2266 -0
  28. data/test/files/cy0.html +3653 -0
  29. data/test/files/immob.html +400 -0
  30. data/test/files/pace_application.html +1320 -0
  31. data/test/files/tenderlove.html +16 -0
  32. data/test/files/uswebgen.html +220 -0
  33. data/test/files/utf8.html +1054 -0
  34. data/test/files/week9.html +1723 -0
  35. data/test/files/why.xml +19 -0
  36. data/test/load_files.rb +7 -0
  37. data/test/test_alter.rb +65 -0
  38. data/test/test_builder.rb +24 -0
  39. data/test/test_parser.rb +379 -0
  40. data/test/test_paths.rb +16 -0
  41. data/test/test_preserved.rb +66 -0
  42. data/test/test_xml.rb +28 -0
  43. metadata +98 -0
data/test/test_paths.rb ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_roundtrip
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
11
+ ele = @basic.at(css_sel)
12
+ assert_equal ele, @basic.at(ele.css_path)
13
+ assert_equal ele, @basic.at(ele.xpath)
14
+ end
15
+ end
16
+ end
data/test/test_preserved.rb ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestPreserved < Test::Unit::TestCase
8
+ def assert_roundtrip str
9
+ doc = Hpricot(str)
10
+ yield doc if block_given?
11
+ str2 = doc.to_original_html
12
+ [*str].zip([*str2]).each do |s1, s2|
13
+ assert_equal s1, s2
14
+ end
15
+ end
16
+
17
+ def assert_html str1, str2
18
+ doc = Hpricot(str2)
19
+ yield doc if block_given?
20
+ assert_equal str1, doc.to_original_html
21
+ end
22
+
23
+ def test_simple
24
+ str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
25
+ assert_html str, str
26
+ assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
27
+ (doc/:p).set('class', 'new')
28
+ end
29
+ end
30
+
31
+ def test_parent
32
+ str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
33
+ assert_html str, str
34
+ assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
35
+ (doc/:head).remove
36
+ (doc/:div).set('id', 'all')
37
+ (doc/:p).wrap('<div></div>')
38
+ end
39
+ end
40
+
41
+ def test_escaping_of_contents
42
+ doc = Hpricot(TestFiles::BOINGBOING)
43
+ assert_equal "Fukuda\342\200\231s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
44
+ end
45
+
46
+ def test_files
47
+ assert_roundtrip TestFiles::BASIC
48
+ assert_roundtrip TestFiles::BOINGBOING
49
+ assert_roundtrip TestFiles::CY0
50
+ end
51
+
52
+ def test_escaping_of_attrs
53
+ # ampersands in URLs
54
+ str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
55
+ link = (doc = Hpricot(str)).at(:a)
56
+ assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
57
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
58
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
59
+ assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
60
+ assert_equal str, doc.to_html
61
+
62
+ # alter the url
63
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
64
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
65
+ end
66
+ end
data/test/test_xml.rb ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ # normally, the link tags are empty HTML tags.
9
+ # contributed by laudney.
10
+ def test_normally_empty
11
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
12
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
13
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
14
+ end
15
+
16
+ # make sure XML doesn't get downcased
17
+ def test_casing
18
+ doc = Hpricot::XML(TestFiles::WHY)
19
+ assert_equal "hourly", (doc.at "sy:updatePeriod").inner_html
20
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
21
+ end
22
+
23
+ # be sure tags named "text" are ok
24
+ def test_text_tags
25
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
26
+ assert_equal "City Poisoned", (doc/"title").text
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: hpricot
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.6"
7
+ date: 2007-06-15 00:00:00 -07:00
8
+ summary: a swift, liberal HTML parser with a fantastic library
9
+ require_paths:
10
+ - lib/i686-linux
11
+ - lib
12
+ email: why@ruby-lang.org
13
+ homepage: http://code.whytheluckystiff.net/hpricot/
14
+ rubyforge_project:
15
+ description: a swift, liberal HTML parser with a fantastic library
16
+ autorequire:
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: jruby
27
+ signing_key:
28
+ cert_chain:
29
+ post_install_message:
30
+ authors:
31
+ - why the lucky stiff
32
+ files:
33
+ - CHANGELOG
34
+ - COPYING
35
+ - README
36
+ - Rakefile
37
+ - test/files
38
+ - test/test_preserved.rb
39
+ - test/test_paths.rb
40
+ - test/load_files.rb
41
+ - test/test_xml.rb
42
+ - test/test_parser.rb
43
+ - test/test_alter.rb
44
+ - test/test_builder.rb
45
+ - test/files/why.xml
46
+ - test/files/boingboing.html
47
+ - test/files/uswebgen.html
48
+ - test/files/immob.html
49
+ - test/files/week9.html
50
+ - test/files/utf8.html
51
+ - test/files/basic.xhtml
52
+ - test/files/cy0.html
53
+ - test/files/tenderlove.html
54
+ - test/files/pace_application.html
55
+ - lib/hpricot
56
+ - lib/hpricot.rb
57
+ - lib/i686-linux
58
+ - lib/hpricot/builder.rb
59
+ - lib/hpricot/htmlinfo.rb
60
+ - lib/hpricot/xchar.rb
61
+ - lib/hpricot/inspect.rb
62
+ - lib/hpricot/modules.rb
63
+ - lib/hpricot/parse.rb
64
+ - lib/hpricot/tag.rb
65
+ - lib/hpricot/traverse.rb
66
+ - lib/hpricot/elements.rb
67
+ - lib/hpricot/tags.rb
68
+ - lib/hpricot/blankslate.rb
69
+ - extras/mingw-rbconfig.rb
70
+ - ext/hpricot_scan/hpricot_scan.h
71
+ - ext/hpricot_scan/HpricotScanService.java
72
+ - ext/hpricot_scan/hpricot_scan.c
73
+ - ext/hpricot_scan/extconf.rb
74
+ - ext/hpricot_scan/hpricot_common.rl
75
+ - ext/hpricot_scan/hpricot_scan.rl
76
+ - ext/hpricot_scan/hpricot_scan.java.rl
77
+ - lib/i686-linux/hpricot_scan.jar
78
+ test_files: []
79
+
80
+ rdoc_options:
81
+ - --quiet
82
+ - --title
83
+ - The Hpricot Reference
84
+ - --main
85
+ - README
86
+ - --inline-source
87
+ extra_rdoc_files:
88
+ - README
89
+ - CHANGELOG
90
+ - COPYING
91
+ executables: []
92
+
93
+ extensions: []
94
+
95
+ requirements: []
96
+
97
+ dependencies: []
98
+