hpricot 0.6-jruby

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +211 -0
  5. data/ext/hpricot_scan/HpricotScanService.java +1340 -0
  6. data/ext/hpricot_scan/extconf.rb +6 -0
  7. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  8. data/ext/hpricot_scan/hpricot_scan.c +5976 -0
  9. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  10. data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
  11. data/ext/hpricot_scan/hpricot_scan.rl +273 -0
  12. data/extras/mingw-rbconfig.rb +176 -0
  13. data/lib/hpricot.rb +26 -0
  14. data/lib/hpricot/blankslate.rb +63 -0
  15. data/lib/hpricot/builder.rb +200 -0
  16. data/lib/hpricot/elements.rb +510 -0
  17. data/lib/hpricot/htmlinfo.rb +672 -0
  18. data/lib/hpricot/inspect.rb +107 -0
  19. data/lib/hpricot/modules.rb +37 -0
  20. data/lib/hpricot/parse.rb +297 -0
  21. data/lib/hpricot/tag.rb +228 -0
  22. data/lib/hpricot/tags.rb +164 -0
  23. data/lib/hpricot/traverse.rb +821 -0
  24. data/lib/hpricot/xchar.rb +94 -0
  25. data/lib/i686-linux/hpricot_scan.jar +0 -0
  26. data/test/files/basic.xhtml +17 -0
  27. data/test/files/boingboing.html +2266 -0
  28. data/test/files/cy0.html +3653 -0
  29. data/test/files/immob.html +400 -0
  30. data/test/files/pace_application.html +1320 -0
  31. data/test/files/tenderlove.html +16 -0
  32. data/test/files/uswebgen.html +220 -0
  33. data/test/files/utf8.html +1054 -0
  34. data/test/files/week9.html +1723 -0
  35. data/test/files/why.xml +19 -0
  36. data/test/load_files.rb +7 -0
  37. data/test/test_alter.rb +65 -0
  38. data/test/test_builder.rb +24 -0
  39. data/test/test_parser.rb +379 -0
  40. data/test/test_paths.rb +16 -0
  41. data/test/test_preserved.rb +66 -0
  42. data/test/test_xml.rb +28 -0
  43. metadata +98 -0
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_roundtrip
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
11
+ ele = @basic.at(css_sel)
12
+ assert_equal ele, @basic.at(ele.css_path)
13
+ assert_equal ele, @basic.at(ele.xpath)
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestPreserved < Test::Unit::TestCase
8
+ def assert_roundtrip str
9
+ doc = Hpricot(str)
10
+ yield doc if block_given?
11
+ str2 = doc.to_original_html
12
+ [*str].zip([*str2]).each do |s1, s2|
13
+ assert_equal s1, s2
14
+ end
15
+ end
16
+
17
+ def assert_html str1, str2
18
+ doc = Hpricot(str2)
19
+ yield doc if block_given?
20
+ assert_equal str1, doc.to_original_html
21
+ end
22
+
23
+ def test_simple
24
+ str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
25
+ assert_html str, str
26
+ assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
27
+ (doc/:p).set('class', 'new')
28
+ end
29
+ end
30
+
31
+ def test_parent
32
+ str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
33
+ assert_html str, str
34
+ assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
35
+ (doc/:head).remove
36
+ (doc/:div).set('id', 'all')
37
+ (doc/:p).wrap('<div></div>')
38
+ end
39
+ end
40
+
41
+ def test_escaping_of_contents
42
+ doc = Hpricot(TestFiles::BOINGBOING)
43
+ assert_equal "Fukuda\342\200\231s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
44
+ end
45
+
46
+ def test_files
47
+ assert_roundtrip TestFiles::BASIC
48
+ assert_roundtrip TestFiles::BOINGBOING
49
+ assert_roundtrip TestFiles::CY0
50
+ end
51
+
52
+ def test_escaping_of_attrs
53
+ # ampersands in URLs
54
+ str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
55
+ link = (doc = Hpricot(str)).at(:a)
56
+ assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
57
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
58
+ assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
59
+ assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
60
+ assert_equal str, doc.to_html
61
+
62
+ # alter the url
63
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
64
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
65
+ end
66
+ end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ # normally, the link tags are empty HTML tags.
9
+ # contributed by laudney.
10
+ def test_normally_empty
11
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
12
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
13
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
14
+ end
15
+
16
+ # make sure XML doesn't get downcased
17
+ def test_casing
18
+ doc = Hpricot::XML(TestFiles::WHY)
19
+ assert_equal "hourly", (doc.at "sy:updatePeriod").inner_html
20
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
21
+ end
22
+
23
+ # be sure tags named "text" are ok
24
+ def test_text_tags
25
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
26
+ assert_equal "City Poisoned", (doc/"title").text
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: hpricot
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.6"
7
+ date: 2007-06-15 00:00:00 -07:00
8
+ summary: a swift, liberal HTML parser with a fantastic library
9
+ require_paths:
10
+ - lib/i686-linux
11
+ - lib
12
+ email: why@ruby-lang.org
13
+ homepage: http://code.whytheluckystiff.net/hpricot/
14
+ rubyforge_project:
15
+ description: a swift, liberal HTML parser with a fantastic library
16
+ autorequire:
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: jruby
27
+ signing_key:
28
+ cert_chain:
29
+ post_install_message:
30
+ authors:
31
+ - why the lucky stiff
32
+ files:
33
+ - CHANGELOG
34
+ - COPYING
35
+ - README
36
+ - Rakefile
37
+ - test/files
38
+ - test/test_preserved.rb
39
+ - test/test_paths.rb
40
+ - test/load_files.rb
41
+ - test/test_xml.rb
42
+ - test/test_parser.rb
43
+ - test/test_alter.rb
44
+ - test/test_builder.rb
45
+ - test/files/why.xml
46
+ - test/files/boingboing.html
47
+ - test/files/uswebgen.html
48
+ - test/files/immob.html
49
+ - test/files/week9.html
50
+ - test/files/utf8.html
51
+ - test/files/basic.xhtml
52
+ - test/files/cy0.html
53
+ - test/files/tenderlove.html
54
+ - test/files/pace_application.html
55
+ - lib/hpricot
56
+ - lib/hpricot.rb
57
+ - lib/i686-linux
58
+ - lib/hpricot/builder.rb
59
+ - lib/hpricot/htmlinfo.rb
60
+ - lib/hpricot/xchar.rb
61
+ - lib/hpricot/inspect.rb
62
+ - lib/hpricot/modules.rb
63
+ - lib/hpricot/parse.rb
64
+ - lib/hpricot/tag.rb
65
+ - lib/hpricot/traverse.rb
66
+ - lib/hpricot/elements.rb
67
+ - lib/hpricot/tags.rb
68
+ - lib/hpricot/blankslate.rb
69
+ - extras/mingw-rbconfig.rb
70
+ - ext/hpricot_scan/hpricot_scan.h
71
+ - ext/hpricot_scan/HpricotScanService.java
72
+ - ext/hpricot_scan/hpricot_scan.c
73
+ - ext/hpricot_scan/extconf.rb
74
+ - ext/hpricot_scan/hpricot_common.rl
75
+ - ext/hpricot_scan/hpricot_scan.rl
76
+ - ext/hpricot_scan/hpricot_scan.java.rl
77
+ - lib/i686-linux/hpricot_scan.jar
78
+ test_files: []
79
+
80
+ rdoc_options:
81
+ - --quiet
82
+ - --title
83
+ - The Hpricot Reference
84
+ - --main
85
+ - README
86
+ - --inline-source
87
+ extra_rdoc_files:
88
+ - README
89
+ - CHANGELOG
90
+ - COPYING
91
+ executables: []
92
+
93
+ extensions: []
94
+
95
+ requirements: []
96
+
97
+ dependencies: []
98
+