hpricot 0.8.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/CHANGELOG +88 -0
  2. data/COPYING +18 -0
  3. data/README +275 -0
  4. data/Rakefile +272 -0
  5. data/ext/fast_xs/FastXsService.java +1030 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +201 -0
  8. data/ext/hpricot_scan/HpricotCss.java +831 -0
  9. data/ext/hpricot_scan/HpricotScanService.java +2086 -0
  10. data/ext/hpricot_scan/extconf.rb +6 -0
  11. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  12. data/ext/hpricot_scan/hpricot_css.c +3503 -0
  13. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  14. data/ext/hpricot_scan/hpricot_css.rl +115 -0
  15. data/ext/hpricot_scan/hpricot_scan.c +6927 -0
  16. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  17. data/ext/hpricot_scan/hpricot_scan.java.rl +1152 -0
  18. data/ext/hpricot_scan/hpricot_scan.rl +788 -0
  19. data/extras/mingw-rbconfig.rb +176 -0
  20. data/lib/fast_xs.jar +0 -0
  21. data/lib/hpricot.rb +26 -0
  22. data/lib/hpricot/blankslate.rb +63 -0
  23. data/lib/hpricot/builder.rb +216 -0
  24. data/lib/hpricot/elements.rb +510 -0
  25. data/lib/hpricot/htmlinfo.rb +691 -0
  26. data/lib/hpricot/inspect.rb +103 -0
  27. data/lib/hpricot/modules.rb +40 -0
  28. data/lib/hpricot/parse.rb +38 -0
  29. data/lib/hpricot/tag.rb +219 -0
  30. data/lib/hpricot/tags.rb +164 -0
  31. data/lib/hpricot/traverse.rb +839 -0
  32. data/lib/hpricot/xchar.rb +94 -0
  33. data/lib/hpricot_scan.jar +0 -0
  34. data/test/files/basic.xhtml +17 -0
  35. data/test/files/boingboing.html +2266 -0
  36. data/test/files/cy0.html +3653 -0
  37. data/test/files/immob.html +400 -0
  38. data/test/files/pace_application.html +1320 -0
  39. data/test/files/tenderlove.html +16 -0
  40. data/test/files/uswebgen.html +220 -0
  41. data/test/files/utf8.html +1054 -0
  42. data/test/files/week9.html +1723 -0
  43. data/test/files/why.xml +19 -0
  44. data/test/load_files.rb +7 -0
  45. data/test/nokogiri-bench.rb +64 -0
  46. data/test/test_alter.rb +96 -0
  47. data/test/test_builder.rb +37 -0
  48. data/test/test_parser.rb +428 -0
  49. data/test/test_paths.rb +25 -0
  50. data/test/test_preserved.rb +88 -0
  51. data/test/test_xml.rb +28 -0
  52. metadata +112 -0
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Path-related checks: generated CSS/XPath paths must resolve back to the
# element they came from, and attribute selectors must cope with square
# brackets inside quoted attribute values.
class TestParser < Test::Unit::TestCase
  # For each selector, the element it finds must be found again through the
  # element's own css_path and xpath.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    ['link', 'link[2]', 'body', '#link1', 'a', 'p.ohmy'].each do |selector|
      found = @basic.at(selector)
      assert_equal(found, @basic.at(found.css_path))
      assert_equal(found, @basic.at(found.xpath))
    end
  end

  # Square brackets inside a quoted attribute value must not be mistaken for
  # the end of the attribute selector.
  def test_attr_brackets
    single = Hpricot('<input name="vendor[porkpies]"/>')
    [
      [1, 'input[@name^="vendor[porkpies]"]'],
      [1, 'input[@name="vendor[porkpies]"]'],
      [0, 'input[@name$="]]]]]"]']
    ].each do |expected, selector|
      assert_equal(expected, (single / selector).length)
    end

    nested = Hpricot('<input name="vendor[porkpies][meaty]"/>')
    assert_equal(1, (nested / 'input[@name^="vendor[porkpies][meaty]"]').length)
  end
end
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Compatibility shim: when String has no #lines method, define one that
# wraps each_line in an enumerator so Enumerable methods (e.g. zip) can be
# chained on it. Does nothing when String#lines already exists.
unless ''.respond_to?(:lines)
  require 'enumerator'

  String.class_eval do
    # Returns an enumerator that walks the receiver via each_line.
    def lines
      Enumerable::Enumerator.new(self, :each_line)
    end
  end
end
16
+
17
# Checks that Hpricot keeps the original markup intact: unmodified documents
# are compared against their to_original_html output, and modified documents
# are compared against hand-written expected markup.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, yields the document to the optional block so the caller can
  # modify it, then asserts that to_original_html matches +str+ line by line.
  def assert_roundtrip(str)
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Tag the output as UTF-8 on every Ruby whose strings carry an encoding.
    # The earlier RUBY_VERSION =~ /^1.9/ check had an unescaped dot and
    # skipped all later encoding-aware Ruby versions.
    str2.force_encoding('UTF-8') if str2.respond_to?(:force_encoding)
    # NOTE: zip iterates over the lines of +str+ only, so extra trailing
    # lines in the output are not compared.
    str.lines.zip(str2.lines).each do |s1, s2|
      assert_equal s1, s2
    end
  end

  # Parses +str2+, yields the document to the optional block so the caller can
  # modify it, then asserts that to_original_html equals +str1+.
  def assert_html(str1, str2)
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Mis-nested markup survives untouched, and setting an attribute only
  # changes the affected tag.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements leaves the rest of the
  # original markup as written.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # Text following the image keeps its non-ASCII apostrophe.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files must round-trip.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # <link> is treated as an empty tag by the HTML parser but keeps its
  # content under the XML parser; the original text round-trips either way.
  def test_fixup_link
    doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
    assert_roundtrip doc
    # Expected value first, matching the other assertions in this class, so
    # failure messages label the two sides correctly.
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>},
                 Hpricot(doc).to_s
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>},
                 Hpricot.XML(doc).to_s
  end

  # Attribute accessors return unescaped values, raw_attributes returns the
  # text as written, and assigned values are escaped on output.
  def test_escaping_of_attrs
    # ampersands in URLs
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    doc = Hpricot(str)
    link = doc.at(:a)
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Checks specific to Hpricot's XML mode.
class TestParser < Test::Unit::TestCase
  # normally, the link tags are empty HTML tags.
  # contributed by laudney.
  def test_normally_empty
    feed = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
    assert_equal('this is title', (feed / :rss / :channel / :title).text)
    assert_equal('http://fake.com', (feed / :rss / :channel / :link).text)
  end

  # make sure XML doesn't get downcased
  def test_casing
    feed = Hpricot::XML(TestFiles::WHY)
    assert_equal('hourly', feed.at('sy:updatePeriod').inner_html)
    assert_equal(1, (feed / 'guid[@isPermaLink]').length)
  end

  # be sure tags named "text" are ok
  def test_text_tags
    feed = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
    assert_equal('City Poisoned', (feed / 'title').text)
  end
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hpricot
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.2
5
+ platform: java
6
+ authors:
7
+ - why the lucky stiff
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-05 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: a swift, liberal HTML parser with a fantastic library
17
+ email: why@ruby-lang.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - CHANGELOG
25
+ - COPYING
26
+ files:
27
+ - CHANGELOG
28
+ - COPYING
29
+ - README
30
+ - Rakefile
31
+ - test/files/basic.xhtml
32
+ - test/files/boingboing.html
33
+ - test/files/cy0.html
34
+ - test/files/immob.html
35
+ - test/files/pace_application.html
36
+ - test/files/tenderlove.html
37
+ - test/files/uswebgen.html
38
+ - test/files/utf8.html
39
+ - test/files/week9.html
40
+ - test/files/why.xml
41
+ - test/load_files.rb
42
+ - test/nokogiri-bench.rb
43
+ - test/test_alter.rb
44
+ - test/test_builder.rb
45
+ - test/test_parser.rb
46
+ - test/test_paths.rb
47
+ - test/test_preserved.rb
48
+ - test/test_xml.rb
49
+ - lib/hpricot/blankslate.rb
50
+ - lib/hpricot/builder.rb
51
+ - lib/hpricot/elements.rb
52
+ - lib/hpricot/htmlinfo.rb
53
+ - lib/hpricot/inspect.rb
54
+ - lib/hpricot/modules.rb
55
+ - lib/hpricot/parse.rb
56
+ - lib/hpricot/tag.rb
57
+ - lib/hpricot/tags.rb
58
+ - lib/hpricot/traverse.rb
59
+ - lib/hpricot/xchar.rb
60
+ - lib/hpricot.rb
61
+ - extras/mingw-rbconfig.rb
62
+ - ext/hpricot_scan/hpricot_scan.h
63
+ - ext/fast_xs/FastXsService.java
64
+ - ext/hpricot_scan/HpricotCss.java
65
+ - ext/hpricot_scan/HpricotScanService.java
66
+ - ext/fast_xs/fast_xs.c
67
+ - ext/hpricot_scan/hpricot_css.c
68
+ - ext/hpricot_scan/hpricot_scan.c
69
+ - ext/fast_xs/extconf.rb
70
+ - ext/hpricot_scan/extconf.rb
71
+ - ext/hpricot_scan/hpricot_common.rl
72
+ - ext/hpricot_scan/hpricot_css.java.rl
73
+ - ext/hpricot_scan/hpricot_css.rl
74
+ - ext/hpricot_scan/hpricot_scan.java.rl
75
+ - ext/hpricot_scan/hpricot_scan.rl
76
+ - lib/hpricot_scan.jar
77
+ - lib/fast_xs.jar
78
+ has_rdoc: true
79
+ homepage: http://code.whytheluckystiff.net/hpricot/
80
+ licenses: []
81
+
82
+ post_install_message:
83
+ rdoc_options:
84
+ - --quiet
85
+ - --title
86
+ - The Hpricot Reference
87
+ - --main
88
+ - README
89
+ - --inline-source
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: "0"
97
+ version:
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: "0"
103
+ version:
104
+ requirements: []
105
+
106
+ rubyforge_project: hobix
107
+ rubygems_version: 1.3.5
108
+ signing_key:
109
+ specification_version: 3
110
+ summary: a swift, liberal HTML parser with a fantastic library
111
+ test_files: []
112
+