webtranslateit-hpricot 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
Binary file
data/hpricot.gemspec ADDED
@@ -0,0 +1,18 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = %q{webtranslateit-hpricot}
3
+ s.version = "0.9.0"
4
+
5
+ s.authors = ["why the lucky stiff"]
6
+ s.date = %q{2012-01-17}
7
+ s.description = %q{a swift, liberal HTML parser with a fantastic library}
8
+ s.email = %q{why@ruby-lang.org}
9
+ s.license = "MIT"
10
+ s.extensions = ["ext/fast_xs/extconf.rb", "ext/hpricot_scan/extconf.rb"]
11
+ s.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"]
12
+ s.files = %w(.gitignore CHANGELOG COPYING README.md Rakefile ext/fast_xs/FastXsService.java ext/fast_xs/extconf.rb ext/fast_xs/fast_xs.c ext/hpricot_scan/HpricotCss.java ext/hpricot_scan/HpricotScanService.java ext/hpricot_scan/MANIFEST ext/hpricot_scan/extconf.rb ext/hpricot_scan/hpricot_common.rl ext/hpricot_scan/hpricot_css.c ext/hpricot_scan/hpricot_css.java.rl ext/hpricot_scan/hpricot_css.rl ext/hpricot_scan/hpricot_scan.c ext/hpricot_scan/hpricot_scan.h ext/hpricot_scan/hpricot_scan.java.rl ext/hpricot_scan/hpricot_scan.rl extras/hpricot.png hpricot.gemspec lib/hpricot.rb lib/hpricot/blankslate.rb lib/hpricot/builder.rb lib/hpricot/elements.rb lib/hpricot/htmlinfo.rb lib/hpricot/inspect.rb lib/hpricot/modules.rb lib/hpricot/parse.rb lib/hpricot/tag.rb lib/hpricot/tags.rb lib/hpricot/traverse.rb lib/hpricot/xchar.rb setup.rb test/files/basic.xhtml test/files/boingboing.html test/files/cy0.html test/files/immob.html test/files/pace_application.html test/files/tenderlove.html test/files/uswebgen.html test/files/utf8.html test/files/week9.html test/files/why.xml test/load_files.rb test/nokogiri-bench.rb test/test_alter.rb test/test_builder.rb test/test_parser.rb test/test_paths.rb test/test_preserved.rb test/test_xml.rb)
13
+ s.has_rdoc = true
14
+ s.homepage = %q{http://wiki.github.com/hpricot/hpricot}
15
+ s.rdoc_options = ["--quiet", "--title", "The Hpricot Reference", "--main", "README", "--inline-source"]
16
+ s.require_paths = ["lib"]
17
+ s.summary = %q{a swift, liberal HTML parser with a fantastic library}
18
+ end
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
4
+ # All rights reserved.
5
+
6
+ # Permission is granted for use, copying, modification, distribution,
7
+ # and distribution of modified versions of this work as long as the
8
+ # above copyright notice is included.
9
+ #++
10
+
11
+ module Hpricot
12
+
13
+ # BlankSlate provides an abstract base class with no predefined
14
+ # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
15
+ # BlankSlate is useful as a base class when writing classes that
16
+ # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
17
# Base class whose inherited public/protected instance methods are removed
# at definition time, so that subclasses relying on +method_missing+ see
# (almost) every call.  Methods whose names start with "__" or
# "instance_eval" are kept.
class BlankSlate
  class << self

    # Hide the method named +name+ in the BlankSlate class.  Don't
    # hide +instance_eval+ or any method beginning with "__".
    #
    # +name+ may be a Symbol or a String.  Names that are not currently
    # defined as public/protected instance methods are ignored.
    def hide(name)
      # method_defined? accepts Symbols and Strings alike.  The previous
      # check, instance_methods.include?(name.to_s), compared a String with
      # the Symbols returned by instance_methods on Ruby >= 1.9 and so never
      # matched, which left every method in place.
      return unless method_defined?(name)
      return if name.to_s =~ /^(__|instance_eval)/

      undef_method name
    end
  end

  # Strip everything inherited at the time the class is defined.
  instance_methods.each { |m| hide(m) }
end
31
+ end
32
+
33
+ # Since Ruby is very dynamic, methods added to the ancestors of
34
+ # BlankSlate <em>after BlankSlate is defined</em> will show up in the
35
+ # list of available BlankSlate methods. We handle this by defining a
36
+ # hook in the Object and Kernel classes that will hide any methods defined later.
37
module Kernel
  class << self
    # Keep the previous hook reachable so it can still be chained to.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is added.  Chains to the
    # previous hook, then asks BlankSlate to hide the new method when the
    # receiver is Kernel itself.
    def method_added(added)
      hpricot_slate_method_added(added)
      Hpricot::BlankSlate.hide(added) if self == Kernel
    end
  end
end
50
+
51
class Object
  class << self
    # Keep the previous hook reachable so it can still be chained to.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is added to Object or to
    # any class that inherits this singleton method.  Chains to the previous
    # hook, then asks BlankSlate to hide the new method only when the
    # receiver is Object itself.
    def method_added(added)
      hpricot_slate_method_added(added)
      Hpricot::BlankSlate.hide(added) if self == Object
    end
  end
end
@@ -0,0 +1,217 @@
1
+ require 'hpricot/tags'
2
+ require 'fast_xs'
3
+ require 'hpricot/blankslate'
4
+ require 'hpricot/htmlinfo'
5
+
6
+ module Hpricot
7
+ # XML unescape
8
# XML unescape.
#
# Replaces named (<tt>&amp;name;</tt>), decimal (<tt>&amp;#65;</tt>) and
# lower-case-x hexadecimal (<tt>&amp;#x41;</tt>) character references in
# +str+ (converted with +to_s+) by the characters they stand for.  Named
# references are looked up in NamedCharacters; unknown names become "?"
# (code point 63).
#
# All three forms are replaced in a single pass, so text produced by one
# replacement is never unescaped a second time (e.g. "&amp;#38;" yields
# "&#38;", not "&").
def self.uxs(str)
  str.to_s.gsub(/&(?:(\w+)|#(\d+)|#x([0-9a-fA-F]+));/) do
    codepoint =
      if $1
        NamedCharacters[$1] || 63 # 63 = "?" (query char)
      elsif $2
        $2.to_i
      else
        $3.to_i(16)
      end
    [codepoint].pack("U*")
  end
end
14
+
15
# Turns +ele+ into a builder and evaluates the block against it.
#
# +ele+ is extended with Builder, every key/value pair in +assigns+ is
# stored on it as the instance variable <tt>@key</tt>, and the block (if
# any) is run with +instance_eval+ so that +self+ inside the block is +ele+.
#
# Returns +ele+.
def self.build(ele = Doc.new, assigns = {}, &blk)
  ele.extend Builder
  assigns.each do |k, v|
    ele.instance_variable_set("@#{k}", v)
  end
  # instance_eval raises ArgumentError when handed a nil block, so only
  # evaluate when a block was actually supplied.
  ele.instance_eval(&blk) if blk
  ele
end
23
+
24
# Mixin that adds tag-building methods to an element or document.
# Hpricot.build extends its target with this module and then evaluates the
# user's block against it.
#
# The methods below read several instance variables from the receiver
# (@auto_validation, @tagset, @elements, @output_meta_tag,
# @output_xml_instruction).  In the code visible here they are only ever
# set through the +assigns+ hash given to Hpricot.build.
module Builder

  # Module-wide defaults.  Within this module only :tagset and
  # :root_attributes are read back (see xhtml_html).
  @@default = {
    :indent => 0,
    :output_helpers => true,
    :output_xml_instruction => true,
    :output_meta_tag => true,
    :auto_validation => true,
    :tagset => Hpricot::XHTMLTransitional,
    :root_attributes => {
      :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
    }
  }

  # Overrides one of the module-wide defaults.
  def self.set(option, value)
    @@default[option] = value
  end

  # Appends +ele+ to this node's children (creating the list if needed),
  # sets its parent to this node and returns +ele+.
  def add_child ele
    ele.parent = self
    self.children ||= []
    self.children << ele
    ele
  end

  # Write a +string+ to the HTML stream, making sure to escape it
  # (via +fast_xs+).  Returns the new text node.
  def text!(string)
    add_child Text.new(string.fast_xs)
  end

  # Write a +string+ to the HTML stream without escaping it.
  # Returns nil.
  def text(string)
    add_child Text.new(string)
    nil
  end
  alias_method :<<, :text
  alias_method :concat, :text

  # Create a tag named +tag+. Other than the first argument which is the tag name,
  # the arguments are the same as the tags implemented via method_missing.
  #
  # Hash arguments become attributes; every other argument becomes a child
  # (objects responding to +to_html+ are parsed with Hpricot.make, anything
  # else truthy becomes escaped text).  A block, if given, is evaluated
  # against the new element to build further children.
  #
  # When @auto_validation and @tagset are both set, raises InvalidXhtmlError
  # for an unknown element, an unknown attribute, or an id that was already
  # used by an earlier tag created through this same builder object.
  #
  # Returns the new element.
  def tag!(tag, *args, &block)
    ele_id = nil
    if @auto_validation and @tagset
      if !@tagset.tagset.has_key?(tag)
        # Was `tagset.doctype`: no such local or method exists here, so the
        # intended InvalidXhtmlError surfaced as a NameError instead.
        raise InvalidXhtmlError, "no element `#{tag}' for #{@tagset.doctype}"
      elsif args.last.respond_to?(:to_hash)
        attrs = args.last.to_hash

        # Form elements get a name matching their id unless one was given.
        if @tagset.forms.include?(tag) and attrs[:id]
          attrs[:name] ||= attrs[:id]
        end

        attrs.each do |k, v|
          atname = k.to_s.downcase.intern
          # Attribute names containing ":" (namespaced) are not validated.
          unless k =~ /:/ or @tagset.tagset[tag].include? atname
            raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
          end
          if atname == :id
            ele_id = v.to_s
            # @elements is created lazily; previously it was never
            # initialised here, so this check failed on nil.
            if (@elements ||= {}).has_key? ele_id
              raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
            end
          end
        end
      end
    end

    # turn arguments into children or attributes
    childs = []
    attrs = args.grep(Hash)
    childs.concat((args - attrs).flatten.map do |x|
      if x.respond_to? :to_html
        Hpricot.make(x.to_html)
      elsif x
        Text.new(x.fast_xs)
      end
    end.flatten)
    attrs = attrs.inject({}) do |hsh, ath|
      ath.each do |k, v|
        hsh[k] = v.to_s.fast_xs if v
      end
      hsh
    end

    # create the element itself
    tag = tag.to_s
    f = Elem.new(tag, attrs, childs, ETag.new(tag))

    # Remember a validated id so that a later duplicate can be detected.
    # ele_id is only set inside the validation branch above, which also
    # guarantees @elements exists.
    @elements[ele_id] = f if ele_id

    # build children from the block
    if block
      build(f, &block)
    end

    add_child f
    f
  end

  # Instance-level shortcut for Hpricot.build.
  def build(*a, &b)
    Hpricot.build(*a, &b)
  end

  # Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
  # to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
  # for this method.
  #
  # If the @auto_validation setting is on, this method will check for many common mistakes which
  # could lead to invalid XHTML.
  #
  # With no arguments and no block a CssProxy is returned instead of a tag,
  # so that classes/ids can be chained on (e.g. <tt>div.note</tt>).
  def html_tag(sym, *args, &block)
    # Same guard as tag!: only validate when a tagset is actually available.
    if @auto_validation and @tagset and @tagset.self_closing.include?(sym) and block
      raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
    elsif args.empty? and block.nil?
      CssProxy.new(self, sym)
    else
      tag!(sym, *args, &block)
    end
  end

  # Generate one wrapper method per tag known to XHTMLTransitional.
  XHTMLTransitional.tags.each do |k|
    class_eval %{
      def #{k}(*args, &block)
        html_tag(#{k.inspect}, *args, &block)
      end
    }
  end

  # Appends a DocType node built from +target+, +pub+ and +sys+.
  def doctype(target, pub, sys)
    add_child DocType.new(target, pub, sys)
  end

  # Drop the generated +head+ wrapper; it is replaced by the version below.
  remove_method :head

  # Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
  # set to <tt>text/html; charset=utf-8</tt>.
  #
  # NOTE(review): the block handed to tag! is evaluated against the new
  # head element (tag! -> build -> instance_eval), so @output_meta_tag is
  # read from that element rather than from the caller -- confirm this is
  # intended.
  def head(*args, &block)
    tag!(:head, *args) do
      tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if @output_meta_tag
      # instance_eval raises ArgumentError on a nil block; allow `head`
      # to be called without one.
      instance_eval(&block) if block
    end
  end

  # Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype
  # are prepended. Also assumes <tt>:xmlns => "http://www.w3.org/1999/xhtml",
  # :lang => "en"</tt>.
  def xhtml_transitional(attrs = {}, &block)
    # self.tagset = Hpricot::XHTMLTransitional
    xhtml_html(attrs, &block)
  end

  # Builds an html tag with XHTML 1.0 Strict doctype instead.
  #
  # NOTE(review): the tagset switch below is commented out, so this
  # currently does exactly what xhtml_transitional does.
  def xhtml_strict(attrs = {}, &block)
    # self.tagset = Hpricot::XHTMLStrict
    xhtml_html(attrs, &block)
  end

  private

  # Shared implementation of the xhtml_* entry points: optionally emits the
  # XML instruction, then the doctype of the default tagset, then the html
  # element with the default root attributes merged with +attrs+.
  #
  # NOTE(review): instruct! is not defined in this module; presumably it is
  # provided elsewhere -- verify before enabling @output_xml_instruction.
  def xhtml_html(attrs = {}, &block)
    instruct! if @output_xml_instruction
    doctype(:html, *@@default[:tagset].doctype)
    tag!(:html, @@default[:root_attributes].merge(attrs), &block)
  end

end
187
+
188
+ # Class used by Hpricot::Builder to store element options. Methods called
189
+ # against the CssProxy object are added as element classes or IDs.
190
+ #
191
+ # See the README for examples.
192
# Proxy returned by Builder#html_tag when a tag method is called with no
# arguments and no block.  Each unknown method called on it is recorded as
# a CSS class (or as the id, for names ending in "!") until a call carries
# arguments or a block, at which point the tag is actually built.
class CssProxy < BlankSlate

  # Creates a CssProxy object for the tag +sym+ on +builder+.
  def initialize(builder, sym)
    @builder = builder
    @sym = sym
    @attrs = {}
  end

  # Adds attributes to an element. Bang methods set the :id attribute.
  # Other methods add to the :class attribute.
  #
  # Returns the built tag when arguments or a block were given, otherwise
  # the proxy itself so that further calls can be chained.
  def method_missing(id_or_class, *args, &block)
    label = id_or_class.to_s
    bang = /!$/.match(label)

    if bang
      # Everything before the trailing "!" is the id.
      @attrs[:id] = bang.pre_match
    else
      existing = @attrs[:class]
      @attrs[:class] = existing.nil? ? label : "#{existing} #{label}".strip
    end

    return self unless block || args.any?

    args.push(@attrs)
    @builder.tag!(@sym, *args, &block)
  end

end
217
+ end