webtranslateit-hpricot 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
Binary file
data/hpricot.gemspec ADDED
@@ -0,0 +1,18 @@
1
# Gem specification for webtranslateit-hpricot 0.9.0.
#
# Changes relative to the previous revision:
# * the rdoc "--main" page now names README.md, the file that is actually
#   listed in extra_rdoc_files (it used to name a non-existent "README");
# * the deprecated `has_rdoc=` setter is no longer called;
# * the deprecated rdoc flag "--inline-source" is no longer passed.
Gem::Specification.new do |s|
  s.name = "webtranslateit-hpricot"
  s.version = "0.9.0"

  s.authors = ["why the lucky stiff"]
  s.date = "2012-01-17"
  s.description = "a swift, liberal HTML parser with a fantastic library"
  s.email = "why@ruby-lang.org"
  s.license = "MIT"
  # Two native extensions are built at install time.
  s.extensions = ["ext/fast_xs/extconf.rb", "ext/hpricot_scan/extconf.rb"]
  s.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"]
  s.files = %w(
    .gitignore
    CHANGELOG
    COPYING
    README.md
    Rakefile
    ext/fast_xs/FastXsService.java
    ext/fast_xs/extconf.rb
    ext/fast_xs/fast_xs.c
    ext/hpricot_scan/HpricotCss.java
    ext/hpricot_scan/HpricotScanService.java
    ext/hpricot_scan/MANIFEST
    ext/hpricot_scan/extconf.rb
    ext/hpricot_scan/hpricot_common.rl
    ext/hpricot_scan/hpricot_css.c
    ext/hpricot_scan/hpricot_css.java.rl
    ext/hpricot_scan/hpricot_css.rl
    ext/hpricot_scan/hpricot_scan.c
    ext/hpricot_scan/hpricot_scan.h
    ext/hpricot_scan/hpricot_scan.java.rl
    ext/hpricot_scan/hpricot_scan.rl
    extras/hpricot.png
    hpricot.gemspec
    lib/hpricot.rb
    lib/hpricot/blankslate.rb
    lib/hpricot/builder.rb
    lib/hpricot/elements.rb
    lib/hpricot/htmlinfo.rb
    lib/hpricot/inspect.rb
    lib/hpricot/modules.rb
    lib/hpricot/parse.rb
    lib/hpricot/tag.rb
    lib/hpricot/tags.rb
    lib/hpricot/traverse.rb
    lib/hpricot/xchar.rb
    setup.rb
    test/files/basic.xhtml
    test/files/boingboing.html
    test/files/cy0.html
    test/files/immob.html
    test/files/pace_application.html
    test/files/tenderlove.html
    test/files/uswebgen.html
    test/files/utf8.html
    test/files/week9.html
    test/files/why.xml
    test/load_files.rb
    test/nokogiri-bench.rb
    test/test_alter.rb
    test/test_builder.rb
    test/test_parser.rb
    test/test_paths.rb
    test/test_preserved.rb
    test/test_xml.rb
  )
  s.homepage = "http://wiki.github.com/hpricot/hpricot"
  # "--main" must name a file that is part of the documented set.
  s.rdoc_options = ["--quiet", "--title", "The Hpricot Reference", "--main", "README.md"]
  s.require_paths = ["lib"]
  s.summary = "a swift, liberal HTML parser with a fantastic library"
end
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
4
+ # All rights reserved.
5
+
6
+ # Permission is granted for use, copying, modification, distribution,
7
+ # and distribution of modified versions of this work as long as the
8
+ # above copyright notice is included.
9
+ #++
10
+
11
module Hpricot

  # BlankSlate provides an abstract base class with no predefined
  # methods (except for <tt>__send__</tt>, <tt>__id__</tt> and
  # <tt>instance_eval</tt>). BlankSlate is useful as a base class when
  # writing classes that depend upon <tt>method_missing</tt> (e.g.
  # dynamic proxies).
  class BlankSlate
    class << self

      # Hide the method named +name+ in the BlankSlate class. Don't
      # hide +instance_eval+ or any method beginning with "__".
      #
      # +name+ may be a String or a Symbol. Names that are not currently
      # public/protected instance methods are ignored, so calling this
      # twice for the same name is harmless.
      #
      # The membership test uses +method_defined?+ rather than
      # <tt>instance_methods.include?(name.to_s)</tt>: +instance_methods+
      # returns Symbols on Ruby 1.9 and later, so the String comparison
      # never matched and nothing was ever hidden.
      def hide(name)
        return unless method_defined?(name)
        return if name.to_s =~ /^(__|instance_eval)/
        undef_method name
      end
    end

    # Strip everything inherited at definition time.
    instance_methods.each { |m| hide(m) }
  end
end
32
+
33
+ # Since Ruby is very dynamic, methods added to the ancestors of
34
+ # BlankSlate <em>after BlankSlate is defined</em> will show up in the
35
+ # list of available BlankSlate methods. We handle this by defining a
36
+ # hook in the Object and Kernel classes that will hide any method defined after BlankSlate has been loaded.
37
# Reopens Kernel so that methods added to it later are also removed from
# Hpricot::BlankSlate.
module Kernel
  class << self
    # Keep the previously installed hook reachable so it is still run.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is defined on Kernel.
    # Chains to the previous hook first, then hides the new method in
    # the BlankSlate class (only when the receiver really is Kernel).
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Kernel
    end
  end
end
50
+
51
# Reopens Object so that methods added to it later are also removed from
# Hpricot::BlankSlate.
class Object
  class << self
    # Keep the previously installed hook reachable so it is still run.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is defined on Object or
    # on any class that inherits this singleton hook. Chains to the
    # previous hook first; only additions made directly to Object are
    # hidden in the BlankSlate class.
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Object
    end
  end
end
@@ -0,0 +1,217 @@
1
+ require 'hpricot/tags'
2
+ require 'fast_xs'
3
+ require 'hpricot/blankslate'
4
+ require 'hpricot/htmlinfo'
5
+
6
module Hpricot
  # XML unescape.
  #
  # Replaces named (<tt>&amp;name;</tt>), decimal (<tt>&amp;#NN;</tt>) and
  # hexadecimal (<tt>&amp;#xHH;</tt>) character references in +str+ (after
  # +to_s+) with the UTF-8 encoding of the referenced code point. Names
  # are looked up in NamedCharacters; unknown names become "?" (63).
  #
  # All three forms are handled in a single pass so that the output of one
  # substitution is never re-scanned: "&amp;amp;#60;" yields "&amp;#60;",
  # not "<" as the former three chained gsubs did.
  def self.uxs(str)
    str.to_s.gsub(/&(?:(\w+)|#(\d+)|#x([0-9a-fA-F]+));/) do
      codepoint =
        if $1
          NamedCharacters[$1] || 63 # 63 = ?? (query char)
        elsif $2
          $2.to_i
        else
          $3.to_i(16)
        end
      [codepoint].pack("U*")
    end
  end

  # Extends +ele+ with Builder, copies every +assigns+ entry into an
  # instance variable of the same name on +ele+, evaluates the block (if
  # one is given) with +ele+ as +self+, and returns +ele+.
  def self.build(ele = Doc.new, assigns = {}, &blk)
    ele.extend Builder
    assigns.each do |k, v|
      ele.instance_variable_set("@#{k}", v)
    end
    # instance_eval raises ArgumentError when handed no block at all.
    ele.instance_eval(&blk) if blk
    ele
  end

  # Markup-building DSL mixed into an element by Hpricot.build.
  module Builder

    # Module-wide defaults. Only :tagset and :root_attributes are read in
    # this module (by xhtml_html); the rest can be changed through
    # Builder.set.
    @@default = {
      :indent => 0,
      :output_helpers => true,
      :output_xml_instruction => true,
      :output_meta_tag => true,
      :auto_validation => true,
      :tagset => Hpricot::XHTMLTransitional,
      :root_attributes => {
        :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
      }
    }

    # Overrides one of the module-wide defaults.
    def self.set(option, value)
      @@default[option] = value
    end

    # Appends +ele+ to this node's children (creating the list on first
    # use), sets its parent to this node, and returns +ele+.
    def add_child ele
      ele.parent = self
      self.children ||= []
      self.children << ele
      ele
    end

    # Write a +string+ to the HTML stream, making sure to escape it.
    def text!(string)
      add_child Text.new(string.fast_xs)
    end

    # Write a +string+ to the HTML stream without escaping it.
    # Always returns nil.
    def text(string)
      add_child Text.new(string)
      nil
    end
    alias_method :<<, :text
    alias_method :concat, :text

    # Create a tag named +tag+. Other than the first argument which is the tag name,
    # the arguments are the same as the tags implemented via method_missing.
    #
    # Hash arguments become attributes (values escaped with +fast_xs+,
    # nil/false values dropped); every other argument becomes a child:
    # objects responding to +to_html+ are parsed with Hpricot.make,
    # anything else truthy is converted with +to_s+, escaped and wrapped
    # in a Text node. A block, if given, builds further children inside
    # the new element. Returns the new element.
    #
    # When both @auto_validation and @tagset are set, raises
    # InvalidXhtmlError for an unknown element, an unknown attribute, or
    # an id that was already used through this builder.
    def tag!(tag, *args, &block)
      ele_id = nil
      if @auto_validation && @tagset
        if !@tagset.tagset.has_key?(tag)
          # Was `tagset.doctype`, which is not a method defined in this module.
          raise InvalidXhtmlError, "no element `#{tag}' for #{@tagset.doctype}"
        elsif args.last.respond_to?(:to_hash)
          attrs = args.last.to_hash

          # Form controls get a name defaulting to their id.
          if @tagset.forms.include?(tag) && attrs[:id]
            attrs[:name] ||= attrs[:id]
          end

          attrs.each do |k, v|
            atname = k.to_s.downcase.intern
            # Namespaced attributes (containing ":") are never validated.
            unless k =~ /:/ || @tagset.tagset[tag].include?(atname)
              raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
            end
            if atname == :id
              ele_id = v.to_s
              # The registry may not have been supplied by the caller.
              @elements ||= {}
              if @elements.has_key? ele_id
                raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
              end
            end
          end
        end
      end

      # turn arguments into children or attributes
      childs = []
      attrs = args.grep(Hash)
      converted = (args - attrs).flatten.map do |x|
        if x.respond_to? :to_html
          Hpricot.make(x.to_html)
        elsif x
          # to_s so that non-String children (numbers, symbols) work too.
          Text.new(x.to_s.fast_xs)
        end
      end
      # nil/false arguments map to nil above; keep them out of the tree.
      childs.concat(converted.flatten.compact)
      attrs = attrs.inject({}) do |hsh, ath|
        ath.each do |k, v|
          hsh[k] = v.to_s.fast_xs if v
        end
        hsh
      end

      # create the element itself
      tag = tag.to_s
      f = Elem.new(tag, attrs, childs, ETag.new(tag))

      # build children from the block
      if block
        build(f, &block)
      end

      # Remember the id so that a later duplicate is detected.
      @elements[ele_id] = f if ele_id

      add_child f
      f
    end

    # Instance-level shortcut for Hpricot.build.
    def build(*a, &b)
      Hpricot.build(*a, &b)
    end

    # Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
    # to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
    # for this method.
    #
    # If the @auto_validation setting is on, this method will check for many common mistakes which
    # could lead to invalid XHTML.
    #
    # With neither arguments nor a block, a CssProxy is returned so that
    # classes and an id can be chained on before the element is created.
    def html_tag(sym, *args, &block)
      # The @tagset guard mirrors tag!: validation needs a tagset.
      if @auto_validation && @tagset && @tagset.self_closing.include?(sym) && block
        raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
      elsif args.empty? && block.nil?
        CssProxy.new(self, sym)
      else
        tag!(sym, *args, &block)
      end
    end

    # Generate one wrapper method per tag name known to XHTMLTransitional.
    XHTMLTransitional.tags.each do |k|
      class_eval %{
        def #{k}(*args, &block)
          html_tag(#{k.inspect}, *args, &block)
        end
      }
    end

    # Appends a DocType node built from +target+, +pub+ and +sys+.
    def doctype(target, pub, sys)
      add_child DocType.new(target, pub, sys)
    end

    # Drop the generated wrapper; a specialised version follows.
    remove_method :head

    # Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
    # set to <tt>text/html; charset=utf-8</tt> when @output_meta_tag is set.
    def head(*args, &block)
      # The block below is instance_eval'd on the new head element, where
      # this builder's instance variables are not visible, so read the
      # flag here and carry it in through a local.
      output_meta_tag = @output_meta_tag
      tag!(:head, *args) do
        tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if output_meta_tag
        instance_eval(&block) if block
      end
    end

    # Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype
    # are prepended. Also assumes <tt>:xmlns => "http://www.w3.org/1999/xhtml",
    # :lang => "en"</tt>.
    def xhtml_transitional(attrs = {}, &block)
      # self.tagset = Hpricot::XHTMLTransitional
      xhtml_html(attrs, &block)
    end

    # Builds an html tag with XHTML 1.0 Strict doctype instead.
    #
    # NOTE(review): the tagset switch is commented out, so the doctype
    # emitted is whatever the default :tagset provides -- confirm intent.
    def xhtml_strict(attrs = {}, &block)
      # self.tagset = Hpricot::XHTMLStrict
      xhtml_html(attrs, &block)
    end

  private

    # Shared implementation of the xhtml_* entry points: optional XML
    # instruction, doctype from the default tagset, then the html element
    # with the default root attributes merged with +attrs+.
    #
    # NOTE(review): instruct! is not defined in this file; it is assumed
    # to be provided elsewhere -- confirm.
    def xhtml_html(attrs = {}, &block)
      instruct! if @output_xml_instruction
      doctype(:html, *@@default[:tagset].doctype)
      tag!(:html, @@default[:root_attributes].merge(attrs), &block)
    end

  end

  # Class used by the Builder to store element options. Methods called
  # against the CssProxy object are added as element classes or IDs.
  #
  # See the README for examples.
  class CssProxy < BlankSlate

    # Creates a CssProxy object for the element +sym+ of +builder+.
    def initialize(builder, sym)
      @builder, @sym, @attrs = builder, sym, {}
    end

    # Adds attributes to an element. Bang methods set the :id attribute.
    # Other methods add to the :class attribute.
    #
    # Once a block or any truthy argument is supplied, the element is
    # created through the builder's tag! and returned; until then the
    # proxy itself is returned so that calls can be chained.
    def method_missing(id_or_class, *args, &block)
      idc = id_or_class.to_s
      if idc =~ /!$/
        # Everything before the trailing "!" is the id.
        @attrs[:id] = $~.pre_match
      else
        @attrs[:class] = @attrs[:class].nil? ? idc : "#{@attrs[:class]} #{idc}".strip
      end

      if block || args.any?
        args.push(@attrs)
        return @builder.tag!(@sym, *args, &block)
      end

      self
    end

  end
end