adamh-hpricot 0.6.168

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +259 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +194 -0
  8. data/ext/hpricot_scan/extconf.rb +6 -0
  9. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  10. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  11. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  12. data/ext/hpricot_scan/hpricot_scan.rl +649 -0
  13. data/extras/mingw-rbconfig.rb +176 -0
  14. data/lib/hpricot/blankslate.rb +63 -0
  15. data/lib/hpricot/builder.rb +209 -0
  16. data/lib/hpricot/elements.rb +510 -0
  17. data/lib/hpricot/htmlinfo.rb +672 -0
  18. data/lib/hpricot/inspect.rb +103 -0
  19. data/lib/hpricot/modules.rb +38 -0
  20. data/lib/hpricot/parse.rb +36 -0
  21. data/lib/hpricot/tag.rb +186 -0
  22. data/lib/hpricot/tags.rb +164 -0
  23. data/lib/hpricot/traverse.rb +838 -0
  24. data/lib/hpricot/xchar.rb +94 -0
  25. data/lib/hpricot.rb +26 -0
  26. data/test/files/basic.xhtml +17 -0
  27. data/test/files/boingboing.html +2266 -0
  28. data/test/files/cy0.html +3653 -0
  29. data/test/files/immob.html +400 -0
  30. data/test/files/pace_application.html +1320 -0
  31. data/test/files/tenderlove.html +16 -0
  32. data/test/files/uswebgen.html +220 -0
  33. data/test/files/utf8.html +1054 -0
  34. data/test/files/week9.html +1723 -0
  35. data/test/files/why.xml +19 -0
  36. data/test/load_files.rb +7 -0
  37. data/test/test_alter.rb +77 -0
  38. data/test/test_builder.rb +37 -0
  39. data/test/test_parser.rb +400 -0
  40. data/test/test_paths.rb +25 -0
  41. data/test/test_preserved.rb +66 -0
  42. data/test/test_xml.rb +28 -0
  43. metadata +107 -0
@@ -0,0 +1,176 @@
1
+
2
+ # This rbconfig.rb corresponds to a Ruby installation for win32 cross-compiled
3
+ # with mingw under i686-linux. It can be used to cross-compile extensions for
4
+ # win32 using said toolchain.
5
+ #
6
+ # This file assumes that a cross-compiled mingw32 build (compatible with the
7
+ # mswin32 builds) is installed under $HOME/ruby-mingw32.
8
+
9
+ module Config
10
+ #RUBY_VERSION == "1.8.5" or
11
+ # raise "ruby lib version (1.8.5) doesn't match executable version (#{RUBY_VERSION})"
12
+
13
+ mingw32 = ENV['MINGW32_RUBY'] || "#{ENV["HOME"]}/ruby-mingw32" # install root: $MINGW32_RUBY, else $HOME/ruby-mingw32
14
+ mingwpre = ENV['MINGW32_PREFIX'] # toolchain prefix interpolated into CC, AR, NM, ... ; nil when the variable is unset
15
+ TOPDIR = File.dirname(__FILE__).chomp!("/lib/ruby/1.8/i386-mingw32") # chomp! returns nil when the suffix is absent, so TOPDIR is nil unless this file lives in that directory
16
+ DESTDIR = '' unless defined? DESTDIR
17
+ CONFIG = {}
18
+ CONFIG["DESTDIR"] = DESTDIR
19
+ CONFIG["INSTALL"] = "/usr/bin/install -c"
20
+ CONFIG["prefix"] = (TOPDIR || DESTDIR + mingw32) # falls back to DESTDIR + mingw32 when TOPDIR is nil
21
+ CONFIG["EXEEXT"] = ".exe"
22
+ CONFIG["ruby_install_name"] = "ruby"
23
+ CONFIG["RUBY_INSTALL_NAME"] = "ruby"
24
+ CONFIG["RUBY_SO_NAME"] = "msvcrt-ruby18"
25
+ CONFIG["SHELL"] = "/bin/sh"
26
+ CONFIG["PATH_SEPARATOR"] = ":"
27
+ CONFIG["PACKAGE_NAME"] = ""
28
+ CONFIG["PACKAGE_TARNAME"] = ""
29
+ CONFIG["PACKAGE_VERSION"] = ""
30
+ CONFIG["PACKAGE_STRING"] = ""
31
+ CONFIG["PACKAGE_BUGREPORT"] = ""
32
+ CONFIG["exec_prefix"] = "$(prefix)"
33
+ CONFIG["bindir"] = "$(exec_prefix)/bin"
34
+ CONFIG["sbindir"] = "$(exec_prefix)/sbin"
35
+ CONFIG["libexecdir"] = "$(exec_prefix)/libexec"
36
+ CONFIG["datadir"] = "$(prefix)/share"
37
+ CONFIG["sysconfdir"] = "$(prefix)/etc"
38
+ CONFIG["sharedstatedir"] = "$(prefix)/com"
39
+ CONFIG["localstatedir"] = "$(prefix)/var"
40
+ CONFIG["libdir"] = "$(exec_prefix)/lib"
41
+ CONFIG["includedir"] = "$(prefix)/include"
42
+ CONFIG["oldincludedir"] = "/usr/include"
43
+ CONFIG["infodir"] = "$(prefix)/info"
44
+ CONFIG["mandir"] = "$(prefix)/man"
45
+ CONFIG["build_alias"] = "i686-linux"
46
+ CONFIG["host_alias"] = "#{mingwpre}"
47
+ CONFIG["target_alias"] = "i386-mingw32"
48
+ CONFIG["ECHO_C"] = ""
49
+ CONFIG["ECHO_N"] = "-n"
50
+ CONFIG["ECHO_T"] = ""
51
+ CONFIG["LIBS"] = "-lwsock32 "
52
+ CONFIG["MAJOR"] = "1"
53
+ CONFIG["MINOR"] = "8"
54
+ CONFIG["TEENY"] = "4"
55
+ CONFIG["build"] = "i686-pc-linux"
56
+ CONFIG["build_cpu"] = "i686"
57
+ CONFIG["build_vendor"] = "pc"
58
+ CONFIG["build_os"] = "linux"
59
+ CONFIG["host"] = "i586-pc-mingw32msvc"
60
+ CONFIG["host_cpu"] = "i586"
61
+ CONFIG["host_vendor"] = "pc"
62
+ CONFIG["host_os"] = "mingw32msvc"
63
+ CONFIG["target"] = "i386-pc-mingw32"
64
+ CONFIG["target_cpu"] = "i386"
65
+ CONFIG["target_vendor"] = "pc"
66
+ CONFIG["target_os"] = "mingw32"
67
+ CONFIG["CC"] = "#{mingwpre}-gcc"
68
+ CONFIG["CFLAGS"] = "-g -O2 "
69
+ CONFIG["LDFLAGS"] = ""
70
+ CONFIG["CPPFLAGS"] = ""
71
+ CONFIG["OBJEXT"] = "o"
72
+ CONFIG["CPP"] = "#{mingwpre}-gcc -E"
73
+ CONFIG["EGREP"] = "grep -E"
74
+ CONFIG["GNU_LD"] = "yes"
75
+ CONFIG["CPPOUTFILE"] = "-o conftest.i"
76
+ CONFIG["OUTFLAG"] = "-o "
77
+ CONFIG["YACC"] = "bison -y"
78
+ CONFIG["RANLIB"] = "#{mingwpre}-ranlib"
79
+ CONFIG["AR"] = "#{mingwpre}-ar"
80
+ CONFIG["NM"] = "#{mingwpre}-nm"
81
+ CONFIG["WINDRES"] = "#{mingwpre}-windres"
82
+ CONFIG["DLLWRAP"] = "#{mingwpre}-dllwrap"
83
+ CONFIG["OBJDUMP"] = "#{mingwpre}-objdump"
84
+ CONFIG["LN_S"] = "ln -s"
85
+ CONFIG["SET_MAKE"] = ""
86
+ CONFIG["INSTALL_PROGRAM"] = "$(INSTALL)"
87
+ CONFIG["INSTALL_SCRIPT"] = "$(INSTALL)"
88
+ CONFIG["INSTALL_DATA"] = "$(INSTALL) -m 644"
89
+ CONFIG["RM"] = "rm -f"
90
+ CONFIG["CP"] = "cp"
91
+ CONFIG["MAKEDIRS"] = "mkdir -p"
92
+ CONFIG["LIBOBJS"] = " fileblocks$(U).o crypt$(U).o flock$(U).o acosh$(U).o win32$(U).o"
93
+ CONFIG["ALLOCA"] = ""
94
+ CONFIG["DLDFLAGS"] = " -Wl,--enable-auto-import,--export-all"
95
+ CONFIG["ARCH_FLAG"] = ""
96
+ CONFIG["STATIC"] = ""
97
+ CONFIG["CCDLFLAGS"] = ""
98
+ CONFIG["LDSHARED"] = "#{mingwpre}-gcc -shared -s"
99
+ CONFIG["DLEXT"] = "so"
100
+ CONFIG["DLEXT2"] = "dll"
101
+ CONFIG["LIBEXT"] = "a"
102
+ CONFIG["LINK_SO"] = ""
103
+ CONFIG["LIBPATHFLAG"] = " -L\"%s\""
104
+ CONFIG["RPATHFLAG"] = ""
105
+ CONFIG["LIBPATHENV"] = ""
106
+ CONFIG["TRY_LINK"] = ""
107
+ CONFIG["STRIP"] = "strip"
108
+ CONFIG["EXTSTATIC"] = ""
109
+ CONFIG["setup"] = "Setup"
110
+ CONFIG["MINIRUBY"] = "ruby -rfake"
111
+ CONFIG["PREP"] = "fake.rb"
112
+ CONFIG["RUNRUBY"] = "$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`"
113
+ CONFIG["EXTOUT"] = ".ext"
114
+ CONFIG["ARCHFILE"] = ""
115
+ CONFIG["RDOCTARGET"] = ""
116
+ CONFIG["XCFLAGS"] = " -DRUBY_EXPORT"
117
+ CONFIG["XLDFLAGS"] = " -Wl,--stack,0x02000000 -L."
118
+ CONFIG["LIBRUBY_LDSHARED"] = "#{mingwpre}-gcc -shared -s"
119
+ CONFIG["LIBRUBY_DLDFLAGS"] = " -Wl,--enable-auto-import,--export-all -Wl,--out-implib=$(LIBRUBY)"
120
+ CONFIG["rubyw_install_name"] = "rubyw"
121
+ CONFIG["RUBYW_INSTALL_NAME"] = "rubyw"
122
+ CONFIG["LIBRUBY_A"] = "lib$(RUBY_SO_NAME)-static.a"
123
+ CONFIG["LIBRUBY_SO"] = "$(RUBY_SO_NAME).dll"
124
+ CONFIG["LIBRUBY_ALIASES"] = ""
125
+ CONFIG["LIBRUBY"] = "lib$(LIBRUBY_SO).a"
126
+ CONFIG["LIBRUBYARG"] = "$(LIBRUBYARG_SHARED)"
127
+ CONFIG["LIBRUBYARG_STATIC"] = "-l$(RUBY_SO_NAME)-static"
128
+ CONFIG["LIBRUBYARG_SHARED"] = "-l$(RUBY_SO_NAME)"
129
+ CONFIG["SOLIBS"] = "$(LIBS)"
130
+ CONFIG["DLDLIBS"] = ""
131
+ CONFIG["ENABLE_SHARED"] = "yes"
132
+ CONFIG["MAINLIBS"] = ""
133
+ CONFIG["COMMON_LIBS"] = "m"
134
+ CONFIG["COMMON_MACROS"] = ""
135
+ CONFIG["COMMON_HEADERS"] = "windows.h winsock.h"
136
+ CONFIG["EXPORT_PREFIX"] = ""
137
+ CONFIG["MINIOBJS"] = "dmydln.o"
138
+ CONFIG["MAKEFILES"] = "Makefile GNUmakefile"
139
+ CONFIG["arch"] = "i386-mingw32"
140
+ CONFIG["sitearch"] = "i386-msvcrt"
141
+ CONFIG["sitedir"] = "$(prefix)/lib/ruby/site_ruby"
142
+ CONFIG["configure_args"] = "'--host=#{mingwpre}' '--target=i386-mingw32' '--build=i686-linux' '--prefix=#{mingw32}' 'build_alias=i686-linux' 'host_alias=#{mingwpre}' 'target_alias=i386-mingw32'"
143
+ CONFIG["NROFF"] = "/usr/bin/nroff"
144
+ CONFIG["MANTYPE"] = "doc"
145
+ CONFIG["LTLIBOBJS"] = " fileblocks$(U).lo crypt$(U).lo flock$(U).lo acosh$(U).lo win32$(U).lo"
146
+ CONFIG["ruby_version"] = "$(MAJOR).$(MINOR)"
147
+ CONFIG["rubylibdir"] = "$(libdir)/ruby/$(ruby_version)"
148
+ CONFIG["archdir"] = "$(rubylibdir)/$(arch)"
149
+ CONFIG["sitelibdir"] = "$(sitedir)/$(ruby_version)"
150
+ CONFIG["sitearchdir"] = "$(sitelibdir)/$(sitearch)"
151
+ CONFIG["topdir"] = File.dirname(__FILE__)
152
+ MAKEFILE_CONFIG = {} # copy of CONFIG taken before expansion, so its values keep their $(name) references
153
+ CONFIG.each{|k,v| MAKEFILE_CONFIG[k] = v.dup} # dup: expand rewrites CONFIG's strings in place (gsub!)
154
+ def Config::expand(val, config = CONFIG) # replaces $(name) / ${name} references inside val in place and returns val
155
+ val.gsub!(/\$\$|\$\(([^()]+)\)|\$\{([^{}]+)\}/) do |var| # matches `$$`, `$(name)` or `${name}`
156
+ if !(v = $1 || $2) # no capture group matched, so the match was `$$`
157
+ '$' # `$$` collapses to a single literal `$`
158
+ elsif key = config[v = v[/\A[^:]+(?=(?::(.*?)=(.*))?\z)/]] # v becomes the name before an optional `:pat=sub` suffix; the suffix parts land in $1/$2
159
+ pat, sub = $1, $2
160
+ config[v] = false # hide the entry while it is being expanded, so a reference to itself takes the else branch and is left as-is
161
+ Config::expand(key, config) # expand the referenced value in place
162
+ config[v] = key # put the (now expanded) value back
163
+ key = key.gsub(/#{Regexp.quote(pat)}(?=\s|\z)/n) {sub} if pat # `:pat=sub` form: replace pat where it is followed by whitespace or the end of the value
164
+ key
165
+ else
166
+ var # unknown (or currently hidden) name: keep the reference text unchanged
167
+ end
168
+ end
169
+ val
170
+ end
171
+ CONFIG.each_value do |val| # expand every CONFIG value in place at load time
172
+ Config::expand(val)
173
+ end
174
+ end
175
+ RbConfig = Config # compatibility for ruby-1.9
176
+ CROSS_COMPILING = nil unless defined? CROSS_COMPILING
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
4
+ # All rights reserved.
5
+
6
+ # Permission is granted for use, copying, modification, distribution,
7
+ # and distribution of modified versions of this work as long as the
8
+ # above copyright notice is included.
9
+ #++
10
+
11
module Hpricot

  # BlankSlate provides an abstract base class with no predefined
  # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
  # BlankSlate is useful as a base class when writing classes that
  # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
  class BlankSlate
    class << self

      # Hide the method named +name+ in the BlankSlate class.  Don't
      # hide +instance_eval+ or any method beginning with "__".
      #
      # +name+ may be a String or a Symbol.  The lookup goes through
      # +method_defined?+ rather than <tt>instance_methods.include?</tt>,
      # because +instance_methods+ returns Strings on Ruby 1.8 but Symbols
      # on later versions; comparing against <tt>name.to_s</tt> there never
      # matched, so nothing was hidden.
      #
      # Returns nil when the method is protected by the rule above or is
      # not defined.
      def hide(name)
        name = name.to_s
        return if name =~ /^(__|instance_eval)/
        undef_method(name) if method_defined?(name)
      end
    end

    # Strip every public/protected instance method inherited at definition time.
    instance_methods.each { |m| hide(m) }
  end
end
32
+
33
+ # Since Ruby is very dynamic, methods added to the ancestors of
34
+ # BlankSlate <em>after BlankSlate is defined</em> will show up in the
35
+ # list of available BlankSlate methods. We handle this by defining a
36
+ # hook in the Object and Kernel classes that will hide any newly defined methods.
37
module Kernel
  class << self
    # Install the hook only once.  Without this guard, loading the file a
    # second time would alias the already-installed hook to
    # hpricot_slate_method_added, and the hook would then call itself
    # forever on the next method definition.
    unless private_method_defined?(:hpricot_slate_method_added) ||
           method_defined?(:hpricot_slate_method_added)
      # Keep the previous method_added reachable so it still runs.
      alias_method :hpricot_slate_method_added, :method_added

      # Detect method additions to Kernel and remove them in the
      # BlankSlate class.
      def method_added(name)
        hpricot_slate_method_added(name)
        return if self != Kernel
        Hpricot::BlankSlate.hide(name)
      end
    end
  end
end
50
+
51
class Object
  class << self
    # Install the hook only once.  Without this guard, loading the file a
    # second time would alias the already-installed hook to
    # hpricot_slate_method_added, and the hook would then call itself
    # forever on the next method definition.
    unless private_method_defined?(:hpricot_slate_method_added) ||
           method_defined?(:hpricot_slate_method_added)
      # Keep the previous method_added reachable so it still runs.
      alias_method :hpricot_slate_method_added, :method_added

      # Detect method additions to Object and remove them in the
      # BlankSlate class.  Every class inherits this singleton method,
      # so additions to any other class are ignored after the previous
      # hook has been called.
      def method_added(name)
        hpricot_slate_method_added(name)
        return if self != Object
        Hpricot::BlankSlate.hide(name)
      end
    end
  end
end
@@ -0,0 +1,209 @@
1
+ require 'hpricot/tags'
2
+ require 'fast_xs'
3
+ require 'hpricot/blankslate'
4
+ require 'hpricot/htmlinfo'
5
+
6
+ module Hpricot
7
# XML unescape.
#
# Converts +str+ with +to_s+ and replaces named entities
# (<tt>&amp;name;</tt>, looked up in NamedCharacters), decimal references
# (<tt>&amp;#65;</tt>) and hexadecimal references (<tt>&amp;#x41;</tt>) with the
# corresponding UTF-8 characters.  Unknown entity names become "?".
#
# All references are resolved in one pass, so text produced by one
# replacement is never unescaped a second time (<tt>&amp;amp;#65;</tt> yields
# <tt>&amp;#65;</tt>, not <tt>A</tt>).
def self.uxs(str)
  str.to_s.gsub(/&(?:(\w+)|#(\d+)|#[xX]([0-9a-fA-F]+));/) do
    code =
      if $1
        NamedCharacters[$1] || 63 # 63 is "?"; the `??` literal is a String on Ruby >= 1.9 and cannot be packed
      elsif $2
        $2.to_i
      else
        $3.to_i(16)
      end
    [code].pack("U*")
  end
end
13
+
14
# Turns +ele+ into a builder and evaluates the block against it.
#
# +ele+ is extended with Builder, every key/value pair in +assigns+ is
# stored on it as an instance variable (<tt>@key = value</tt>), and the
# block, if any, is run with +instance_eval+ so that bare method calls
# inside it reach +ele+.  Calling without a block is allowed and only
# performs the extend/assign steps.
#
# Returns +ele+.
def self.build(ele = Doc.new, assigns = {}, &blk)
  ele.extend Builder
  assigns.each do |k, v|
    ele.instance_variable_set("@#{k}", v)
  end
  # instance_eval raises ArgumentError when given neither a block nor a string.
  ele.instance_eval(&blk) if blk
  ele
end
22
+
23
+ module Builder
24
+
25
+ @@default = { # module-wide defaults; in this file only :tagset and :root_attributes are read back (by xhtml_html)
26
+ :indent => 0,
27
+ :output_helpers => true,
28
+ :output_xml_instruction => true,
29
+ :output_meta_tag => true,
30
+ :auto_validation => true,
31
+ :tagset => Hpricot::XHTMLTransitional,
32
+ :root_attributes => {
33
+ :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
34
+ }
35
+ }
36
+
37
+ def self.set(option, value) # overrides one default for every builder (the hash is a class variable); returns value
38
+ @@default[option] = value
39
+ end
40
+
41
# Escapes +string+ with +fast_xs+ and appends it to this node's children
# as a Text node, creating the children list on first use.  Returns the
# children list.
def text!(string)
  nodes = (self.children ||= [])
  nodes << Text.new(string.fast_xs)
end
45
+
46
# Appends +string+ verbatim (no escaping) to this node's children as a
# Text node, creating the children list on first use.  Always returns nil.
def text(string)
  nodes = (self.children ||= [])
  nodes << Text.new(string)
  nil
end
51
+ alias_method :<<, :text
52
+ alias_method :concat, :text
53
+
54
# Create a tag named +tag+ and append it to this node's children.
#
# Every Hash in +args+ is merged into the element's attributes; values
# are converted with +to_s+ and escaped with +fast_xs+, and nil/false
# values are skipped.  Every other argument becomes a child: objects
# responding to +to_html+ are parsed with Hpricot.make, anything else is
# converted with +to_s+, escaped and wrapped in a Text node.  nil/false
# arguments are ignored.  A block, if given, is evaluated against the new
# element through #build.
#
# When both @auto_validation and @tagset are set, the tag name, the
# attribute names of the last argument and the uniqueness of its +id+
# are checked first; any violation raises InvalidXhtmlError.  For form
# tags an +id+ is also copied to +name+ unless +name+ is already given.
#
# Returns the new Elem.
def tag!(tag, *args, &block)
  ele_id = nil
  if @auto_validation && @tagset
    unless @tagset.tagset.has_key?(tag)
      raise InvalidXhtmlError, "no element `#{tag}' for #{@tagset.doctype}"
    end

    if args.last.respond_to?(:to_hash)
      attrs = args.last.to_hash

      if @tagset.forms.include?(tag) && attrs[:id]
        attrs[:name] ||= attrs[:id]
      end

      attrs.each do |k, v|
        atname = k.to_s.downcase.intern
        # Namespaced attributes (containing ":") are never validated.
        unless k.to_s =~ /:/ || @tagset.tagset[tag].include?(atname)
          raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
        end
        if atname == :id
          ele_id = v.to_s
          # The registry is created lazily; nothing else in this module sets it up.
          if (@elements ||= {}).has_key?(ele_id)
            raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
          end
        end
      end
    end
  end

  # turn arguments into children or attributes
  attrs = args.grep(Hash)
  childs = (args - attrs).flatten.map do |x|
    if x.respond_to? :to_html
      Hpricot.make(x.to_html)
    elsif x
      Text.new(x.to_s.fast_xs)
    end
  end.flatten.compact # compact: nil/false arguments must not become nil children
  attrs = attrs.inject({}) do |hsh, ath|
    ath.each do |k, v|
      hsh[k] = v.to_s.fast_xs if v
    end
    hsh
  end

  # create the element itself
  tag = tag.to_s
  f = Elem.new(tag, attrs, childs, ETag.new(tag))

  # build children from the block
  build(f, &block) if block

  # remember the id so that a later duplicate is rejected
  (@elements ||= {})[ele_id] = f if ele_id

  (self.children ||= []) << f
  f
end
112
+
113
# Instance-level shortcut for Hpricot.build: forwards every argument and
# the block unchanged and returns its result.
def build(*args, &blk)
  Hpricot.build(*args, &blk)
end
116
+
117
# Every HTML tag method goes through an html_tag call.  So, calling <tt>div</tt> is equivalent
# to calling <tt>html_tag(:div)</tt>.  All HTML tags in Hpricot's list are given generated wrappers
# for this method.
#
# With no arguments and no block a CssProxy for the tag is returned, so
# that classes and ids can be chained onto it; otherwise the call is
# forwarded to tag!.
#
# If @auto_validation is on and a @tagset is present, passing a block to
# a self-closing element raises InvalidXhtmlError.  The check is skipped
# when @tagset is nil, matching the guard used in tag!.
def html_tag(sym, *args, &block)
  if @auto_validation && @tagset && @tagset.self_closing.include?(sym) && block
    raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
  elsif args.empty? && block.nil?
    CssProxy.new(self, sym)
  else
    tag!(sym, *args, &block)
  end
end
132
+
133
+ XHTMLTransitional.tags.each do |k| # defines one wrapper method per tag name; each forwards its arguments and block to html_tag
134
+ class_eval %{
135
+ def #{k}(*args, &block)
136
+ html_tag(#{k.inspect}, *args, &block)
137
+ end
138
+ }
139
+ end
140
+
141
# Appends a DocType node built from +target+, +pub+ and +sys+ to this
# node's children, creating the children list on first use.  Returns the
# children list.
def doctype(target, pub, sys)
  nodes = (self.children ||= [])
  nodes << DocType.new(target, pub, sys)
end
144
+
145
+ remove_method :head
146
+
147
# Builds a head tag.  When @output_meta_tag is set on this builder, a
# <tt>meta</tt> tag with Content-Type set to
# <tt>text/html; charset=utf-8</tt> is added inside, before the content
# produced by the block.  The block is optional.
def head(*args, &block)
  # tag! evaluates its block against the new element (via build /
  # instance_eval), so instance variables read inside the block belong
  # to that element.  Read the setting here, on the builder itself.
  emit_meta = @output_meta_tag
  tag!(:head, *args) do
    tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if emit_meta
    instance_eval(&block) if block
  end
end
155
+
156
+ # Builds an html tag. An XML 1.0 instruction (when @output_xml_instruction is set) and the
157
+ # doctype of the default tagset are prepended. The root attributes default to
158
+ # <tt>:xmlns => "http://www.w3.org/1999/xhtml", :'xml:lang' => "en", :lang => "en"</tt>.
159
+ def xhtml_transitional(attrs = {}, &block)
160
+ # self.tagset = Hpricot::XHTMLTransitional
161
+ xhtml_html(attrs, &block)
162
+ end
163
+
164
+ # Intended to build an html tag with the XHTML 1.0 Strict doctype instead. NOTE(review): the tagset switch below is commented out, so this currently behaves exactly like xhtml_transitional.
165
+ def xhtml_strict(attrs = {}, &block)
166
+ # self.tagset = Hpricot::XHTMLStrict
167
+ xhtml_html(attrs, &block)
168
+ end
169
+
170
+ private
171
+
172
+ def xhtml_html(attrs = {}, &block) # shared implementation behind xhtml_transitional and xhtml_strict
173
+ instruct! if @output_xml_instruction # instruct! is not defined in this file - presumably supplied by the object being built; TODO confirm
174
+ doctype(:html, *@@default[:tagset].doctype) # doctype arguments always come from the module-wide default tagset
175
+ tag!(:html, @@default[:root_attributes].merge(attrs), &block) # caller-supplied attrs override the default root attributes
176
+ end
177
+
178
+ end
179
+
180
# Proxy returned by Builder#html_tag when a tag method is called with no
# arguments and no block.  Methods called against the CssProxy object are
# collected as element classes or an ID until a call carries arguments or
# a block, at which point the element is created through the builder.
#
# See the README for examples.
class CssProxy < BlankSlate

  # Creates a CssProxy object for the tag +sym+ that will eventually be
  # emitted through +builder+.
  def initialize(builder, sym)
    @builder = builder
    @sym = sym
    @attrs = {}
  end

  # Adds attributes to an element.  Bang methods set the :id attribute.
  # Other methods add to the :class attribute.
  #
  # Returns the proxy itself while only names are being collected.  As
  # soon as a call carries a block or a truthy argument, the collected
  # attributes are appended to +args+ and the result of the builder's
  # tag! is returned.
  def method_missing(id_or_class, *args, &block)
    token = id_or_class.to_s
    bang = /!$/.match(token)
    if bang
      @attrs[:id] = bang.pre_match
    else
      existing = @attrs[:class]
      @attrs[:class] = existing.nil? ? token : "#{existing} #{token}".strip
    end

    return self unless block or args.any?

    args.push(@attrs)
    @builder.tag!(@sym, *args, &block)
  end

end
209
+ end