tenderlove-nokogiri 0.0.0.20081001111445

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,32 @@
1
module Nokogiri
  module XML
    ###
    # SAX handler that builds nodes from parsed markup and inserts every
    # completed top-level node as a previous sibling of a target node.
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      ###
      # +node+ is the node new siblings are inserted before.
      # +original_html+ is the markup being parsed; it is consulted to
      # decide which reported elements are tracked.
      def initialize node, original_html
        @original_html = original_html
        @node = node
        @stack = []
      end

      ###
      # Create a node for +name+, copy the flat +attrs+ list
      # (key, value, key, value, ...) onto it, attach it to the element
      # currently open (if any) and push it on the stack.
      def start_element name, attrs = []
        return unless in_original?(name)
        node = Node.new(name)
        Hash[*attrs].each do |k,v|
          node[k] = v
        end
        node.parent = @stack.last unless @stack.empty?
        @stack << node
      end

      ###
      # Append +string+ to the content of the innermost open element.
      # Text reported while no tracked element is open is ignored instead
      # of raising NoMethodError on nil.
      def characters string
        node = @stack.last
        return unless node
        node.content += string
      end

      ###
      # Close the innermost element. When it is the outermost tracked
      # element it is inserted before the target node.
      def end_element name
        return unless in_original?(name)
        @node.add_previous_sibling @stack.last if @stack.length == 1
        @stack.pop
      end

      private

      ###
      # True when an opening tag starting with +name+ occurs in the original
      # markup (case-insensitive). The name is escaped so that regexp
      # metacharacters in it are matched literally.
      # NOTE(review): presumably this filters out wrapper elements the
      # parser adds on its own -- confirm against the SAX parser behaviour.
      def in_original?(name)
        @original_html =~ /<#{Regexp.escape(name)}/i
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
module Nokogiri
  module XML
    ###
    # DSL for building a document: unknown methods called inside the block
    # become elements that are attached under the current parent node.
    class Builder
      attr_accessor :doc, :parent

      ###
      # Create a new document of the sibling +Document+ class (the class
      # named like this builder's class with the last segment replaced by
      # "Document") and evaluate +block+ in the context of the builder.
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        # Resolve the constant path segment by segment instead of eval.
        document_class = namespace.inject(Object) { |scope, const|
          scope.const_get(const)
        }
        @doc = document_class.new
        @parent = @doc
        # instance_eval raises when handed neither a block nor a string.
        instance_eval(&block) if block
        @parent = @doc
      end

      ###
      # Insert a Text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CData node containing +string+ under the current parent.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the built document by delegating to its +to_xml+.
      def to_xml
        @doc.to_xml
      end

      ###
      # Any unknown method creates an element named after the method.
      # A Hash first argument sets attributes (keys and values are
      # stringified); any other truthy first argument becomes the content.
      def method_missing(method, *args, &block)
        node = Nokogiri::XML::Node.new(method.to_s) { |n|
          if content = args.first
            if content.is_a?(Hash)
              content.each { |k,v| n[k.to_s] = v.to_s }
            else
              n.content = content
            end
          end
        }
        insert(node, &block)
      end

      private

      ###
      # Attach +node+ under the current parent. With a block, evaluate it
      # with +node+ as the current parent and then reset the current parent
      # to +node+'s parent. Returns a NodeBuilder wrapping +node+.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          @parent = node
          instance_eval(&block)
          @parent = node.parent
        end
        NodeBuilder.new(node, self)
      end

      ###
      # Wrapper returned for every inserted node; chained method calls on
      # it set the id, an attribute or a CSS class of the wrapped node.
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        ###
        # * <tt>name!</tt> sets the +id+ attribute to +name+
        # * <tt>name=</tt> sets the attribute +name+ to the first argument
        # * anything else appends the method name to the +class+ attribute
        #
        # For the first and last form a truthy first argument becomes the
        # node content. With a block, the block is evaluated in the
        # document builder with the wrapped node as current parent and its
        # value is returned; otherwise the NodeBuilder itself is returned.
        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = $1
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[$1] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end
          if block_given?
            # Remember and restore the builder's parent; otherwise every
            # element created after the block would be nested under @node.
            previous_parent = @doc_builder.parent
            @doc_builder.parent = @node
            value = @doc_builder.instance_eval(&block)
            @doc_builder.parent = previous_parent
            return value
          end
          self
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Nokogiri
  module XML
    class Document < Node
      ###
      # Lazily created Hash of decorator lists. Looking up an unknown key
      # stores and returns a fresh empty Array for it.
      def decorators
        @decorators ||= Hash.new { |registry, name| registry[name] = [] }
      end

      ###
      # Extend +node+ with every decorator registered under the downcased
      # last segment of its class name.
      def decorate(node)
        class_name = node.class.name
        key = class_name.split('::')[-1].downcase
        decorators[key].each { |decorator| node.extend(decorator) }
      end

      ###
      # Return the result of +serialize+.
      def to_xml
        serialize
      end
    end
  end
end
@@ -0,0 +1,162 @@
1
module Nokogiri
  module XML
    class Node
      # Node type codes compared against +type+ by the predicates below.
      # NOTE(review): values look like libxml2's xmlElementType -- confirm.
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      ###
      # Apply the document's decorators to this node, if it has a document.
      def decorate!
        document.decorate(self) if document
      end

      ###
      # Return a decorated NodeSet holding every child for which +blank?+
      # is false, in sibling order.
      def children
        list = NodeSet.new
        list.document = document
        document.decorate(list)

        current = child
        while current
          list << current unless current.blank?
          current = current.next
        end
        list
      end

      ###
      # Search with any mix of XPath and CSS. A path starting with "/" or
      # "./" is used as XPath as-is; anything else is parsed as CSS and
      # converted to XPath. Duplicate expressions are dropped.
      def find(*paths)
        xpaths = paths.map { |path|
          if path =~ /^(\.\/|\/)/
            path
          else
            CSS::Parser.parse(path).map { |ast| ast.to_xpath }
          end
        }.flatten.uniq
        find_by_xpath(*xpaths)
      end
      alias :search :find
      alias :/ :find

      ###
      # Evaluate every XPath in +paths+ against this node. One path returns
      # that path's node set; several paths return a new NodeSet with the
      # nodes of all result sets appended in order. Every returned set has
      # its +document+ assigned and is decorated.
      def find_by_xpath *paths
        sets = paths.map { |path|
          set = XPathContext.new(self).evaluate(path).node_set
          set.document = document
          document.decorate(set)
          set
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          # Keep the combined set consistent with the single-path result,
          # which also carries the document.
          combined.document = document
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Convert every CSS rule to XPath and search with the result.
      def find_by_css *rules
        xpaths = rules.map { |rule|
          CSS::Parser.parse(rule).map { |ast| ast.to_xpath }
        }.flatten.uniq
        find_by_xpath(*xpaths)
      end

      ###
      # Return the first result of searching for +path+.
      def at path
        search("#{path}").first
      end

      ###
      # Return the value of attribute +property+, or nil when it is not set.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ###
      # Return the next sibling.
      def next
        next_sibling
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Each node is added directly after self, so the collected nodes
        # are added back to front.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]
      def set_attribute(name, value)
        self[name] = value
      end

      def remove_attribute name
        remove(name)
      end

      def inner_text
        content
      end

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first; otherwise
      # +string+ is stored as given. A non-default +encode+ can only be
      # passed through +send+, since assignment syntax takes one value.
      def content= string, encode = true
        self.native_content = encode ? encode_special_chars(string) : string
      end

      def comment?
        type == COMMENT_NODE
      end

      def xml?
        type == DOCUMENT_NODE
      end

      def html?
        type == HTML_DOCUMENT_NODE
      end

      ###
      # Currently the same as +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ###
      # Turn +path+ into a CSS selector: path steps are joined with " > "
      # and a positional "[n]" becomes ":nth-of-type(n)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      def xpath
        path
      end

      private

      ###
      # Resolve the SAX::Parser class that lives in the same namespace as
      # this node's document class (the last segment of the document class
      # name is replaced by SAX::Parser). Constants are looked up one
      # segment at a time instead of evaluating a string.
      def sax_parser_class
        names = document.class.name.split('::')
        names[-1] = 'SAX::Parser'
        names.join('::').split('::').inject(Object) { |scope, const|
          scope.const_get(const)
        }
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
module Nokogiri
  module XML
    class NodeSet
      include Enumerable

      attr_accessor :document

      ###
      # Yields the new set to the block, if one is given.
      def initialize
        yield self if block_given?
      end

      def first
        self[0]
      end

      def last
        self[length - 1]
      end

      ###
      # Insert +datum+ before the first node of the set.
      def before datum
        first.before datum
      end

      ###
      # Insert +datum+ after the last node of the set.
      def after datum
        last.after datum
      end

      def << node
        push(node)
      end

      ###
      # Search every node for +path+ and collect all results in a new,
      # decorated NodeSet belonging to the same document.
      def search path
        sub_set = NodeSet.new
        document.decorate(sub_set)
        each do |node|
          node.search(path).each { |sub_node| sub_set << sub_node }
        end
        sub_set.document = document
        sub_set
      end
      alias :/ :search

      ###
      # Return the first result of searching for +path+.
      def at path
        search(path).first
      end

      ###
      # Add +name+ to the +class+ attribute of every node that supports
      # attributes. Returns self.
      def add_class name
        each do |el|
          next unless el.respond_to? :get_attribute
          classes = el.get_attribute('class').to_s.split(" ")
          el.set_attribute('class', classes.push(name).uniq.join(" "))
        end
        self
      end

      ###
      # Remove +name+ from the +class+ attribute of every node that
      # supports attributes; without +name+ the whole attribute is
      # removed. Returns self.
      def remove_class name = nil
        each do |el|
          next unless el.respond_to? :get_attribute
          if name
            classes = el.get_attribute('class').to_s.split(" ")
            el.set_attribute('class', (classes - [name]).uniq.join(" "))
          else
            el.remove_attribute("class")
          end
        end
        self
      end

      ###
      # With +value+ or a block: set attribute +key+ on every node (the
      # block is called with the node to compute the value); returns self.
      # With a Hash as +key+: set every pair on every node; returns self.
      # Otherwise: return attribute +key+ of the first node, or nil when
      # the set is empty.
      def attr key, value = nil, &blk
        if value || blk
          each do |el|
            el.set_attribute(key, value || blk[el])
          end
          return self
        end
        if key.is_a? Hash
          key.each { |k,v| self.attr(k,v) }
          return self
        end
        # An empty set has no first node; answer nil instead of raising.
        node = first
        node && node.get_attribute(key)
      end
      alias_method :set, :attr

      ###
      # Remove attribute +name+ from every node that supports it.
      # Returns self.
      def remove_attr name
        each do |el|
          next unless el.respond_to? :remove_attribute
          el.remove_attribute(name)
        end
        self
      end

      ###
      # Iterate over each node, yielding to +block+. Returns self.
      def each(&block)
        x = 0
        while x < length
          yield self[x]
          x += 1
        end
        self
      end

      ###
      # Concatenated +inner_text+ of all nodes.
      def inner_text
        map { |j| j.inner_text }.join('')
      end
      alias :text :inner_text

      ###
      # Replace every node with a fresh node made from +html+ and re-attach
      # the node under the deepest first-child descendant of that new node.
      # Returns self.
      def wrap(html, &blk)
        each do |j|
          new_parent = Nokogiri.make(html, &blk)
          j.replace(new_parent)
          nest = new_parent
          nest = nest.child while nest.child
          j.parent = nest
        end
        self
      end

      def to_s
        map { |x| x.to_s }.join
      end

      def to_html
        map { |x| x.to_html }.join('')
      end

      def size
        length
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Nokogiri
  module XML
    class Reader
      include Enumerable

      ###
      # Call +block+ with every value returned by +read+ until +read+
      # returns nil or false.
      def each(&block)
        while (current = read)
          block.call(current)
        end
      end
      private :initialize
    end
  end
end