tenderlove-nokogiri 0.0.0.20081001111445

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,32 @@
module Nokogiri
  module XML
    ###
    # SAX handler that rebuilds the elements reported while parsing a markup
    # fragment and inserts each completed top-level element immediately
    # before a target node.
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      ###
      # +node+ is the node that new siblings are inserted in front of.
      # +original_html+ is the markup being parsed; it is kept so that
      # element events for tags that never occur in it can be ignored.
      def initialize node, original_html
        @original_html = original_html
        @node = node
        @stack = []
      end

      ###
      # Create a node called +name+, copy the flat +attrs+ list
      # (key, value, key, value, ...) onto it, attach it to the element
      # currently open (if any) and make it the current element.
      def start_element name, attrs = []
        return unless in_original?(name)
        node = Node.new(name)
        Hash[*attrs].each do |k,v|
          node[k] = v
        end
        node.parent = @stack.last unless @stack.empty?
        @stack << node
      end

      ###
      # Append +string+ to the content of the element currently open.
      def characters string
        node = @stack.last
        # Text may be reported while no tracked element is open (the stack
        # only holds elements accepted by start_element); there is nothing
        # to append to in that case.
        return unless node
        node.content += string
      end

      ###
      # Close the current element.  When it is the outermost tracked
      # element it is inserted before the target node.
      def end_element name
        return unless in_original?(name)
        @node.add_previous_sibling @stack.last if @stack.length == 1
        @stack.pop
      end

      private

      ###
      # True when an opening tag for +name+ occurs in the original markup.
      # The name is escaped so regexp metacharacters in it are matched
      # literally, and it must be followed by whitespace, "/", ">" or the
      # end of the markup so that "b" is not satisfied by "<body".
      def in_original?(name)
        @original_html =~ /<#{Regexp.escape(name)}(?=[\s\/>]|\z)/i
      end
    end
  end
end
@@ -0,0 +1,79 @@
module Nokogiri
  module XML
    ###
    # DSL for building a document.  The block given to +new+ is evaluated
    # against the builder, and every call to an unknown method creates an
    # element named after that method under the current parent.
    class Builder
      attr_accessor :doc, :parent

      ###
      # Create the document and evaluate +block+ (optional) against this
      # builder.  The document class is the constant named +Document+ that
      # sits in the same namespace as the builder's own class.
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        # Walk the namespace with const_get instead of eval-ing a string.
        document_class = namespace.inject(Object) { |scope, const|
          scope.const_get(const)
        }
        @doc = document_class.new
        @parent = @doc
        # instance_eval raises when handed no block, so only call it when
        # one was supplied.
        instance_eval(&block) if block
        @parent = @doc
      end

      ###
      # Insert a text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CDATA node containing +string+ under the current parent.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the built document by delegating to the document's to_xml.
      def to_xml
        @doc.to_xml
      end

      ###
      # Create an element named +method+.  When the first argument is a Hash
      # its pairs become attributes (keys and values converted with to_s);
      # any other truthy first argument becomes the element's content.
      # A block, if given, is evaluated with the new element as parent.
      def method_missing(method, *args, &block)
        content = args.first
        node = Nokogiri::XML::Node.new(method.to_s) do |n|
          if content.is_a?(Hash)
            content.each { |k,v| n[k.to_s] = v.to_s }
          elsif content
            n.content = content
          end
        end
        insert(node, &block)
      end

      private

      ###
      # Attach +node+ to the current parent.  With a block, +node+ becomes
      # the current parent while the block runs.  Returns a NodeBuilder
      # wrapping +node+.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          @parent = node
          instance_eval(&block)
          @parent = node.parent
        end
        NodeBuilder.new(node, self)
      end

      ###
      # Wrapper returned for every inserted node.  Unknown methods called on
      # it set the id, an attribute or a class on the wrapped node.
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        ###
        # * <tt>name!</tt> sets the +id+ attribute to "name"
        # * <tt>name=</tt> sets the attribute "name" to the first argument
        # * anything else appends the method name to the +class+ attribute
        #
        # For the first and last form a truthy first argument becomes the
        # node's content.  With a block, the block is evaluated against the
        # document builder with the wrapped node as parent and its value is
        # returned; otherwise the NodeBuilder itself is returned.
        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = Regexp.last_match(1)
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[Regexp.last_match(1)] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end
          if block_given?
            previous_parent = @doc_builder.parent
            @doc_builder.parent = @node
            begin
              return @doc_builder.instance_eval(&block)
            ensure
              # Put the builder's parent back so that elements created
              # after this block are not nested under the wrapped node.
              @doc_builder.parent = previous_parent
            end
          end
          self
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module Nokogiri
  module XML
    ###
    # A Node subclass that additionally keeps a registry of decorator
    # modules and applies them to other objects on request.
    class Document < Node
      ###
      # The decorator registry: a Hash whose missing keys are initialised
      # to a fresh empty Array on first lookup.
      def decorators
        @decorators ||= Hash.new { |registry, name| registry[name] = [] }
      end

      ###
      # Extend +node+ with each decorator registered under the lower-cased,
      # unqualified name of its class.
      def decorate(node)
        unqualified = node.class.name.split('::')[-1]
        decorators[unqualified.downcase].each { |mod| node.extend(mod) }
      end

      ###
      # Return the result of +serialize+.
      def to_xml
        serialize
      end
    end
  end
end
@@ -0,0 +1,162 @@
module Nokogiri
  module XML
    ###
    # Ruby-level conveniences for nodes: searching by XPath or CSS,
    # attribute access, sibling insertion from markup and serialization
    # helpers.  Methods such as +child+, +next_sibling+, +key?+, +get+,
    # +path+ and +type+ are used here but defined elsewhere.
    class Node
      # Values compared against +type+ by the predicate methods below.
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      ###
      # Ask the owning document (when there is one) to decorate this node.
      def decorate!
        document.decorate(self) if document
      end

      ###
      # Return a decorated NodeSet holding this node's children, skipping
      # those for which +blank?+ is true.
      def children
        list = NodeSet.new
        list.document = document
        document.decorate(list)

        node = child
        while node
          list << node unless node.blank?
          node = node.next
        end
        list
      end

      ###
      # Search with any mix of XPath and CSS expressions.  A path starting
      # with "/" or "./" is used as XPath unchanged; anything else is parsed
      # as CSS and converted to XPath.
      def find(*paths)
        xpaths = paths.map { |path|
          if path =~ /^(\.\/|\/)/
            path
          else
            CSS::Parser.parse(path).map { |ast| ast.to_xpath }
          end
        }
        find_by_xpath(*xpaths.flatten.uniq)
      end
      alias :search :find
      alias :/ :find

      ###
      # Evaluate each XPath in +paths+ against this node.  One path returns
      # its node set directly; several paths return a single NodeSet holding
      # the nodes of every result in order.
      def find_by_xpath *paths
        sets = paths.map { |path|
          set = XPathContext.new(self).evaluate(path).node_set
          set.document = document
          document.decorate(set)
          set
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          # Give the combined set its document as well, matching the
          # single-path result; NodeSet#search calls document.decorate.
          combined.document = document
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Convert each CSS rule to XPath and search with the results.
      def find_by_css *rules
        xpaths = rules.map { |rule|
          CSS::Parser.parse(rule).map { |ast| ast.to_xpath }
        }
        find_by_xpath(*xpaths.flatten.uniq)
      end

      ###
      # First result of searching for +path+.
      def at path
        search("#{path}").first
      end

      ###
      # Value of attribute +property+, or nil when the node has no such key.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ###
      # The next sibling of this node.
      def next
        next_sibling
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Each sibling is added directly after this node, so adding them in
        # reverse leaves them in their parsed order.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ###
      # True when the node has a key named +property+.
      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]

      ###
      # Set attribute +name+ to +value+.
      def set_attribute(name, value)
        self[name] = value
      end

      ###
      # Remove attribute +name+.
      def remove_attribute name
        remove(name)
      end

      ###
      # The node's content.
      def inner_text
        content
      end

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first; otherwise the
      # string is stored exactly as given.  Assignment syntax can only pass
      # +string+, so +encode+ is reachable through +send+ only.
      def content= string, encode = true
        self.native_content = encode ? encode_special_chars(string) : string
      end

      ###
      # True when +type+ is COMMENT_NODE.
      def comment?
        type == COMMENT_NODE
      end

      ###
      # True when +type+ is DOCUMENT_NODE.
      def xml?
        type == DOCUMENT_NODE
      end

      ###
      # True when +type+ is HTML_DOCUMENT_NODE.
      def html?
        type == HTML_DOCUMENT_NODE
      end

      ###
      # Same output as +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ###
      # Turn +path+ into a CSS selector: segments are joined with " > " and
      # every "[n]" index becomes ":nth-of-type(n)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      ###
      # Same value as +path+.
      def xpath
        path
      end

      ###
      # The SAX::Parser class living in the same namespace as the owning
      # document's class, resolved with const_get rather than eval.
      def sax_parser_class
        names = document.class.name.split('::')
        names[-1, 1] = ['SAX', 'Parser']
        names.inject(Object) { |scope, const| scope.const_get(const) }
      end
      private :sax_parser_class
    end
  end
end
@@ -0,0 +1,136 @@
module Nokogiri
  module XML
    ###
    # Ruby-level conveniences for a set of nodes.  +push+, +length+ and
    # +[]+ are used here but defined elsewhere.
    class NodeSet
      include Enumerable

      attr_accessor :document

      ###
      # Yields the new set to the block, when one is given.
      def initialize
        yield self if block_given?
      end

      ###
      # The node at index 0.
      def first
        self[0]
      end

      ###
      # The node at index length - 1.
      def last
        self[length - 1]
      end

      ###
      # Insert +datum+ before the first node of the set.
      def before datum
        first.before datum
      end

      ###
      # Insert +datum+ after the last node of the set.
      def after datum
        last.after datum
      end

      ###
      # Append +node+ to the set.
      def <<(node)
        push(node)
      end

      ###
      # Search every node in the set for +path+ and collect all results in
      # one decorated NodeSet.
      def search path
        sub_set = NodeSet.new
        document.decorate(sub_set)
        each do |node|
          node.search(path).each do |sub_node|
            sub_set << sub_node
          end
        end
        sub_set.document = document
        sub_set
      end
      alias :/ :search

      ###
      # First result of searching the set for +path+.
      def at path
        search(path).first
      end

      ###
      # Add +name+ to the class attribute of every node that responds to
      # get_attribute.  Returns self.
      def add_class name
        each do |el|
          next unless el.respond_to? :get_attribute
          classes = el.get_attribute('class').to_s.split(" ")
          el.set_attribute('class', classes.push(name).uniq.join(" "))
        end
        self
      end

      ###
      # Remove +name+ from the class attribute of every node that responds
      # to get_attribute; without +name+ the class attribute itself is
      # removed.  Returns self.
      def remove_class name = nil
        each do |el|
          next unless el.respond_to? :get_attribute
          if name
            classes = el.get_attribute('class').to_s.split(" ")
            el.set_attribute('class', (classes - [name]).uniq.join(" "))
          else
            el.remove_attribute("class")
          end
        end
        self
      end

      ###
      # Attribute reader and writer for the whole set.
      #
      # * With +value+ or a block, set +key+ on every node (the block is
      #   called with the node to compute the value) and return self.
      # * With a Hash as +key+, set every pair on every node and return self.
      # * Otherwise return attribute +key+ of the first node, or nil when
      #   the set is empty.
      def attr key, value = nil, &blk
        if value || blk
          each do |el|
            el.set_attribute(key, value || blk[el])
          end
          return self
        end
        if key.is_a? Hash
          key.each { |k,v| attr(k,v) }
          return self
        end
        # An empty set has no first node to read the attribute from.
        return nil if length == 0
        self[0].get_attribute(key)
      end
      alias_method :set, :attr

      ###
      # Remove attribute +name+ from every node that responds to
      # remove_attribute.  Returns self.
      def remove_attr name
        each do |el|
          next unless el.respond_to? :remove_attribute
          el.remove_attribute(name)
        end
        self
      end

      ###
      # Iterate over each node, yielding to +block+.  Returns self.
      def each(&block)
        # length is re-read on every pass, as in the index loop this
        # replaces, so a block that changes the set is still honoured.
        x = 0
        while x < length
          yield self[x]
          x += 1
        end
        self
      end

      ###
      # Concatenation of the inner_text of every node.
      def inner_text
        map { |j| j.inner_text }.join('')
      end
      alias :text :inner_text

      ###
      # Replace each node with a structure made by Nokogiri.make from +html+
      # (and the optional block), then re-parent the node under the deepest
      # first child of that structure.  Returns self.
      def wrap(html, &blk)
        each do |j|
          new_parent = Nokogiri.make(html, &blk)
          j.replace(new_parent)
          nest = new_parent
          # Follow first children down to the innermost element.
          nest = nest.child until nest.child.nil?
          j.parent = nest
        end
        self
      end

      ###
      # Concatenation of to_s of every node.
      def to_s
        map { |x| x.to_s }.join
      end

      ###
      # Concatenation of to_html of every node.
      def to_html
        map { |x| x.to_html }.join('')
      end

      ###
      # Same value as +length+.
      def size
        length
      end
    end
  end
end
@@ -0,0 +1,14 @@
module Nokogiri
  module XML
    ###
    # Makes a reader enumerable by repeatedly calling +read+, which is
    # defined elsewhere.
    class Reader
      include Enumerable

      ###
      # Call +block+ with every value returned by +read+, stopping as soon
      # as +read+ returns nil or false.
      def each(&block)
        node = read
        while node
          block.call(node)
          node = read
        end
      end
      private :initialize
    end
  end
end