superfeedr-nokogiri 1.4.0.20091116183308

Sign up to get free protection for your applications and to get access to all the features.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
2
+ #define NOKOGIRI_XML_PROCESSING_INSTRUCTION
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_processing_instruction();
7
+
8
+ extern VALUE cNokogiriXmlProcessingInstruction;
9
+ #endif
@@ -0,0 +1,593 @@
1
+ #include <xml_reader.h>
2
+
3
+ static void dealloc(xmlTextReaderPtr reader)
4
+ {
5
+ NOKOGIRI_DEBUG_START(reader);
6
+ xmlFreeTextReader(reader);
7
+ NOKOGIRI_DEBUG_END(reader);
8
+ }
9
+
10
+ static int has_attributes(xmlTextReaderPtr reader)
11
+ {
12
+ /*
13
+ * this implementation of xmlTextReaderHasAttributes explicitly includes
14
+ * namespaces and properties, because some earlier versions ignore
15
+ * namespaces.
16
+ */
17
+ xmlNodePtr node ;
18
+ node = xmlTextReaderCurrentNode(reader);
19
+ if (node == NULL)
20
+ return(0);
21
+
22
+ if ((node->type == XML_ELEMENT_NODE) &&
23
+ ((node->properties != NULL) || (node->nsDef != NULL)))
24
+ return(1);
25
+ return(0);
26
+ }
27
+
28
+ #define XMLNS_PREFIX "xmlns"
29
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
30
+ #define XMLNS_BUFFER_LEN 128
31
+ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
32
+ {
33
+ xmlNsPtr ns;
34
+ static char buffer[XMLNS_BUFFER_LEN] ;
35
+ char *key ;
36
+ size_t keylen ;
37
+
38
+ if (node->type != XML_ELEMENT_NODE) return ;
39
+
40
+ ns = node->nsDef;
41
+ while (ns != NULL) {
42
+
43
+ keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
44
+ if (keylen > XMLNS_BUFFER_LEN) {
45
+ key = (char*)malloc(keylen) ;
46
+ } else {
47
+ key = buffer ;
48
+ }
49
+
50
+ if (ns->prefix) {
51
+ sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
52
+ } else {
53
+ sprintf(key, "%s", XMLNS_PREFIX);
54
+ }
55
+
56
+ rb_hash_aset(attr_hash,
57
+ NOKOGIRI_STR_NEW2(key),
58
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
59
+ );
60
+ if (key != buffer) {
61
+ free(key);
62
+ }
63
+ ns = ns->next ;
64
+ }
65
+ }
66
+
67
+
68
+ /*
69
+ * call-seq:
70
+ * default?
71
+ *
72
+ * Was an attribute generated from the default value in the DTD or schema?
73
+ */
74
+ static VALUE default_eh(VALUE self)
75
+ {
76
+ xmlTextReaderPtr reader;
77
+ Data_Get_Struct(self, xmlTextReader, reader);
78
+ int eh = xmlTextReaderIsDefault(reader);
79
+ if(eh == 0) return Qfalse;
80
+ if(eh == 1) return Qtrue;
81
+
82
+ return Qnil;
83
+ }
84
+
85
+ /*
86
+ * call-seq:
87
+ * value?
88
+ *
89
+ * Does this node have a text value?
90
+ */
91
+ static VALUE value_eh(VALUE self)
92
+ {
93
+ xmlTextReaderPtr reader;
94
+ Data_Get_Struct(self, xmlTextReader, reader);
95
+ int eh = xmlTextReaderHasValue(reader);
96
+ if(eh == 0) return Qfalse;
97
+ if(eh == 1) return Qtrue;
98
+
99
+ return Qnil;
100
+ }
101
+
102
+ /*
103
+ * call-seq:
104
+ * attributes?
105
+ *
106
+ * Does this node have attributes?
107
+ */
108
+ static VALUE attributes_eh(VALUE self)
109
+ {
110
+ xmlTextReaderPtr reader;
111
+ Data_Get_Struct(self, xmlTextReader, reader);
112
+ int eh = has_attributes(reader);
113
+ if(eh == 0) return Qfalse;
114
+ if(eh == 1) return Qtrue;
115
+
116
+ return Qnil;
117
+ }
118
+
119
+ /*
120
+ * call-seq:
121
+ * namespaces
122
+ *
123
+ * Get a hash of namespaces for this Node
124
+ */
125
+ static VALUE namespaces(VALUE self)
126
+ {
127
+ xmlTextReaderPtr reader;
128
+ VALUE attr ;
129
+
130
+ Data_Get_Struct(self, xmlTextReader, reader);
131
+
132
+ attr = rb_hash_new() ;
133
+
134
+ if (! has_attributes(reader))
135
+ return attr ;
136
+
137
+ xmlNodePtr ptr = xmlTextReaderExpand(reader);
138
+ if(ptr == NULL) return Qnil;
139
+
140
+ Nokogiri_xml_node_namespaces(ptr, attr);
141
+
142
+ return attr ;
143
+ }
144
+
145
+ /*
146
+ * call-seq:
147
+ * attribute_nodes
148
+ *
149
+ * Get a list of attributes for this Node
150
+ */
151
+ static VALUE attribute_nodes(VALUE self)
152
+ {
153
+ xmlTextReaderPtr reader;
154
+ VALUE attr ;
155
+
156
+ Data_Get_Struct(self, xmlTextReader, reader);
157
+
158
+ attr = rb_ary_new() ;
159
+
160
+ if (! has_attributes(reader))
161
+ return attr ;
162
+
163
+ xmlNodePtr ptr = xmlTextReaderExpand(reader);
164
+ if(ptr == NULL) return Qnil;
165
+
166
+ Nokogiri_xml_node_properties(ptr, attr);
167
+
168
+ return attr ;
169
+ }
170
+
171
+ /*
172
+ * call-seq:
173
+ * attribute_at(index)
174
+ *
175
+ * Get the value of attribute at +index+
176
+ */
177
+ static VALUE attribute_at(VALUE self, VALUE index)
178
+ {
179
+ xmlTextReaderPtr reader;
180
+ Data_Get_Struct(self, xmlTextReader, reader);
181
+
182
+ if(NIL_P(index)) return Qnil;
183
+ index = rb_Integer(index);
184
+
185
+ xmlChar * value = xmlTextReaderGetAttributeNo(
186
+ reader,
187
+ NUM2INT(index)
188
+ );
189
+ if(value == NULL) return Qnil;
190
+
191
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
192
+ xmlFree(value);
193
+ return rb_value;
194
+ }
195
+
196
+ /*
197
+ * call-seq:
198
+ * attribute(name)
199
+ *
200
+ * Get the value of attribute named +name+
201
+ */
202
+ static VALUE reader_attribute(VALUE self, VALUE name)
203
+ {
204
+ xmlTextReaderPtr reader;
205
+ xmlChar *value ;
206
+ Data_Get_Struct(self, xmlTextReader, reader);
207
+
208
+ if(NIL_P(name)) return Qnil;
209
+ name = StringValue(name) ;
210
+
211
+ value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
212
+ if(value == NULL) {
213
+ /* this section is an attempt to workaround older versions of libxml that
214
+ don't handle namespaces properly in all attribute-and-friends functions */
215
+ xmlChar *prefix = NULL ;
216
+ xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
217
+ if (localname != NULL) {
218
+ value = xmlTextReaderLookupNamespace(reader, localname);
219
+ xmlFree(localname) ;
220
+ } else {
221
+ value = xmlTextReaderLookupNamespace(reader, prefix);
222
+ }
223
+ xmlFree(prefix);
224
+ }
225
+ if(value == NULL) return Qnil;
226
+
227
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
228
+ xmlFree(value);
229
+ return rb_value;
230
+ }
231
+
232
+ /*
233
+ * call-seq:
234
+ * attribute_count
235
+ *
236
+ * Get the number of attributes for the current node
237
+ */
238
+ static VALUE attribute_count(VALUE self)
239
+ {
240
+ xmlTextReaderPtr reader;
241
+ Data_Get_Struct(self, xmlTextReader, reader);
242
+ int count = xmlTextReaderAttributeCount(reader);
243
+ if(count == -1) return Qnil;
244
+
245
+ return INT2NUM((long)count);
246
+ }
247
+
248
+ /*
249
+ * call-seq:
250
+ * depth
251
+ *
252
+ * Get the depth of the node
253
+ */
254
+ static VALUE depth(VALUE self)
255
+ {
256
+ xmlTextReaderPtr reader;
257
+ Data_Get_Struct(self, xmlTextReader, reader);
258
+ int depth = xmlTextReaderDepth(reader);
259
+ if(depth == -1) return Qnil;
260
+
261
+ return INT2NUM((long)depth);
262
+ }
263
+
264
+ /*
265
+ * call-seq:
266
+ * xml_version
267
+ *
268
+ * Get the XML version of the document being read
269
+ */
270
+ static VALUE xml_version(VALUE self)
271
+ {
272
+ xmlTextReaderPtr reader;
273
+ Data_Get_Struct(self, xmlTextReader, reader);
274
+ const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
275
+ if(version == NULL) return Qnil;
276
+
277
+ return NOKOGIRI_STR_NEW2(version);
278
+ }
279
+
280
+ /*
281
+ * call-seq:
282
+ * lang
283
+ *
284
+ * Get the xml:lang scope within which the node resides.
285
+ */
286
+ static VALUE lang(VALUE self)
287
+ {
288
+ xmlTextReaderPtr reader;
289
+ Data_Get_Struct(self, xmlTextReader, reader);
290
+ const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
291
+ if(lang == NULL) return Qnil;
292
+
293
+ return NOKOGIRI_STR_NEW2(lang);
294
+ }
295
+
296
+ /*
297
+ * call-seq:
298
+ * value
299
+ *
300
+ * Get the text value of the node if present
301
+ */
302
+ static VALUE value(VALUE self)
303
+ {
304
+ xmlTextReaderPtr reader;
305
+ Data_Get_Struct(self, xmlTextReader, reader);
306
+ const char * value = (const char *)xmlTextReaderConstValue(reader);
307
+ if(value == NULL) return Qnil;
308
+
309
+ return NOKOGIRI_STR_NEW2(value);
310
+ }
311
+
312
+ /*
313
+ * call-seq:
314
+ * prefix
315
+ *
316
+ * Get the shorthand reference to the namespace associated with the node.
317
+ */
318
+ static VALUE prefix(VALUE self)
319
+ {
320
+ xmlTextReaderPtr reader;
321
+ Data_Get_Struct(self, xmlTextReader, reader);
322
+ const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
323
+ if(prefix == NULL) return Qnil;
324
+
325
+ return NOKOGIRI_STR_NEW2(prefix);
326
+ }
327
+
328
+ /*
329
+ * call-seq:
330
+ * namespace_uri
331
+ *
332
+ * Get the URI defining the namespace associated with the node
333
+ */
334
+ static VALUE namespace_uri(VALUE self)
335
+ {
336
+ xmlTextReaderPtr reader;
337
+ Data_Get_Struct(self, xmlTextReader, reader);
338
+ const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
339
+ if(uri == NULL) return Qnil;
340
+
341
+ return NOKOGIRI_STR_NEW2(uri);
342
+ }
343
+
344
+ /*
345
+ * call-seq:
346
+ * local_name
347
+ *
348
+ * Get the local name of the node
349
+ */
350
+ static VALUE local_name(VALUE self)
351
+ {
352
+ xmlTextReaderPtr reader;
353
+ Data_Get_Struct(self, xmlTextReader, reader);
354
+ const char * name = (const char *)xmlTextReaderConstLocalName(reader);
355
+ if(name == NULL) return Qnil;
356
+
357
+ return NOKOGIRI_STR_NEW2(name);
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * name
363
+ *
364
+ * Get the name of the node
365
+ */
366
+ static VALUE name(VALUE self)
367
+ {
368
+ xmlTextReaderPtr reader;
369
+ Data_Get_Struct(self, xmlTextReader, reader);
370
+ const char * name = (const char *)xmlTextReaderConstName(reader);
371
+ if(name == NULL) return Qnil;
372
+
373
+ return NOKOGIRI_STR_NEW2(name);
374
+ }
375
+
376
+ /*
377
+ * call-seq:
378
+ * state
379
+ *
380
+ * Get the state of the reader
381
+ */
382
+ static VALUE state(VALUE self)
383
+ {
384
+ xmlTextReaderPtr reader;
385
+ Data_Get_Struct(self, xmlTextReader, reader);
386
+ return INT2NUM((long)xmlTextReaderReadState(reader));
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * node_type
392
+ *
393
+ * Get the type of readers current node
394
+ */
395
+ static VALUE node_type(VALUE self)
396
+ {
397
+ xmlTextReaderPtr reader;
398
+ Data_Get_Struct(self, xmlTextReader, reader);
399
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
400
+ }
401
+
402
+ /*
403
+ * call-seq:
404
+ * read
405
+ *
406
+ * Move the Reader forward through the XML document.
407
+ */
408
+ static VALUE read_more(VALUE self)
409
+ {
410
+ xmlTextReaderPtr reader;
411
+ Data_Get_Struct(self, xmlTextReader, reader);
412
+
413
+ VALUE error_list = rb_funcall(self, rb_intern("errors"), 0);
414
+
415
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
416
+ int ret = xmlTextReaderRead(reader);
417
+ xmlSetStructuredErrorFunc(NULL, NULL);
418
+
419
+ if(ret == 1) return self;
420
+ if(ret == 0) return Qnil;
421
+
422
+ xmlErrorPtr error = xmlGetLastError();
423
+ if(error)
424
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
425
+ else
426
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
427
+
428
+ return Qnil;
429
+ }
430
+
431
+ /*
432
+ * call-seq:
433
+ * inner_xml
434
+ *
435
+ * Read the contents of the current node, including child nodes and markup.
436
+ */
437
+ static VALUE inner_xml(VALUE self)
438
+ {
439
+ xmlTextReaderPtr reader;
440
+ Data_Get_Struct(self, xmlTextReader, reader);
441
+
442
+ const char * value = (const char *)xmlTextReaderReadInnerXml(reader);
443
+
444
+ if(value == NULL)
445
+ return Qnil;
446
+ else
447
+ return NOKOGIRI_STR_NEW2(value);
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * outer_xml
453
+ *
454
+ * Read the current node and its contents, including child nodes and markup.
455
+ */
456
+ static VALUE outer_xml(VALUE self)
457
+ {
458
+ xmlTextReaderPtr reader;
459
+ Data_Get_Struct(self, xmlTextReader, reader);
460
+
461
+ const char * value = (const char *)xmlTextReaderReadOuterXml(reader);
462
+
463
+ if(value == NULL)
464
+ return Qnil;
465
+ else
466
+ return NOKOGIRI_STR_NEW2(value);
467
+ }
468
+
469
+ /*
470
+ * call-seq:
471
+ * from_memory(string, url = nil, encoding = nil, options = 0)
472
+ *
473
+ * Create a new reader that parses +string+
474
+ */
475
+ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
476
+ {
477
+ VALUE rb_buffer, rb_url, encoding, rb_options;
478
+
479
+ const char * c_url = NULL;
480
+ const char * c_encoding = NULL;
481
+ int c_options = 0;
482
+
483
+ rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
484
+
485
+ if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
486
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
487
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
488
+ if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
489
+
490
+ xmlTextReaderPtr reader = xmlReaderForMemory(
491
+ StringValuePtr(rb_buffer),
492
+ RSTRING_LEN(rb_buffer),
493
+ c_url,
494
+ c_encoding,
495
+ c_options
496
+ );
497
+
498
+ if(reader == NULL) {
499
+ xmlFreeTextReader(reader);
500
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
501
+ }
502
+
503
+ VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
504
+ VALUE args[3] = {rb_buffer, rb_url, encoding};
505
+ rb_obj_call_init(rb_reader, 3, args);
506
+
507
+ return rb_reader;
508
+ }
509
+
510
+ /*
511
+ * call-seq:
512
+ * from_io(io, url = nil, encoding = nil, options = 0)
513
+ *
514
+ * Create a new reader that parses +io+
515
+ */
516
+ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
517
+ {
518
+ VALUE rb_io, rb_url, encoding, rb_options;
519
+
520
+ const char * c_url = NULL;
521
+ const char * c_encoding = NULL;
522
+ int c_options = 0;
523
+
524
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
525
+
526
+ if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
527
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
528
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
529
+ if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
530
+
531
+ xmlTextReaderPtr reader = xmlReaderForIO(
532
+ (xmlInputReadCallback)io_read_callback,
533
+ (xmlInputCloseCallback)io_close_callback,
534
+ (void *)rb_io,
535
+ c_url,
536
+ c_encoding,
537
+ c_options
538
+ );
539
+
540
+ if(reader == NULL) {
541
+ xmlFreeTextReader(reader);
542
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
543
+ }
544
+
545
+ VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
546
+ VALUE args[3] = {rb_io, rb_url, encoding};
547
+ rb_obj_call_init(rb_reader, 3, args);
548
+
549
+ return rb_reader;
550
+ }
551
+
552
+ VALUE cNokogiriXmlReader;
553
+
554
+ void init_xml_reader()
555
+ {
556
+ VALUE module = rb_define_module("Nokogiri");
557
+ VALUE xml = rb_define_module_under(module, "XML");
558
+
559
+ /*
560
+ * The Reader parser allows you to effectively pull parse an XML document.
561
+ * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
562
+ * node. Note that you may only iterate over the document once!
563
+ */
564
+ VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject);
565
+
566
+ cNokogiriXmlReader = klass;
567
+
568
+ rb_define_singleton_method(klass, "from_memory", from_memory, -1);
569
+ rb_define_singleton_method(klass, "from_io", from_io, -1);
570
+
571
+ rb_define_method(klass, "read", read_more, 0);
572
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
573
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
574
+ rb_define_method(klass, "state", state, 0);
575
+ rb_define_method(klass, "node_type", node_type, 0);
576
+ rb_define_method(klass, "name", name, 0);
577
+ rb_define_method(klass, "local_name", local_name, 0);
578
+ rb_define_method(klass, "namespace_uri", namespace_uri, 0);
579
+ rb_define_method(klass, "prefix", prefix, 0);
580
+ rb_define_method(klass, "value", value, 0);
581
+ rb_define_method(klass, "lang", lang, 0);
582
+ rb_define_method(klass, "xml_version", xml_version, 0);
583
+ rb_define_method(klass, "depth", depth, 0);
584
+ rb_define_method(klass, "attribute_count", attribute_count, 0);
585
+ rb_define_method(klass, "attribute", reader_attribute, 1);
586
+ rb_define_method(klass, "namespaces", namespaces, 0);
587
+ rb_define_method(klass, "attribute_at", attribute_at, 1);
588
+ rb_define_method(klass, "attributes?", attributes_eh, 0);
589
+ rb_define_method(klass, "value?", value_eh, 0);
590
+ rb_define_method(klass, "default?", default_eh, 0);
591
+
592
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
593
+ }