superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
2
+ #define NOKOGIRI_XML_PROCESSING_INSTRUCTION
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_processing_instruction();
7
+
8
+ extern VALUE cNokogiriXmlProcessingInstruction;
9
+ #endif
@@ -0,0 +1,593 @@
1
+ #include <xml_reader.h>
2
+
3
+ static void dealloc(xmlTextReaderPtr reader)
4
+ {
5
+ NOKOGIRI_DEBUG_START(reader);
6
+ xmlFreeTextReader(reader);
7
+ NOKOGIRI_DEBUG_END(reader);
8
+ }
9
+
10
+ static int has_attributes(xmlTextReaderPtr reader)
11
+ {
12
+ /*
13
+ * this implementation of xmlTextReaderHasAttributes explicitly includes
14
+ * namespaces and properties, because some earlier versions ignore
15
+ * namespaces.
16
+ */
17
+ xmlNodePtr node ;
18
+ node = xmlTextReaderCurrentNode(reader);
19
+ if (node == NULL)
20
+ return(0);
21
+
22
+ if ((node->type == XML_ELEMENT_NODE) &&
23
+ ((node->properties != NULL) || (node->nsDef != NULL)))
24
+ return(1);
25
+ return(0);
26
+ }
27
+
28
+ #define XMLNS_PREFIX "xmlns"
29
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
30
+ #define XMLNS_BUFFER_LEN 128
31
+ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
32
+ {
33
+ xmlNsPtr ns;
34
+ static char buffer[XMLNS_BUFFER_LEN] ;
35
+ char *key ;
36
+ size_t keylen ;
37
+
38
+ if (node->type != XML_ELEMENT_NODE) return ;
39
+
40
+ ns = node->nsDef;
41
+ while (ns != NULL) {
42
+
43
+ keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
44
+ if (keylen > XMLNS_BUFFER_LEN) {
45
+ key = (char*)malloc(keylen) ;
46
+ } else {
47
+ key = buffer ;
48
+ }
49
+
50
+ if (ns->prefix) {
51
+ sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
52
+ } else {
53
+ sprintf(key, "%s", XMLNS_PREFIX);
54
+ }
55
+
56
+ rb_hash_aset(attr_hash,
57
+ NOKOGIRI_STR_NEW2(key),
58
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
59
+ );
60
+ if (key != buffer) {
61
+ free(key);
62
+ }
63
+ ns = ns->next ;
64
+ }
65
+ }
66
+
67
+
68
+ /*
69
+ * call-seq:
70
+ * default?
71
+ *
72
+ * Was an attribute generated from the default value in the DTD or schema?
73
+ */
74
+ static VALUE default_eh(VALUE self)
75
+ {
76
+ xmlTextReaderPtr reader;
77
+ Data_Get_Struct(self, xmlTextReader, reader);
78
+ int eh = xmlTextReaderIsDefault(reader);
79
+ if(eh == 0) return Qfalse;
80
+ if(eh == 1) return Qtrue;
81
+
82
+ return Qnil;
83
+ }
84
+
85
+ /*
86
+ * call-seq:
87
+ * value?
88
+ *
89
+ * Does this node have a text value?
90
+ */
91
+ static VALUE value_eh(VALUE self)
92
+ {
93
+ xmlTextReaderPtr reader;
94
+ Data_Get_Struct(self, xmlTextReader, reader);
95
+ int eh = xmlTextReaderHasValue(reader);
96
+ if(eh == 0) return Qfalse;
97
+ if(eh == 1) return Qtrue;
98
+
99
+ return Qnil;
100
+ }
101
+
102
+ /*
103
+ * call-seq:
104
+ * attributes?
105
+ *
106
+ * Does this node have attributes?
107
+ */
108
+ static VALUE attributes_eh(VALUE self)
109
+ {
110
+ xmlTextReaderPtr reader;
111
+ Data_Get_Struct(self, xmlTextReader, reader);
112
+ int eh = has_attributes(reader);
113
+ if(eh == 0) return Qfalse;
114
+ if(eh == 1) return Qtrue;
115
+
116
+ return Qnil;
117
+ }
118
+
119
+ /*
120
+ * call-seq:
121
+ * namespaces
122
+ *
123
+ * Get a hash of namespaces for this Node
124
+ */
125
+ static VALUE namespaces(VALUE self)
126
+ {
127
+ xmlTextReaderPtr reader;
128
+ VALUE attr ;
129
+
130
+ Data_Get_Struct(self, xmlTextReader, reader);
131
+
132
+ attr = rb_hash_new() ;
133
+
134
+ if (! has_attributes(reader))
135
+ return attr ;
136
+
137
+ xmlNodePtr ptr = xmlTextReaderExpand(reader);
138
+ if(ptr == NULL) return Qnil;
139
+
140
+ Nokogiri_xml_node_namespaces(ptr, attr);
141
+
142
+ return attr ;
143
+ }
144
+
145
+ /*
146
+ * call-seq:
147
+ * attribute_nodes
148
+ *
149
+ * Get a list of attributes for this Node
150
+ */
151
+ static VALUE attribute_nodes(VALUE self)
152
+ {
153
+ xmlTextReaderPtr reader;
154
+ VALUE attr ;
155
+
156
+ Data_Get_Struct(self, xmlTextReader, reader);
157
+
158
+ attr = rb_ary_new() ;
159
+
160
+ if (! has_attributes(reader))
161
+ return attr ;
162
+
163
+ xmlNodePtr ptr = xmlTextReaderExpand(reader);
164
+ if(ptr == NULL) return Qnil;
165
+
166
+ Nokogiri_xml_node_properties(ptr, attr);
167
+
168
+ return attr ;
169
+ }
170
+
171
+ /*
172
+ * call-seq:
173
+ * attribute_at(index)
174
+ *
175
+ * Get the value of attribute at +index+
176
+ */
177
+ static VALUE attribute_at(VALUE self, VALUE index)
178
+ {
179
+ xmlTextReaderPtr reader;
180
+ Data_Get_Struct(self, xmlTextReader, reader);
181
+
182
+ if(NIL_P(index)) return Qnil;
183
+ index = rb_Integer(index);
184
+
185
+ xmlChar * value = xmlTextReaderGetAttributeNo(
186
+ reader,
187
+ NUM2INT(index)
188
+ );
189
+ if(value == NULL) return Qnil;
190
+
191
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
192
+ xmlFree(value);
193
+ return rb_value;
194
+ }
195
+
196
+ /*
197
+ * call-seq:
198
+ * attribute(name)
199
+ *
200
+ * Get the value of attribute named +name+
201
+ */
202
+ static VALUE reader_attribute(VALUE self, VALUE name)
203
+ {
204
+ xmlTextReaderPtr reader;
205
+ xmlChar *value ;
206
+ Data_Get_Struct(self, xmlTextReader, reader);
207
+
208
+ if(NIL_P(name)) return Qnil;
209
+ name = StringValue(name) ;
210
+
211
+ value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
212
+ if(value == NULL) {
213
+ /* this section is an attempt to workaround older versions of libxml that
214
+ don't handle namespaces properly in all attribute-and-friends functions */
215
+ xmlChar *prefix = NULL ;
216
+ xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
217
+ if (localname != NULL) {
218
+ value = xmlTextReaderLookupNamespace(reader, localname);
219
+ xmlFree(localname) ;
220
+ } else {
221
+ value = xmlTextReaderLookupNamespace(reader, prefix);
222
+ }
223
+ xmlFree(prefix);
224
+ }
225
+ if(value == NULL) return Qnil;
226
+
227
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
228
+ xmlFree(value);
229
+ return rb_value;
230
+ }
231
+
232
+ /*
233
+ * call-seq:
234
+ * attribute_count
235
+ *
236
+ * Get the number of attributes for the current node
237
+ */
238
+ static VALUE attribute_count(VALUE self)
239
+ {
240
+ xmlTextReaderPtr reader;
241
+ Data_Get_Struct(self, xmlTextReader, reader);
242
+ int count = xmlTextReaderAttributeCount(reader);
243
+ if(count == -1) return Qnil;
244
+
245
+ return INT2NUM((long)count);
246
+ }
247
+
248
+ /*
249
+ * call-seq:
250
+ * depth
251
+ *
252
+ * Get the depth of the node
253
+ */
254
+ static VALUE depth(VALUE self)
255
+ {
256
+ xmlTextReaderPtr reader;
257
+ Data_Get_Struct(self, xmlTextReader, reader);
258
+ int depth = xmlTextReaderDepth(reader);
259
+ if(depth == -1) return Qnil;
260
+
261
+ return INT2NUM((long)depth);
262
+ }
263
+
264
+ /*
265
+ * call-seq:
266
+ * xml_version
267
+ *
268
+ * Get the XML version of the document being read
269
+ */
270
+ static VALUE xml_version(VALUE self)
271
+ {
272
+ xmlTextReaderPtr reader;
273
+ Data_Get_Struct(self, xmlTextReader, reader);
274
+ const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
275
+ if(version == NULL) return Qnil;
276
+
277
+ return NOKOGIRI_STR_NEW2(version);
278
+ }
279
+
280
+ /*
281
+ * call-seq:
282
+ * lang
283
+ *
284
+ * Get the xml:lang scope within which the node resides.
285
+ */
286
+ static VALUE lang(VALUE self)
287
+ {
288
+ xmlTextReaderPtr reader;
289
+ Data_Get_Struct(self, xmlTextReader, reader);
290
+ const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
291
+ if(lang == NULL) return Qnil;
292
+
293
+ return NOKOGIRI_STR_NEW2(lang);
294
+ }
295
+
296
+ /*
297
+ * call-seq:
298
+ * value
299
+ *
300
+ * Get the text value of the node if present
301
+ */
302
+ static VALUE value(VALUE self)
303
+ {
304
+ xmlTextReaderPtr reader;
305
+ Data_Get_Struct(self, xmlTextReader, reader);
306
+ const char * value = (const char *)xmlTextReaderConstValue(reader);
307
+ if(value == NULL) return Qnil;
308
+
309
+ return NOKOGIRI_STR_NEW2(value);
310
+ }
311
+
312
+ /*
313
+ * call-seq:
314
+ * prefix
315
+ *
316
+ * Get the shorthand reference to the namespace associated with the node.
317
+ */
318
+ static VALUE prefix(VALUE self)
319
+ {
320
+ xmlTextReaderPtr reader;
321
+ Data_Get_Struct(self, xmlTextReader, reader);
322
+ const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
323
+ if(prefix == NULL) return Qnil;
324
+
325
+ return NOKOGIRI_STR_NEW2(prefix);
326
+ }
327
+
328
+ /*
329
+ * call-seq:
330
+ * namespace_uri
331
+ *
332
+ * Get the URI defining the namespace associated with the node
333
+ */
334
+ static VALUE namespace_uri(VALUE self)
335
+ {
336
+ xmlTextReaderPtr reader;
337
+ Data_Get_Struct(self, xmlTextReader, reader);
338
+ const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
339
+ if(uri == NULL) return Qnil;
340
+
341
+ return NOKOGIRI_STR_NEW2(uri);
342
+ }
343
+
344
+ /*
345
+ * call-seq:
346
+ * local_name
347
+ *
348
+ * Get the local name of the node
349
+ */
350
+ static VALUE local_name(VALUE self)
351
+ {
352
+ xmlTextReaderPtr reader;
353
+ Data_Get_Struct(self, xmlTextReader, reader);
354
+ const char * name = (const char *)xmlTextReaderConstLocalName(reader);
355
+ if(name == NULL) return Qnil;
356
+
357
+ return NOKOGIRI_STR_NEW2(name);
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * name
363
+ *
364
+ * Get the name of the node
365
+ */
366
+ static VALUE name(VALUE self)
367
+ {
368
+ xmlTextReaderPtr reader;
369
+ Data_Get_Struct(self, xmlTextReader, reader);
370
+ const char * name = (const char *)xmlTextReaderConstName(reader);
371
+ if(name == NULL) return Qnil;
372
+
373
+ return NOKOGIRI_STR_NEW2(name);
374
+ }
375
+
376
+ /*
377
+ * call-seq:
378
+ * state
379
+ *
380
+ * Get the state of the reader
381
+ */
382
+ static VALUE state(VALUE self)
383
+ {
384
+ xmlTextReaderPtr reader;
385
+ Data_Get_Struct(self, xmlTextReader, reader);
386
+ return INT2NUM((long)xmlTextReaderReadState(reader));
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * node_type
392
+ *
393
+ * Get the type of readers current node
394
+ */
395
+ static VALUE node_type(VALUE self)
396
+ {
397
+ xmlTextReaderPtr reader;
398
+ Data_Get_Struct(self, xmlTextReader, reader);
399
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
400
+ }
401
+
402
+ /*
403
+ * call-seq:
404
+ * read
405
+ *
406
+ * Move the Reader forward through the XML document.
407
+ */
408
+ static VALUE read_more(VALUE self)
409
+ {
410
+ xmlTextReaderPtr reader;
411
+ Data_Get_Struct(self, xmlTextReader, reader);
412
+
413
+ VALUE error_list = rb_funcall(self, rb_intern("errors"), 0);
414
+
415
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
416
+ int ret = xmlTextReaderRead(reader);
417
+ xmlSetStructuredErrorFunc(NULL, NULL);
418
+
419
+ if(ret == 1) return self;
420
+ if(ret == 0) return Qnil;
421
+
422
+ xmlErrorPtr error = xmlGetLastError();
423
+ if(error)
424
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
425
+ else
426
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
427
+
428
+ return Qnil;
429
+ }
430
+
431
+ /*
432
+ * call-seq:
433
+ * inner_xml
434
+ *
435
+ * Read the contents of the current node, including child nodes and markup.
436
+ */
437
+ static VALUE inner_xml(VALUE self)
438
+ {
439
+ xmlTextReaderPtr reader;
440
+ Data_Get_Struct(self, xmlTextReader, reader);
441
+
442
+ const char * value = (const char *)xmlTextReaderReadInnerXml(reader);
443
+
444
+ if(value == NULL)
445
+ return Qnil;
446
+ else
447
+ return NOKOGIRI_STR_NEW2(value);
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * outer_xml
453
+ *
454
+ * Read the current node and its contents, including child nodes and markup.
455
+ */
456
+ static VALUE outer_xml(VALUE self)
457
+ {
458
+ xmlTextReaderPtr reader;
459
+ Data_Get_Struct(self, xmlTextReader, reader);
460
+
461
+ const char * value = (const char *)xmlTextReaderReadOuterXml(reader);
462
+
463
+ if(value == NULL)
464
+ return Qnil;
465
+ else
466
+ return NOKOGIRI_STR_NEW2(value);
467
+ }
468
+
469
+ /*
470
+ * call-seq:
471
+ * from_memory(string, url = nil, encoding = nil, options = 0)
472
+ *
473
+ * Create a new reader that parses +string+
474
+ */
475
+ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
476
+ {
477
+ VALUE rb_buffer, rb_url, encoding, rb_options;
478
+
479
+ const char * c_url = NULL;
480
+ const char * c_encoding = NULL;
481
+ int c_options = 0;
482
+
483
+ rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
484
+
485
+ if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
486
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
487
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
488
+ if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
489
+
490
+ xmlTextReaderPtr reader = xmlReaderForMemory(
491
+ StringValuePtr(rb_buffer),
492
+ RSTRING_LEN(rb_buffer),
493
+ c_url,
494
+ c_encoding,
495
+ c_options
496
+ );
497
+
498
+ if(reader == NULL) {
499
+ xmlFreeTextReader(reader);
500
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
501
+ }
502
+
503
+ VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
504
+ VALUE args[3] = {rb_buffer, rb_url, encoding};
505
+ rb_obj_call_init(rb_reader, 3, args);
506
+
507
+ return rb_reader;
508
+ }
509
+
510
+ /*
511
+ * call-seq:
512
+ * from_io(io, url = nil, encoding = nil, options = 0)
513
+ *
514
+ * Create a new reader that parses +io+
515
+ */
516
+ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
517
+ {
518
+ VALUE rb_io, rb_url, encoding, rb_options;
519
+
520
+ const char * c_url = NULL;
521
+ const char * c_encoding = NULL;
522
+ int c_options = 0;
523
+
524
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
525
+
526
+ if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
527
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
528
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
529
+ if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
530
+
531
+ xmlTextReaderPtr reader = xmlReaderForIO(
532
+ (xmlInputReadCallback)io_read_callback,
533
+ (xmlInputCloseCallback)io_close_callback,
534
+ (void *)rb_io,
535
+ c_url,
536
+ c_encoding,
537
+ c_options
538
+ );
539
+
540
+ if(reader == NULL) {
541
+ xmlFreeTextReader(reader);
542
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
543
+ }
544
+
545
+ VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
546
+ VALUE args[3] = {rb_io, rb_url, encoding};
547
+ rb_obj_call_init(rb_reader, 3, args);
548
+
549
+ return rb_reader;
550
+ }
551
+
552
+ VALUE cNokogiriXmlReader;
553
+
554
+ void init_xml_reader()
555
+ {
556
+ VALUE module = rb_define_module("Nokogiri");
557
+ VALUE xml = rb_define_module_under(module, "XML");
558
+
559
+ /*
560
+ * The Reader parser allows you to effectively pull parse an XML document.
561
+ * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
562
+ * node. Note that you may only iterate over the document once!
563
+ */
564
+ VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject);
565
+
566
+ cNokogiriXmlReader = klass;
567
+
568
+ rb_define_singleton_method(klass, "from_memory", from_memory, -1);
569
+ rb_define_singleton_method(klass, "from_io", from_io, -1);
570
+
571
+ rb_define_method(klass, "read", read_more, 0);
572
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
573
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
574
+ rb_define_method(klass, "state", state, 0);
575
+ rb_define_method(klass, "node_type", node_type, 0);
576
+ rb_define_method(klass, "name", name, 0);
577
+ rb_define_method(klass, "local_name", local_name, 0);
578
+ rb_define_method(klass, "namespace_uri", namespace_uri, 0);
579
+ rb_define_method(klass, "prefix", prefix, 0);
580
+ rb_define_method(klass, "value", value, 0);
581
+ rb_define_method(klass, "lang", lang, 0);
582
+ rb_define_method(klass, "xml_version", xml_version, 0);
583
+ rb_define_method(klass, "depth", depth, 0);
584
+ rb_define_method(klass, "attribute_count", attribute_count, 0);
585
+ rb_define_method(klass, "attribute", reader_attribute, 1);
586
+ rb_define_method(klass, "namespaces", namespaces, 0);
587
+ rb_define_method(klass, "attribute_at", attribute_at, 1);
588
+ rb_define_method(klass, "attributes?", attributes_eh, 0);
589
+ rb_define_method(klass, "value?", value_eh, 0);
590
+ rb_define_method(klass, "default?", default_eh, 0);
591
+
592
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
593
+ }