glebm-nokogiri 1.4.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (277) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +411 -0
  3. data/CHANGELOG.rdoc +397 -0
  4. data/Manifest.txt +276 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +132 -0
  7. data/Rakefile +183 -0
  8. data/bin/nokogiri +49 -0
  9. data/deps.rip +5 -0
  10. data/ext/nokogiri/extconf.rb +97 -0
  11. data/ext/nokogiri/html_document.c +154 -0
  12. data/ext/nokogiri/html_document.h +10 -0
  13. data/ext/nokogiri/html_element_description.c +276 -0
  14. data/ext/nokogiri/html_element_description.h +10 -0
  15. data/ext/nokogiri/html_entity_lookup.c +32 -0
  16. data/ext/nokogiri/html_entity_lookup.h +8 -0
  17. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  18. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  19. data/ext/nokogiri/nokogiri.c +95 -0
  20. data/ext/nokogiri/nokogiri.h +153 -0
  21. data/ext/nokogiri/xml_attr.c +94 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +56 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +54 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +464 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +31 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_namespace.c +84 -0
  48. data/ext/nokogiri/xml_namespace.h +13 -0
  49. data/ext/nokogiri/xml_node.c +1347 -0
  50. data/ext/nokogiri/xml_node.h +13 -0
  51. data/ext/nokogiri/xml_node_set.c +418 -0
  52. data/ext/nokogiri/xml_node_set.h +9 -0
  53. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  54. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  55. data/ext/nokogiri/xml_reader.c +665 -0
  56. data/ext/nokogiri/xml_reader.h +10 -0
  57. data/ext/nokogiri/xml_relax_ng.c +168 -0
  58. data/ext/nokogiri/xml_relax_ng.h +9 -0
  59. data/ext/nokogiri/xml_sax_parser.c +286 -0
  60. data/ext/nokogiri/xml_sax_parser.h +39 -0
  61. data/ext/nokogiri/xml_sax_parser_context.c +159 -0
  62. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  63. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  64. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  65. data/ext/nokogiri/xml_schema.c +205 -0
  66. data/ext/nokogiri/xml_schema.h +9 -0
  67. data/ext/nokogiri/xml_syntax_error.c +58 -0
  68. data/ext/nokogiri/xml_syntax_error.h +13 -0
  69. data/ext/nokogiri/xml_text.c +50 -0
  70. data/ext/nokogiri/xml_text.h +9 -0
  71. data/ext/nokogiri/xml_xpath_context.c +276 -0
  72. data/ext/nokogiri/xml_xpath_context.h +9 -0
  73. data/ext/nokogiri/xslt_stylesheet.c +142 -0
  74. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  75. data/lib/nokogiri.rb +133 -0
  76. data/lib/nokogiri/css.rb +25 -0
  77. data/lib/nokogiri/css/generated_parser.rb +669 -0
  78. data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
  79. data/lib/nokogiri/css/node.rb +99 -0
  80. data/lib/nokogiri/css/parser.rb +82 -0
  81. data/lib/nokogiri/css/parser.y +232 -0
  82. data/lib/nokogiri/css/syntax_error.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rb +7 -0
  84. data/lib/nokogiri/css/tokenizer.rex +55 -0
  85. data/lib/nokogiri/css/xpath_visitor.rb +169 -0
  86. data/lib/nokogiri/decorators/slop.rb +33 -0
  87. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  88. data/lib/nokogiri/ffi/html/document.rb +28 -0
  89. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  90. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  91. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  92. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  93. data/lib/nokogiri/ffi/libxml.rb +386 -0
  94. data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
  95. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  96. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  97. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  98. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  99. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  102. data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
  103. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  105. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  106. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  107. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  108. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  109. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  110. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  111. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  112. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  113. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  114. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  115. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  116. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  117. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  118. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  119. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
  120. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  121. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  122. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  123. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  124. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  125. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  126. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  127. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  128. data/lib/nokogiri/ffi/xml/document.rb +162 -0
  129. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  130. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  131. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  132. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  133. data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
  134. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  136. data/lib/nokogiri/ffi/xml/node.rb +556 -0
  137. data/lib/nokogiri/ffi/xml/node_set.rb +149 -0
  138. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  139. data/lib/nokogiri/ffi/xml/reader.rb +232 -0
  140. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  141. data/lib/nokogiri/ffi/xml/sax/parser.rb +135 -0
  142. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  143. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
  144. data/lib/nokogiri/ffi/xml/schema.rb +109 -0
  145. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  146. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  147. data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
  148. data/lib/nokogiri/ffi/xml/xpath_context.rb +148 -0
  149. data/lib/nokogiri/ffi/xslt/stylesheet.rb +53 -0
  150. data/lib/nokogiri/html.rb +35 -0
  151. data/lib/nokogiri/html/builder.rb +35 -0
  152. data/lib/nokogiri/html/document.rb +90 -0
  153. data/lib/nokogiri/html/document_fragment.rb +36 -0
  154. data/lib/nokogiri/html/element_description.rb +23 -0
  155. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  156. data/lib/nokogiri/html/sax/parser.rb +48 -0
  157. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  158. data/lib/nokogiri/syntax_error.rb +4 -0
  159. data/lib/nokogiri/version.rb +37 -0
  160. data/lib/nokogiri/version_warning.rb +14 -0
  161. data/lib/nokogiri/xml.rb +67 -0
  162. data/lib/nokogiri/xml/attr.rb +14 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  164. data/lib/nokogiri/xml/builder.rb +418 -0
  165. data/lib/nokogiri/xml/cdata.rb +11 -0
  166. data/lib/nokogiri/xml/character_data.rb +7 -0
  167. data/lib/nokogiri/xml/document.rb +194 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +77 -0
  169. data/lib/nokogiri/xml/dtd.rb +11 -0
  170. data/lib/nokogiri/xml/element_content.rb +36 -0
  171. data/lib/nokogiri/xml/element_decl.rb +13 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  173. data/lib/nokogiri/xml/namespace.rb +13 -0
  174. data/lib/nokogiri/xml/node.rb +793 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  176. data/lib/nokogiri/xml/node_set.rb +325 -0
  177. data/lib/nokogiri/xml/notation.rb +6 -0
  178. data/lib/nokogiri/xml/parse_options.rb +85 -0
  179. data/lib/nokogiri/xml/pp.rb +2 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  181. data/lib/nokogiri/xml/pp/node.rb +56 -0
  182. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  183. data/lib/nokogiri/xml/reader.rb +74 -0
  184. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  185. data/lib/nokogiri/xml/sax.rb +4 -0
  186. data/lib/nokogiri/xml/sax/document.rb +160 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  190. data/lib/nokogiri/xml/schema.rb +57 -0
  191. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  192. data/lib/nokogiri/xml/text.rb +9 -0
  193. data/lib/nokogiri/xml/xpath.rb +10 -0
  194. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  195. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  196. data/lib/nokogiri/xslt.rb +48 -0
  197. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  198. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  199. data/tasks/cross_compile.rb +158 -0
  200. data/tasks/test.rb +94 -0
  201. data/test/css/test_nthiness.rb +159 -0
  202. data/test/css/test_parser.rb +282 -0
  203. data/test/css/test_tokenizer.rb +190 -0
  204. data/test/css/test_xpath_visitor.rb +85 -0
  205. data/test/ffi/test_document.rb +35 -0
  206. data/test/files/2ch.html +108 -0
  207. data/test/files/address_book.rlx +12 -0
  208. data/test/files/address_book.xml +10 -0
  209. data/test/files/bar/bar.xsd +4 -0
  210. data/test/files/dont_hurt_em_why.xml +422 -0
  211. data/test/files/exslt.xml +8 -0
  212. data/test/files/exslt.xslt +35 -0
  213. data/test/files/foo/foo.xsd +4 -0
  214. data/test/files/po.xml +32 -0
  215. data/test/files/po.xsd +66 -0
  216. data/test/files/shift_jis.html +10 -0
  217. data/test/files/shift_jis.xml +5 -0
  218. data/test/files/snuggles.xml +3 -0
  219. data/test/files/staff.dtd +10 -0
  220. data/test/files/staff.xml +59 -0
  221. data/test/files/staff.xslt +32 -0
  222. data/test/files/tlm.html +850 -0
  223. data/test/files/valid_bar.xml +2 -0
  224. data/test/helper.rb +169 -0
  225. data/test/html/sax/test_parser.rb +74 -0
  226. data/test/html/sax/test_parser_context.rb +48 -0
  227. data/test/html/test_builder.rb +164 -0
  228. data/test/html/test_document.rb +398 -0
  229. data/test/html/test_document_encoding.rb +77 -0
  230. data/test/html/test_document_fragment.rb +182 -0
  231. data/test/html/test_element_description.rb +98 -0
  232. data/test/html/test_named_characters.rb +14 -0
  233. data/test/html/test_node.rb +181 -0
  234. data/test/html/test_node_encoding.rb +27 -0
  235. data/test/test_convert_xpath.rb +135 -0
  236. data/test/test_css_cache.rb +45 -0
  237. data/test/test_encoding_handler.rb +46 -0
  238. data/test/test_memory_leak.rb +87 -0
  239. data/test/test_nokogiri.rb +138 -0
  240. data/test/test_reader.rb +386 -0
  241. data/test/test_soap4r_sax.rb +52 -0
  242. data/test/test_xslt_transforms.rb +188 -0
  243. data/test/xml/node/test_save_options.rb +20 -0
  244. data/test/xml/node/test_subclass.rb +44 -0
  245. data/test/xml/sax/test_parser.rb +307 -0
  246. data/test/xml/sax/test_parser_context.rb +63 -0
  247. data/test/xml/sax/test_push_parser.rb +139 -0
  248. data/test/xml/test_attr.rb +38 -0
  249. data/test/xml/test_attribute_decl.rb +82 -0
  250. data/test/xml/test_builder.rb +210 -0
  251. data/test/xml/test_cdata.rb +50 -0
  252. data/test/xml/test_comment.rb +29 -0
  253. data/test/xml/test_document.rb +668 -0
  254. data/test/xml/test_document_encoding.rb +26 -0
  255. data/test/xml/test_document_fragment.rb +180 -0
  256. data/test/xml/test_dtd.rb +82 -0
  257. data/test/xml/test_dtd_encoding.rb +33 -0
  258. data/test/xml/test_element_content.rb +56 -0
  259. data/test/xml/test_element_decl.rb +73 -0
  260. data/test/xml/test_entity_decl.rb +120 -0
  261. data/test/xml/test_entity_reference.rb +21 -0
  262. data/test/xml/test_namespace.rb +68 -0
  263. data/test/xml/test_node.rb +865 -0
  264. data/test/xml/test_node_attributes.rb +34 -0
  265. data/test/xml/test_node_encoding.rb +107 -0
  266. data/test/xml/test_node_reparenting.rb +293 -0
  267. data/test/xml/test_node_set.rb +649 -0
  268. data/test/xml/test_parse_options.rb +52 -0
  269. data/test/xml/test_processing_instruction.rb +30 -0
  270. data/test/xml/test_reader_encoding.rb +126 -0
  271. data/test/xml/test_relax_ng.rb +60 -0
  272. data/test/xml/test_schema.rb +89 -0
  273. data/test/xml/test_syntax_error.rb +12 -0
  274. data/test/xml/test_text.rb +38 -0
  275. data/test/xml/test_unparented_node.rb +381 -0
  276. data/test/xml/test_xpath.rb +138 -0
  277. metadata +533 -0
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
2
+ #define NOKOGIRI_XML_PROCESSING_INSTRUCTION
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_processing_instruction();
7
+
8
+ extern VALUE cNokogiriXmlProcessingInstruction;
9
+ #endif
@@ -0,0 +1,665 @@
1
+ #include <xml_reader.h>
2
+
3
+ static void dealloc(xmlTextReaderPtr reader)
4
+ {
5
+ NOKOGIRI_DEBUG_START(reader);
6
+ xmlFreeTextReader(reader);
7
+ NOKOGIRI_DEBUG_END(reader);
8
+ }
9
+
10
+ static int has_attributes(xmlTextReaderPtr reader)
11
+ {
12
+ /*
13
+ * this implementation of xmlTextReaderHasAttributes explicitly includes
14
+ * namespaces and properties, because some earlier versions ignore
15
+ * namespaces.
16
+ */
17
+ xmlNodePtr node ;
18
+ node = xmlTextReaderCurrentNode(reader);
19
+ if (node == NULL)
20
+ return(0);
21
+
22
+ if ((node->type == XML_ELEMENT_NODE) &&
23
+ ((node->properties != NULL) || (node->nsDef != NULL)))
24
+ return(1);
25
+ return(0);
26
+ }
27
+
28
+ #define XMLNS_PREFIX "xmlns"
29
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
30
+ #define XMLNS_BUFFER_LEN 128
31
+ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
32
+ {
33
+ xmlNsPtr ns;
34
+ static char buffer[XMLNS_BUFFER_LEN] ;
35
+ char *key ;
36
+ size_t keylen ;
37
+
38
+ if (node->type != XML_ELEMENT_NODE) return ;
39
+
40
+ ns = node->nsDef;
41
+ while (ns != NULL) {
42
+
43
+ keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
44
+ if (keylen > XMLNS_BUFFER_LEN) {
45
+ key = (char*)malloc(keylen) ;
46
+ } else {
47
+ key = buffer ;
48
+ }
49
+
50
+ if (ns->prefix) {
51
+ sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
52
+ } else {
53
+ sprintf(key, "%s", XMLNS_PREFIX);
54
+ }
55
+
56
+ rb_hash_aset(attr_hash,
57
+ NOKOGIRI_STR_NEW2(key),
58
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
59
+ );
60
+ if (key != buffer) {
61
+ free(key);
62
+ }
63
+ ns = ns->next ;
64
+ }
65
+ }
66
+
67
+
68
+ /*
69
+ * call-seq:
70
+ * default?
71
+ *
72
+ * Was an attribute generated from the default value in the DTD or schema?
73
+ */
74
+ static VALUE default_eh(VALUE self)
75
+ {
76
+ xmlTextReaderPtr reader;
77
+ int eh;
78
+
79
+ Data_Get_Struct(self, xmlTextReader, reader);
80
+ eh = xmlTextReaderIsDefault(reader);
81
+ if(eh == 0) return Qfalse;
82
+ if(eh == 1) return Qtrue;
83
+
84
+ return Qnil;
85
+ }
86
+
87
+ /*
88
+ * call-seq:
89
+ * value?
90
+ *
91
+ * Does this node have a text value?
92
+ */
93
+ static VALUE value_eh(VALUE self)
94
+ {
95
+ xmlTextReaderPtr reader;
96
+ int eh;
97
+
98
+ Data_Get_Struct(self, xmlTextReader, reader);
99
+ eh = xmlTextReaderHasValue(reader);
100
+ if(eh == 0) return Qfalse;
101
+ if(eh == 1) return Qtrue;
102
+
103
+ return Qnil;
104
+ }
105
+
106
+ /*
107
+ * call-seq:
108
+ * attributes?
109
+ *
110
+ * Does this node have attributes?
111
+ */
112
+ static VALUE attributes_eh(VALUE self)
113
+ {
114
+ xmlTextReaderPtr reader;
115
+ int eh;
116
+
117
+ Data_Get_Struct(self, xmlTextReader, reader);
118
+ eh = has_attributes(reader);
119
+ if(eh == 0) return Qfalse;
120
+ if(eh == 1) return Qtrue;
121
+
122
+ return Qnil;
123
+ }
124
+
125
+ /*
126
+ * call-seq:
127
+ * namespaces
128
+ *
129
+ * Get a hash of namespaces for this Node
130
+ */
131
+ static VALUE namespaces(VALUE self)
132
+ {
133
+ xmlTextReaderPtr reader;
134
+ xmlNodePtr ptr;
135
+ VALUE attr ;
136
+
137
+ Data_Get_Struct(self, xmlTextReader, reader);
138
+
139
+ attr = rb_hash_new() ;
140
+
141
+ if (! has_attributes(reader))
142
+ return attr ;
143
+
144
+ ptr = xmlTextReaderExpand(reader);
145
+ if(ptr == NULL) return Qnil;
146
+
147
+ Nokogiri_xml_node_namespaces(ptr, attr);
148
+
149
+ return attr ;
150
+ }
151
+
152
+ /*
153
+ * call-seq:
154
+ * attribute_nodes
155
+ *
156
+ * Get a list of attributes for this Node
157
+ */
158
+ static VALUE attribute_nodes(VALUE self)
159
+ {
160
+ xmlTextReaderPtr reader;
161
+ xmlNodePtr ptr;
162
+ VALUE attr ;
163
+
164
+ Data_Get_Struct(self, xmlTextReader, reader);
165
+
166
+ attr = rb_ary_new() ;
167
+
168
+ if (! has_attributes(reader))
169
+ return attr ;
170
+
171
+ ptr = xmlTextReaderExpand(reader);
172
+ if(ptr == NULL) return Qnil;
173
+
174
+ Nokogiri_xml_node_properties(ptr, attr);
175
+
176
+ return attr ;
177
+ }
178
+
179
+ /*
180
+ * call-seq:
181
+ * attribute_at(index)
182
+ *
183
+ * Get the value of attribute at +index+
184
+ */
185
+ static VALUE attribute_at(VALUE self, VALUE index)
186
+ {
187
+ xmlTextReaderPtr reader;
188
+ xmlChar *value;
189
+ VALUE rb_value;
190
+
191
+ Data_Get_Struct(self, xmlTextReader, reader);
192
+
193
+ if(NIL_P(index)) return Qnil;
194
+ index = rb_Integer(index);
195
+
196
+ value = xmlTextReaderGetAttributeNo(
197
+ reader,
198
+ (int)NUM2INT(index)
199
+ );
200
+ if(value == NULL) return Qnil;
201
+
202
+ rb_value = NOKOGIRI_STR_NEW2(value);
203
+ xmlFree(value);
204
+ return rb_value;
205
+ }
206
+
207
+ /*
208
+ * call-seq:
209
+ * attribute(name)
210
+ *
211
+ * Get the value of attribute named +name+
212
+ */
213
+ static VALUE reader_attribute(VALUE self, VALUE name)
214
+ {
215
+ xmlTextReaderPtr reader;
216
+ xmlChar *value ;
217
+ VALUE rb_value;
218
+
219
+ Data_Get_Struct(self, xmlTextReader, reader);
220
+
221
+ if(NIL_P(name)) return Qnil;
222
+ name = StringValue(name) ;
223
+
224
+ value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
225
+ if(value == NULL) {
226
+ /* this section is an attempt to workaround older versions of libxml that
227
+ don't handle namespaces properly in all attribute-and-friends functions */
228
+ xmlChar *prefix = NULL ;
229
+ xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
230
+ if (localname != NULL) {
231
+ value = xmlTextReaderLookupNamespace(reader, localname);
232
+ xmlFree(localname) ;
233
+ } else {
234
+ value = xmlTextReaderLookupNamespace(reader, prefix);
235
+ }
236
+ xmlFree(prefix);
237
+ }
238
+ if(value == NULL) return Qnil;
239
+
240
+ rb_value = NOKOGIRI_STR_NEW2(value);
241
+ xmlFree(value);
242
+ return rb_value;
243
+ }
244
+
245
+ /*
246
+ * call-seq:
247
+ * attribute_count
248
+ *
249
+ * Get the number of attributes for the current node
250
+ */
251
+ static VALUE attribute_count(VALUE self)
252
+ {
253
+ xmlTextReaderPtr reader;
254
+ int count;
255
+
256
+ Data_Get_Struct(self, xmlTextReader, reader);
257
+ count = xmlTextReaderAttributeCount(reader);
258
+ if(count == -1) return Qnil;
259
+
260
+ return INT2NUM((long)count);
261
+ }
262
+
263
+ /*
264
+ * call-seq:
265
+ * depth
266
+ *
267
+ * Get the depth of the node
268
+ */
269
+ static VALUE depth(VALUE self)
270
+ {
271
+ xmlTextReaderPtr reader;
272
+ int depth;
273
+
274
+ Data_Get_Struct(self, xmlTextReader, reader);
275
+ depth = xmlTextReaderDepth(reader);
276
+ if(depth == -1) return Qnil;
277
+
278
+ return INT2NUM((long)depth);
279
+ }
280
+
281
+ /*
282
+ * call-seq:
283
+ * xml_version
284
+ *
285
+ * Get the XML version of the document being read
286
+ */
287
+ static VALUE xml_version(VALUE self)
288
+ {
289
+ xmlTextReaderPtr reader;
290
+ const char *version;
291
+
292
+ Data_Get_Struct(self, xmlTextReader, reader);
293
+ version = (const char *)xmlTextReaderConstXmlVersion(reader);
294
+ if(version == NULL) return Qnil;
295
+
296
+ return NOKOGIRI_STR_NEW2(version);
297
+ }
298
+
299
+ /*
300
+ * call-seq:
301
+ * lang
302
+ *
303
+ * Get the xml:lang scope within which the node resides.
304
+ */
305
+ static VALUE lang(VALUE self)
306
+ {
307
+ xmlTextReaderPtr reader;
308
+ const char *lang;
309
+
310
+ Data_Get_Struct(self, xmlTextReader, reader);
311
+ lang = (const char *)xmlTextReaderConstXmlLang(reader);
312
+ if(lang == NULL) return Qnil;
313
+
314
+ return NOKOGIRI_STR_NEW2(lang);
315
+ }
316
+
317
+ /*
318
+ * call-seq:
319
+ * value
320
+ *
321
+ * Get the text value of the node if present. Returns a utf-8 encoded string.
322
+ */
323
+ static VALUE value(VALUE self)
324
+ {
325
+ xmlTextReaderPtr reader;
326
+ const char *value;
327
+
328
+ Data_Get_Struct(self, xmlTextReader, reader);
329
+ value = (const char *)xmlTextReaderConstValue(reader);
330
+ if(value == NULL) return Qnil;
331
+
332
+ return NOKOGIRI_STR_NEW2(value);
333
+ }
334
+
335
+ /*
336
+ * call-seq:
337
+ * prefix
338
+ *
339
+ * Get the shorthand reference to the namespace associated with the node.
340
+ */
341
+ static VALUE prefix(VALUE self)
342
+ {
343
+ xmlTextReaderPtr reader;
344
+ const char *prefix;
345
+
346
+ Data_Get_Struct(self, xmlTextReader, reader);
347
+ prefix = (const char *)xmlTextReaderConstPrefix(reader);
348
+ if(prefix == NULL) return Qnil;
349
+
350
+ return NOKOGIRI_STR_NEW2(prefix);
351
+ }
352
+
353
+ /*
354
+ * call-seq:
355
+ * namespace_uri
356
+ *
357
+ * Get the URI defining the namespace associated with the node
358
+ */
359
+ static VALUE namespace_uri(VALUE self)
360
+ {
361
+ xmlTextReaderPtr reader;
362
+ const char *uri;
363
+
364
+ Data_Get_Struct(self, xmlTextReader, reader);
365
+ uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
366
+ if(uri == NULL) return Qnil;
367
+
368
+ return NOKOGIRI_STR_NEW2(uri);
369
+ }
370
+
371
+ /*
372
+ * call-seq:
373
+ * local_name
374
+ *
375
+ * Get the local name of the node
376
+ */
377
+ static VALUE local_name(VALUE self)
378
+ {
379
+ xmlTextReaderPtr reader;
380
+ const char *name;
381
+
382
+ Data_Get_Struct(self, xmlTextReader, reader);
383
+ name = (const char *)xmlTextReaderConstLocalName(reader);
384
+ if(name == NULL) return Qnil;
385
+
386
+ return NOKOGIRI_STR_NEW2(name);
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * name
392
+ *
393
+ * Get the name of the node. Returns a utf-8 encoded string.
394
+ */
395
+ static VALUE name(VALUE self)
396
+ {
397
+ xmlTextReaderPtr reader;
398
+ const char *name;
399
+
400
+ Data_Get_Struct(self, xmlTextReader, reader);
401
+ name = (const char *)xmlTextReaderConstName(reader);
402
+ if(name == NULL) return Qnil;
403
+
404
+ return NOKOGIRI_STR_NEW2(name);
405
+ }
406
+
407
+ /*
408
+ * call-seq:
409
+ * base_uri
410
+ *
411
+ * Get the xml:base of the node
412
+ */
413
+ static VALUE base_uri(VALUE self)
414
+ {
415
+ xmlTextReaderPtr reader;
416
+ const char * base_uri;
417
+
418
+ Data_Get_Struct(self, xmlTextReader, reader);
419
+ base_uri = (const char *)xmlTextReaderBaseUri(reader);
420
+ if (base_uri == NULL) return Qnil;
421
+
422
+ return NOKOGIRI_STR_NEW2(base_uri);
423
+ }
424
+
425
+ /*
426
+ * call-seq:
427
+ * state
428
+ *
429
+ * Get the state of the reader
430
+ */
431
+ static VALUE state(VALUE self)
432
+ {
433
+ xmlTextReaderPtr reader;
434
+ Data_Get_Struct(self, xmlTextReader, reader);
435
+ return INT2NUM((long)xmlTextReaderReadState(reader));
436
+ }
437
+
438
+ /*
439
+ * call-seq:
440
+ * node_type
441
+ *
442
+ * Get the type of readers current node
443
+ */
444
+ static VALUE node_type(VALUE self)
445
+ {
446
+ xmlTextReaderPtr reader;
447
+ Data_Get_Struct(self, xmlTextReader, reader);
448
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
449
+ }
450
+
451
+ /*
452
+ * call-seq:
453
+ * read
454
+ *
455
+ * Move the Reader forward through the XML document.
456
+ */
457
+ static VALUE read_more(VALUE self)
458
+ {
459
+ xmlTextReaderPtr reader;
460
+ xmlErrorPtr error;
461
+ VALUE error_list;
462
+ int ret;
463
+
464
+ Data_Get_Struct(self, xmlTextReader, reader);
465
+
466
+ error_list = rb_funcall(self, rb_intern("errors"), 0);
467
+
468
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
469
+ ret = xmlTextReaderRead(reader);
470
+ xmlSetStructuredErrorFunc(NULL, NULL);
471
+
472
+ if(ret == 1) return self;
473
+ if(ret == 0) return Qnil;
474
+
475
+ error = xmlGetLastError();
476
+ if(error)
477
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
478
+ else
479
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
480
+
481
+ return Qnil;
482
+ }
483
+
484
+ /*
485
+ * call-seq:
486
+ * inner_xml
487
+ *
488
+ * Read the contents of the current node, including child nodes and markup.
489
+ * Returns a utf-8 encoded string.
490
+ */
491
+ static VALUE inner_xml(VALUE self)
492
+ {
493
+ xmlTextReaderPtr reader;
494
+ xmlChar* value;
495
+ VALUE str;
496
+
497
+ Data_Get_Struct(self, xmlTextReader, reader);
498
+
499
+ value = xmlTextReaderReadInnerXml(reader);
500
+
501
+ str = Qnil;
502
+ if(value) {
503
+ str = NOKOGIRI_STR_NEW2((char*)value);
504
+ xmlFree(value);
505
+ }
506
+
507
+ return str;
508
+ }
509
+
510
+ /*
511
+ * call-seq:
512
+ * outer_xml
513
+ *
514
+ * Read the current node and its contents, including child nodes and markup.
515
+ * Returns a utf-8 encoded string.
516
+ */
517
+ static VALUE outer_xml(VALUE self)
518
+ {
519
+ xmlTextReaderPtr reader;
520
+ xmlChar *value;
521
+ VALUE str = Qnil;
522
+
523
+ Data_Get_Struct(self, xmlTextReader, reader);
524
+
525
+ value = xmlTextReaderReadOuterXml(reader);
526
+
527
+ if(value) {
528
+ str = NOKOGIRI_STR_NEW2((char*)value);
529
+ xmlFree(value);
530
+ }
531
+ return str;
532
+ }
533
+
534
+ /*
535
+ * call-seq:
536
+ * from_memory(string, url = nil, encoding = nil, options = 0)
537
+ *
538
+ * Create a new reader that parses +string+
539
+ */
540
+ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
541
+ {
542
+ VALUE rb_buffer, rb_url, encoding, rb_options;
543
+ xmlTextReaderPtr reader;
544
+ const char * c_url = NULL;
545
+ const char * c_encoding = NULL;
546
+ int c_options = 0;
547
+ VALUE rb_reader, args[3];
548
+
549
+ rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
550
+
551
+ if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
552
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
553
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
554
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
555
+
556
+ reader = xmlReaderForMemory(
557
+ StringValuePtr(rb_buffer),
558
+ (int)RSTRING_LEN(rb_buffer),
559
+ c_url,
560
+ c_encoding,
561
+ c_options
562
+ );
563
+
564
+ if(reader == NULL) {
565
+ xmlFreeTextReader(reader);
566
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
567
+ }
568
+
569
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
570
+ args[0] = rb_buffer;
571
+ args[1] = rb_url;
572
+ args[2] = encoding;
573
+ rb_obj_call_init(rb_reader, 3, args);
574
+
575
+ return rb_reader;
576
+ }
577
+
578
+ /*
579
+ * call-seq:
580
+ * from_io(io, url = nil, encoding = nil, options = 0)
581
+ *
582
+ * Create a new reader that parses +io+
583
+ */
584
+ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
585
+ {
586
+ VALUE rb_io, rb_url, encoding, rb_options;
587
+ xmlTextReaderPtr reader;
588
+ const char * c_url = NULL;
589
+ const char * c_encoding = NULL;
590
+ int c_options = 0;
591
+ VALUE rb_reader, args[3];
592
+
593
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
594
+
595
+ if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
596
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
597
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
598
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
599
+
600
+ reader = xmlReaderForIO(
601
+ (xmlInputReadCallback)io_read_callback,
602
+ (xmlInputCloseCallback)io_close_callback,
603
+ (void *)rb_io,
604
+ c_url,
605
+ c_encoding,
606
+ c_options
607
+ );
608
+
609
+ if(reader == NULL) {
610
+ xmlFreeTextReader(reader);
611
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
612
+ }
613
+
614
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
615
+ args[0] = rb_io;
616
+ args[1] = rb_url;
617
+ args[2] = encoding;
618
+ rb_obj_call_init(rb_reader, 3, args);
619
+
620
+ return rb_reader;
621
+ }
622
+
623
+ VALUE cNokogiriXmlReader;
624
+
625
+ void init_xml_reader()
626
+ {
627
+ VALUE module = rb_define_module("Nokogiri");
628
+ VALUE xml = rb_define_module_under(module, "XML");
629
+
630
+ /*
631
+ * The Reader parser allows you to effectively pull parse an XML document.
632
+ * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
633
+ * node. Note that you may only iterate over the document once!
634
+ */
635
+ VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject);
636
+
637
+ cNokogiriXmlReader = klass;
638
+
639
+ rb_define_singleton_method(klass, "from_memory", from_memory, -1);
640
+ rb_define_singleton_method(klass, "from_io", from_io, -1);
641
+
642
+ rb_define_method(klass, "read", read_more, 0);
643
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
644
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
645
+ rb_define_method(klass, "state", state, 0);
646
+ rb_define_method(klass, "node_type", node_type, 0);
647
+ rb_define_method(klass, "name", name, 0);
648
+ rb_define_method(klass, "local_name", local_name, 0);
649
+ rb_define_method(klass, "namespace_uri", namespace_uri, 0);
650
+ rb_define_method(klass, "prefix", prefix, 0);
651
+ rb_define_method(klass, "value", value, 0);
652
+ rb_define_method(klass, "lang", lang, 0);
653
+ rb_define_method(klass, "xml_version", xml_version, 0);
654
+ rb_define_method(klass, "depth", depth, 0);
655
+ rb_define_method(klass, "attribute_count", attribute_count, 0);
656
+ rb_define_method(klass, "attribute", reader_attribute, 1);
657
+ rb_define_method(klass, "namespaces", namespaces, 0);
658
+ rb_define_method(klass, "attribute_at", attribute_at, 1);
659
+ rb_define_method(klass, "attributes?", attributes_eh, 0);
660
+ rb_define_method(klass, "value?", value_eh, 0);
661
+ rb_define_method(klass, "default?", default_eh, 0);
662
+ rb_define_method(klass, "base_uri", base_uri, 0);
663
+
664
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
665
+ }