nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (319)
  1. data/.autotest +18 -7
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +297 -3
  4. data/CHANGELOG.rdoc +289 -0
  5. data/Manifest.txt +148 -37
  6. data/README.ja.rdoc +20 -20
  7. data/README.rdoc +53 -22
  8. data/Rakefile +127 -211
  9. data/bin/nokogiri +54 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +89 -54
  12. data/ext/nokogiri/html_document.c +34 -27
  13. data/ext/nokogiri/html_document.h +1 -1
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +7 -5
  17. data/ext/nokogiri/html_entity_lookup.h +1 -1
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
  21. data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
  22. data/ext/nokogiri/xml_attr.c +20 -9
  23. data/ext/nokogiri/xml_attr.h +1 -1
  24. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  25. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +21 -9
  27. data/ext/nokogiri/xml_cdata.h +1 -1
  28. data/ext/nokogiri/xml_comment.c +18 -6
  29. data/ext/nokogiri/xml_comment.h +1 -1
  30. data/ext/nokogiri/xml_document.c +247 -68
  31. data/ext/nokogiri/xml_document.h +5 -3
  32. data/ext/nokogiri/xml_document_fragment.c +15 -7
  33. data/ext/nokogiri/xml_document_fragment.h +1 -1
  34. data/ext/nokogiri/xml_dtd.c +110 -10
  35. data/ext/nokogiri/xml_dtd.h +3 -1
  36. data/ext/nokogiri/xml_element_content.c +123 -0
  37. data/ext/nokogiri/xml_element_content.h +10 -0
  38. data/ext/nokogiri/xml_element_decl.c +69 -0
  39. data/ext/nokogiri/xml_element_decl.h +9 -0
  40. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  41. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  42. data/ext/nokogiri/xml_entity_decl.c +110 -0
  43. data/ext/nokogiri/xml_entity_decl.h +10 -0
  44. data/ext/nokogiri/xml_entity_reference.c +16 -5
  45. data/ext/nokogiri/xml_entity_reference.h +1 -1
  46. data/ext/nokogiri/xml_io.c +40 -8
  47. data/ext/nokogiri/xml_io.h +2 -1
  48. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  50. data/ext/nokogiri/xml_namespace.c +84 -0
  51. data/ext/nokogiri/xml_namespace.h +13 -0
  52. data/ext/nokogiri/xml_node.c +782 -225
  53. data/ext/nokogiri/xml_node.h +2 -4
  54. data/ext/nokogiri/xml_node_set.c +253 -34
  55. data/ext/nokogiri/xml_node_set.h +2 -2
  56. data/ext/nokogiri/xml_processing_instruction.c +17 -5
  57. data/ext/nokogiri/xml_processing_instruction.h +1 -1
  58. data/ext/nokogiri/xml_reader.c +277 -85
  59. data/ext/nokogiri/xml_reader.h +1 -1
  60. data/ext/nokogiri/xml_relax_ng.c +168 -0
  61. data/ext/nokogiri/xml_relax_ng.h +9 -0
  62. data/ext/nokogiri/xml_sax_parser.c +183 -111
  63. data/ext/nokogiri/xml_sax_parser.h +30 -1
  64. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  65. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  66. data/ext/nokogiri/xml_sax_push_parser.c +42 -12
  67. data/ext/nokogiri/xml_sax_push_parser.h +1 -1
  68. data/ext/nokogiri/xml_schema.c +205 -0
  69. data/ext/nokogiri/xml_schema.h +9 -0
  70. data/ext/nokogiri/xml_syntax_error.c +28 -173
  71. data/ext/nokogiri/xml_syntax_error.h +2 -1
  72. data/ext/nokogiri/xml_text.c +16 -6
  73. data/ext/nokogiri/xml_text.h +1 -1
  74. data/ext/nokogiri/xml_xpath_context.c +104 -47
  75. data/ext/nokogiri/xml_xpath_context.h +1 -1
  76. data/ext/nokogiri/xslt_stylesheet.c +161 -19
  77. data/ext/nokogiri/xslt_stylesheet.h +1 -1
  78. data/lib/nokogiri.rb +47 -8
  79. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  80. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  81. data/lib/nokogiri/css.rb +6 -3
  82. data/lib/nokogiri/css/node.rb +14 -12
  83. data/lib/nokogiri/css/parser.rb +665 -62
  84. data/lib/nokogiri/css/parser.y +20 -10
  85. data/lib/nokogiri/css/parser_extras.rb +91 -0
  86. data/lib/nokogiri/css/tokenizer.rb +148 -5
  87. data/lib/nokogiri/css/tokenizer.rex +10 -9
  88. data/lib/nokogiri/css/xpath_visitor.rb +47 -44
  89. data/lib/nokogiri/decorators/slop.rb +8 -4
  90. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  91. data/lib/nokogiri/ffi/html/document.rb +28 -0
  92. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  93. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  94. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  95. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  96. data/lib/nokogiri/ffi/libxml.rb +420 -0
  97. data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
  98. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  99. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  100. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
  102. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  103. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  104. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  105. data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
  106. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  107. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  108. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  109. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  110. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  111. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  112. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  113. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  114. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  115. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
  117. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  118. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  119. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  120. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  121. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  122. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  123. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
  124. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  125. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  126. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  127. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  128. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  129. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  132. data/lib/nokogiri/ffi/xml/document.rb +174 -0
  133. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  134. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  135. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  136. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  137. data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
  138. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  139. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/ffi/xml/node.rb +559 -0
  141. data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
  142. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  143. data/lib/nokogiri/ffi/xml/reader.rb +236 -0
  144. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  145. data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
  146. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
  147. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
  148. data/lib/nokogiri/ffi/xml/schema.rb +109 -0
  149. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  150. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  151. data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
  152. data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
  153. data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
  154. data/lib/nokogiri/html.rb +13 -47
  155. data/lib/nokogiri/html/builder.rb +27 -1
  156. data/lib/nokogiri/html/document.rb +201 -7
  157. data/lib/nokogiri/html/document_fragment.rb +41 -0
  158. data/lib/nokogiri/html/element_description.rb +23 -0
  159. data/lib/nokogiri/html/entity_lookup.rb +2 -0
  160. data/lib/nokogiri/html/sax/parser.rb +34 -3
  161. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  162. data/lib/nokogiri/nokogiri.rb +1 -0
  163. data/lib/nokogiri/version.rb +40 -1
  164. data/lib/nokogiri/version_warning.rb +14 -0
  165. data/lib/nokogiri/xml.rb +32 -53
  166. data/lib/nokogiri/xml/attr.rb +5 -0
  167. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  168. data/lib/nokogiri/xml/builder.rb +349 -29
  169. data/lib/nokogiri/xml/cdata.rb +3 -1
  170. data/lib/nokogiri/xml/character_data.rb +7 -0
  171. data/lib/nokogiri/xml/document.rb +166 -14
  172. data/lib/nokogiri/xml/document_fragment.rb +76 -1
  173. data/lib/nokogiri/xml/dtd.rb +16 -3
  174. data/lib/nokogiri/xml/element_content.rb +36 -0
  175. data/lib/nokogiri/xml/element_decl.rb +13 -0
  176. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  177. data/lib/nokogiri/xml/namespace.rb +13 -0
  178. data/lib/nokogiri/xml/node.rb +561 -166
  179. data/lib/nokogiri/xml/node/save_options.rb +22 -2
  180. data/lib/nokogiri/xml/node_set.rb +202 -40
  181. data/lib/nokogiri/xml/parse_options.rb +93 -0
  182. data/lib/nokogiri/xml/pp.rb +2 -0
  183. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  184. data/lib/nokogiri/xml/pp/node.rb +56 -0
  185. data/lib/nokogiri/xml/processing_instruction.rb +2 -0
  186. data/lib/nokogiri/xml/reader.rb +93 -8
  187. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  188. data/lib/nokogiri/xml/sax.rb +1 -7
  189. data/lib/nokogiri/xml/sax/document.rb +107 -2
  190. data/lib/nokogiri/xml/sax/parser.rb +57 -7
  191. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  192. data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
  193. data/lib/nokogiri/xml/schema.rb +63 -0
  194. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  195. data/lib/nokogiri/xml/text.rb +4 -1
  196. data/lib/nokogiri/xml/xpath.rb +1 -1
  197. data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
  198. data/lib/nokogiri/xml/xpath_context.rb +2 -0
  199. data/lib/nokogiri/xslt.rb +26 -2
  200. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  201. data/lib/xsd/xmlparser/nokogiri.rb +45 -9
  202. data/tasks/cross_compile.rb +173 -0
  203. data/tasks/test.rb +25 -69
  204. data/test/css/test_nthiness.rb +3 -4
  205. data/test/css/test_parser.rb +75 -20
  206. data/test/css/test_tokenizer.rb +23 -1
  207. data/test/css/test_xpath_visitor.rb +10 -1
  208. data/test/decorators/test_slop.rb +16 -0
  209. data/test/ffi/test_document.rb +35 -0
  210. data/test/files/2ch.html +108 -0
  211. data/test/files/address_book.rlx +12 -0
  212. data/test/files/address_book.xml +10 -0
  213. data/test/files/bar/bar.xsd +4 -0
  214. data/test/files/encoding.html +82 -0
  215. data/test/files/encoding.xhtml +84 -0
  216. data/test/files/foo/foo.xsd +4 -0
  217. data/test/files/po.xml +32 -0
  218. data/test/files/po.xsd +66 -0
  219. data/test/files/shift_jis.html +10 -0
  220. data/test/files/shift_jis.xml +5 -0
  221. data/test/files/snuggles.xml +3 -0
  222. data/test/files/staff.dtd +10 -0
  223. data/test/files/valid_bar.xml +2 -0
  224. data/test/helper.rb +101 -23
  225. data/test/html/sax/test_parser.rb +81 -2
  226. data/test/html/sax/test_parser_context.rb +48 -0
  227. data/test/html/test_builder.rb +39 -8
  228. data/test/html/test_document.rb +186 -23
  229. data/test/html/test_document_encoding.rb +78 -1
  230. data/test/html/test_document_fragment.rb +253 -0
  231. data/test/html/test_element_description.rb +98 -0
  232. data/test/html/test_named_characters.rb +1 -1
  233. data/test/html/test_node.rb +124 -36
  234. data/test/html/test_node_encoding.rb +27 -0
  235. data/test/test_convert_xpath.rb +1 -52
  236. data/test/test_css_cache.rb +2 -13
  237. data/test/test_encoding_handler.rb +46 -0
  238. data/test/test_memory_leak.rb +88 -19
  239. data/test/test_nokogiri.rb +38 -5
  240. data/test/test_reader.rb +188 -6
  241. data/test/test_soap4r_sax.rb +52 -0
  242. data/test/test_xslt_transforms.rb +183 -83
  243. data/test/xml/node/test_save_options.rb +1 -1
  244. data/test/xml/node/test_subclass.rb +44 -0
  245. data/test/xml/sax/test_parser.rb +175 -4
  246. data/test/xml/sax/test_parser_context.rb +113 -0
  247. data/test/xml/sax/test_push_parser.rb +90 -2
  248. data/test/xml/test_attr.rb +35 -1
  249. data/test/xml/test_attribute_decl.rb +82 -0
  250. data/test/xml/test_builder.rb +186 -1
  251. data/test/xml/test_cdata.rb +32 -1
  252. data/test/xml/test_comment.rb +13 -1
  253. data/test/xml/test_document.rb +415 -43
  254. data/test/xml/test_document_encoding.rb +1 -1
  255. data/test/xml/test_document_fragment.rb +173 -5
  256. data/test/xml/test_dtd.rb +61 -6
  257. data/test/xml/test_dtd_encoding.rb +3 -1
  258. data/test/xml/test_element_content.rb +56 -0
  259. data/test/xml/test_element_decl.rb +73 -0
  260. data/test/xml/test_entity_decl.rb +120 -0
  261. data/test/xml/test_entity_reference.rb +5 -1
  262. data/test/xml/test_namespace.rb +68 -0
  263. data/test/xml/test_node.rb +546 -201
  264. data/test/xml/test_node_attributes.rb +34 -0
  265. data/test/xml/test_node_encoding.rb +33 -3
  266. data/test/xml/test_node_reparenting.rb +321 -0
  267. data/test/xml/test_node_set.rb +538 -2
  268. data/test/xml/test_parse_options.rb +52 -0
  269. data/test/xml/test_processing_instruction.rb +6 -1
  270. data/test/xml/test_reader_encoding.rb +1 -1
  271. data/test/xml/test_relax_ng.rb +60 -0
  272. data/test/xml/test_schema.rb +94 -0
  273. data/test/xml/test_syntax_error.rb +12 -0
  274. data/test/xml/test_text.rb +35 -1
  275. data/test/xml/test_unparented_node.rb +5 -5
  276. data/test/xml/test_xpath.rb +142 -11
  277. data/test/xslt/test_custom_functions.rb +94 -0
  278. metadata +328 -92
  279. data/ext/nokogiri/html_sax_parser.c +0 -57
  280. data/ext/nokogiri/html_sax_parser.h +0 -11
  281. data/ext/nokogiri/iconv.dll +0 -0
  282. data/ext/nokogiri/libexslt.dll +0 -0
  283. data/ext/nokogiri/libxml2.dll +0 -0
  284. data/ext/nokogiri/libxslt.dll +0 -0
  285. data/ext/nokogiri/native.so +0 -0
  286. data/ext/nokogiri/xml_xpath.c +0 -53
  287. data/ext/nokogiri/xml_xpath.h +0 -11
  288. data/ext/nokogiri/zlib1.dll +0 -0
  289. data/lib/action-nokogiri.rb +0 -30
  290. data/lib/nokogiri/css/generated_parser.rb +0 -713
  291. data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
  292. data/lib/nokogiri/decorators.rb +0 -2
  293. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  294. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  295. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  296. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
  297. data/lib/nokogiri/hpricot.rb +0 -51
  298. data/lib/nokogiri/xml/comment.rb +0 -6
  299. data/lib/nokogiri/xml/element.rb +0 -6
  300. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  301. data/lib/nokogiri/xml/fragment_handler.rb +0 -34
  302. data/test/hpricot/files/basic.xhtml +0 -17
  303. data/test/hpricot/files/boingboing.html +0 -2266
  304. data/test/hpricot/files/cy0.html +0 -3653
  305. data/test/hpricot/files/immob.html +0 -400
  306. data/test/hpricot/files/pace_application.html +0 -1320
  307. data/test/hpricot/files/tenderlove.html +0 -16
  308. data/test/hpricot/files/uswebgen.html +0 -220
  309. data/test/hpricot/files/utf8.html +0 -1054
  310. data/test/hpricot/files/week9.html +0 -1723
  311. data/test/hpricot/files/why.xml +0 -19
  312. data/test/hpricot/load_files.rb +0 -11
  313. data/test/hpricot/test_alter.rb +0 -68
  314. data/test/hpricot/test_builder.rb +0 -20
  315. data/test/hpricot/test_parser.rb +0 -426
  316. data/test/hpricot/test_paths.rb +0 -15
  317. data/test/hpricot/test_preserved.rb +0 -77
  318. data/test/hpricot/test_xml.rb +0 -30
  319. data/test/test_gc.rb +0 -15
@@ -25,6 +25,46 @@ static int has_attributes(xmlTextReaderPtr reader)
25
25
  return(0);
26
26
  }
27
27
 
28
+ #define XMLNS_PREFIX "xmlns"
29
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
30
+ #define XMLNS_BUFFER_LEN 128
31
+ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
32
+ {
33
+ xmlNsPtr ns;
34
+ static char buffer[XMLNS_BUFFER_LEN] ;
35
+ char *key ;
36
+ size_t keylen ;
37
+
38
+ if (node->type != XML_ELEMENT_NODE) return ;
39
+
40
+ ns = node->nsDef;
41
+ while (ns != NULL) {
42
+
43
+ keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
44
+ if (keylen > XMLNS_BUFFER_LEN) {
45
+ key = (char*)malloc(keylen) ;
46
+ } else {
47
+ key = buffer ;
48
+ }
49
+
50
+ if (ns->prefix) {
51
+ sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
52
+ } else {
53
+ sprintf(key, "%s", XMLNS_PREFIX);
54
+ }
55
+
56
+ rb_hash_aset(attr_hash,
57
+ NOKOGIRI_STR_NEW2(key),
58
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
59
+ );
60
+ if (key != buffer) {
61
+ free(key);
62
+ }
63
+ ns = ns->next ;
64
+ }
65
+ }
66
+
67
+
28
68
  /*
29
69
  * call-seq:
30
70
  * default?
@@ -34,8 +74,10 @@ static int has_attributes(xmlTextReaderPtr reader)
34
74
  static VALUE default_eh(VALUE self)
35
75
  {
36
76
  xmlTextReaderPtr reader;
77
+ int eh;
78
+
37
79
  Data_Get_Struct(self, xmlTextReader, reader);
38
- int eh = xmlTextReaderIsDefault(reader);
80
+ eh = xmlTextReaderIsDefault(reader);
39
81
  if(eh == 0) return Qfalse;
40
82
  if(eh == 1) return Qtrue;
41
83
 
@@ -51,8 +93,10 @@ static VALUE default_eh(VALUE self)
51
93
  static VALUE value_eh(VALUE self)
52
94
  {
53
95
  xmlTextReaderPtr reader;
96
+ int eh;
97
+
54
98
  Data_Get_Struct(self, xmlTextReader, reader);
55
- int eh = xmlTextReaderHasValue(reader);
99
+ eh = xmlTextReaderHasValue(reader);
56
100
  if(eh == 0) return Qfalse;
57
101
  if(eh == 1) return Qtrue;
58
102
 
@@ -68,8 +112,10 @@ static VALUE value_eh(VALUE self)
68
112
  static VALUE attributes_eh(VALUE self)
69
113
  {
70
114
  xmlTextReaderPtr reader;
115
+ int eh;
116
+
71
117
  Data_Get_Struct(self, xmlTextReader, reader);
72
- int eh = has_attributes(reader);
118
+ eh = has_attributes(reader);
73
119
  if(eh == 0) return Qfalse;
74
120
  if(eh == 1) return Qtrue;
75
121
 
@@ -85,6 +131,7 @@ static VALUE attributes_eh(VALUE self)
85
131
  static VALUE namespaces(VALUE self)
86
132
  {
87
133
  xmlTextReaderPtr reader;
134
+ xmlNodePtr ptr;
88
135
  VALUE attr ;
89
136
 
90
137
  Data_Get_Struct(self, xmlTextReader, reader);
@@ -94,7 +141,7 @@ static VALUE namespaces(VALUE self)
94
141
  if (! has_attributes(reader))
95
142
  return attr ;
96
143
 
97
- xmlNodePtr ptr = xmlTextReaderExpand(reader);
144
+ ptr = xmlTextReaderExpand(reader);
98
145
  if(ptr == NULL) return Qnil;
99
146
 
100
147
  Nokogiri_xml_node_namespaces(ptr, attr);
@@ -111,6 +158,7 @@ static VALUE namespaces(VALUE self)
111
158
  static VALUE attribute_nodes(VALUE self)
112
159
  {
113
160
  xmlTextReaderPtr reader;
161
+ xmlNodePtr ptr;
114
162
  VALUE attr ;
115
163
 
116
164
  Data_Get_Struct(self, xmlTextReader, reader);
@@ -120,30 +168,9 @@ static VALUE attribute_nodes(VALUE self)
120
168
  if (! has_attributes(reader))
121
169
  return attr ;
122
170
 
123
- xmlNodePtr ptr = xmlTextReaderExpand(reader);
171
+ ptr = xmlTextReaderExpand(reader);
124
172
  if(ptr == NULL) return Qnil;
125
173
 
126
- // FIXME I'm not sure if this is correct..... I don't really like pointing
127
- // at this document, but I have to because of the assertions in
128
- // the node wrapping code.
129
- if(! DOC_RUBY_OBJECT_TEST(ptr->doc)) {
130
- VALUE rb_doc = Data_Wrap_Struct(cNokogiriXmlDocument, 0, 0, ptr->doc);
131
- ptr->doc->_private = malloc(sizeof(nokogiriTuple));
132
- rb_iv_set(rb_doc, "@decorators", Qnil);
133
- ((nokogiriTuplePtr)(ptr->doc->_private))->doc = (void *)rb_doc;
134
- ((nokogiriTuplePtr)(ptr->doc->_private))->unlinkedNodes = xmlXPathNodeSetCreate(NULL);
135
- }
136
- VALUE enc = rb_iv_get(self, "@encoding");
137
-
138
- if(enc != Qnil && NULL == ptr->doc->encoding) {
139
- ptr->doc->encoding = calloc((size_t)RSTRING_LEN(enc), sizeof(char));
140
- strncpy(
141
- (char *)ptr->doc->encoding,
142
- StringValuePtr(enc),
143
- (size_t)RSTRING_LEN(enc)
144
- );
145
- }
146
-
147
174
  Nokogiri_xml_node_properties(ptr, attr);
148
175
 
149
176
  return attr ;
@@ -158,20 +185,21 @@ static VALUE attribute_nodes(VALUE self)
158
185
  static VALUE attribute_at(VALUE self, VALUE index)
159
186
  {
160
187
  xmlTextReaderPtr reader;
188
+ xmlChar *value;
189
+ VALUE rb_value;
190
+
161
191
  Data_Get_Struct(self, xmlTextReader, reader);
162
192
 
163
- if(index == Qnil) return Qnil;
164
- index = rb_funcall(index, rb_intern("to_i"), 0);
193
+ if(NIL_P(index)) return Qnil;
194
+ index = rb_Integer(index);
165
195
 
166
- xmlChar * value = xmlTextReaderGetAttributeNo(
196
+ value = xmlTextReaderGetAttributeNo(
167
197
  reader,
168
- NUM2INT(index)
198
+ (int)NUM2INT(index)
169
199
  );
170
200
  if(value == NULL) return Qnil;
171
201
 
172
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
173
- VALUE rb_value = NOKOGIRI_STR_NEW2(value,
174
- RTEST(enc) ? StringValuePtr(enc) : NULL);
202
+ rb_value = NOKOGIRI_STR_NEW2(value);
175
203
  xmlFree(value);
176
204
  return rb_value;
177
205
  }
@@ -186,9 +214,11 @@ static VALUE reader_attribute(VALUE self, VALUE name)
186
214
  {
187
215
  xmlTextReaderPtr reader;
188
216
  xmlChar *value ;
217
+ VALUE rb_value;
218
+
189
219
  Data_Get_Struct(self, xmlTextReader, reader);
190
220
 
191
- if(name == Qnil) return Qnil;
221
+ if(NIL_P(name)) return Qnil;
192
222
  name = StringValue(name) ;
193
223
 
194
224
  value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
@@ -207,9 +237,7 @@ static VALUE reader_attribute(VALUE self, VALUE name)
207
237
  }
208
238
  if(value == NULL) return Qnil;
209
239
 
210
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
211
- VALUE rb_value = NOKOGIRI_STR_NEW2(value,
212
- RTEST(enc) ? StringValuePtr(enc) : NULL);
240
+ rb_value = NOKOGIRI_STR_NEW2(value);
213
241
  xmlFree(value);
214
242
  return rb_value;
215
243
  }
@@ -223,11 +251,13 @@ static VALUE reader_attribute(VALUE self, VALUE name)
223
251
  static VALUE attribute_count(VALUE self)
224
252
  {
225
253
  xmlTextReaderPtr reader;
254
+ int count;
255
+
226
256
  Data_Get_Struct(self, xmlTextReader, reader);
227
- int count = xmlTextReaderAttributeCount(reader);
257
+ count = xmlTextReaderAttributeCount(reader);
228
258
  if(count == -1) return Qnil;
229
259
 
230
- return INT2NUM(count);
260
+ return INT2NUM((long)count);
231
261
  }
232
262
 
233
263
  /*
@@ -239,11 +269,13 @@ static VALUE attribute_count(VALUE self)
239
269
  static VALUE depth(VALUE self)
240
270
  {
241
271
  xmlTextReaderPtr reader;
272
+ int depth;
273
+
242
274
  Data_Get_Struct(self, xmlTextReader, reader);
243
- int depth = xmlTextReaderDepth(reader);
275
+ depth = xmlTextReaderDepth(reader);
244
276
  if(depth == -1) return Qnil;
245
277
 
246
- return INT2NUM(depth);
278
+ return INT2NUM((long)depth);
247
279
  }
248
280
 
249
281
  /*
@@ -255,11 +287,13 @@ static VALUE depth(VALUE self)
255
287
  static VALUE xml_version(VALUE self)
256
288
  {
257
289
  xmlTextReaderPtr reader;
290
+ const char *version;
291
+
258
292
  Data_Get_Struct(self, xmlTextReader, reader);
259
- const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
293
+ version = (const char *)xmlTextReaderConstXmlVersion(reader);
260
294
  if(version == NULL) return Qnil;
261
295
 
262
- return NOKOGIRI_STR_NEW2(version, "UTF-8");
296
+ return NOKOGIRI_STR_NEW2(version);
263
297
  }
264
298
 
265
299
  /*
@@ -271,31 +305,31 @@ static VALUE xml_version(VALUE self)
271
305
  static VALUE lang(VALUE self)
272
306
  {
273
307
  xmlTextReaderPtr reader;
308
+ const char *lang;
309
+
274
310
  Data_Get_Struct(self, xmlTextReader, reader);
275
- const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
311
+ lang = (const char *)xmlTextReaderConstXmlLang(reader);
276
312
  if(lang == NULL) return Qnil;
277
313
 
278
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
279
- return NOKOGIRI_STR_NEW2(lang,
280
- RTEST(enc) ? StringValuePtr(enc) : NULL);
314
+ return NOKOGIRI_STR_NEW2(lang);
281
315
  }
282
316
 
283
317
  /*
284
318
  * call-seq:
285
319
  * value
286
320
  *
287
- * Get the text value of the node if present
321
+ * Get the text value of the node if present. Returns a utf-8 encoded string.
288
322
  */
289
323
  static VALUE value(VALUE self)
290
324
  {
291
325
  xmlTextReaderPtr reader;
326
+ const char *value;
327
+
292
328
  Data_Get_Struct(self, xmlTextReader, reader);
293
- const char * value = (const char *)xmlTextReaderConstValue(reader);
329
+ value = (const char *)xmlTextReaderConstValue(reader);
294
330
  if(value == NULL) return Qnil;
295
331
 
296
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
297
- return NOKOGIRI_STR_NEW2(value,
298
- RTEST(enc) ? StringValuePtr(enc) : NULL);
332
+ return NOKOGIRI_STR_NEW2(value);
299
333
  }
300
334
 
301
335
  /*
@@ -307,13 +341,13 @@ static VALUE value(VALUE self)
307
341
  static VALUE prefix(VALUE self)
308
342
  {
309
343
  xmlTextReaderPtr reader;
344
+ const char *prefix;
345
+
310
346
  Data_Get_Struct(self, xmlTextReader, reader);
311
- const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
347
+ prefix = (const char *)xmlTextReaderConstPrefix(reader);
312
348
  if(prefix == NULL) return Qnil;
313
349
 
314
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
315
- return NOKOGIRI_STR_NEW2(prefix,
316
- RTEST(enc) ? StringValuePtr(enc) : NULL);
350
+ return NOKOGIRI_STR_NEW2(prefix);
317
351
  }
318
352
 
319
353
  /*
@@ -325,13 +359,13 @@ static VALUE prefix(VALUE self)
325
359
  static VALUE namespace_uri(VALUE self)
326
360
  {
327
361
  xmlTextReaderPtr reader;
362
+ const char *uri;
363
+
328
364
  Data_Get_Struct(self, xmlTextReader, reader);
329
- const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
365
+ uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
330
366
  if(uri == NULL) return Qnil;
331
367
 
332
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
333
- return NOKOGIRI_STR_NEW2(uri,
334
- RTEST(enc) ? StringValuePtr(enc) : NULL);
368
+ return NOKOGIRI_STR_NEW2(uri);
335
369
  }
336
370
 
337
371
  /*
@@ -343,31 +377,49 @@ static VALUE namespace_uri(VALUE self)
343
377
  static VALUE local_name(VALUE self)
344
378
  {
345
379
  xmlTextReaderPtr reader;
380
+ const char *name;
381
+
346
382
  Data_Get_Struct(self, xmlTextReader, reader);
347
- const char * name = (const char *)xmlTextReaderConstLocalName(reader);
383
+ name = (const char *)xmlTextReaderConstLocalName(reader);
348
384
  if(name == NULL) return Qnil;
349
385
 
350
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
351
- return NOKOGIRI_STR_NEW2(name,
352
- RTEST(enc) ? StringValuePtr(enc) : NULL);
386
+ return NOKOGIRI_STR_NEW2(name);
353
387
  }
354
388
 
355
389
  /*
356
390
  * call-seq:
357
391
  * name
358
392
  *
359
- * Get the name of the node
393
+ * Get the name of the node. Returns a utf-8 encoded string.
360
394
  */
361
395
  static VALUE name(VALUE self)
362
396
  {
363
397
  xmlTextReaderPtr reader;
398
+ const char *name;
399
+
364
400
  Data_Get_Struct(self, xmlTextReader, reader);
365
- const char * name = (const char *)xmlTextReaderConstName(reader);
401
+ name = (const char *)xmlTextReaderConstName(reader);
366
402
  if(name == NULL) return Qnil;
367
403
 
368
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
369
- return NOKOGIRI_STR_NEW2(name,
370
- RTEST(enc) ? StringValuePtr(enc) : NULL);
404
+ return NOKOGIRI_STR_NEW2(name);
405
+ }
406
+
407
+ /*
408
+ * call-seq:
409
+ * base_uri
410
+ *
411
+ * Get the xml:base of the node
412
+ */
413
+ static VALUE base_uri(VALUE self)
414
+ {
415
+ xmlTextReaderPtr reader;
416
+ const char * base_uri;
417
+
418
+ Data_Get_Struct(self, xmlTextReader, reader);
419
+ base_uri = (const char *)xmlTextReaderBaseUri(reader);
420
+ if (base_uri == NULL) return Qnil;
421
+
422
+ return NOKOGIRI_STR_NEW2(base_uri);
371
423
  }
372
424
 
373
425
  /*
@@ -380,7 +432,20 @@ static VALUE state(VALUE self)
380
432
  {
381
433
  xmlTextReaderPtr reader;
382
434
  Data_Get_Struct(self, xmlTextReader, reader);
383
- return INT2NUM(xmlTextReaderReadState(reader));
435
+ return INT2NUM((long)xmlTextReaderReadState(reader));
436
+ }
437
+
438
+ /*
439
+ * call-seq:
440
+ * node_type
441
+ *
442
+ * Get the type of the reader's current node
443
+ */
444
+ static VALUE node_type(VALUE self)
445
+ {
446
+ xmlTextReaderPtr reader;
447
+ Data_Get_Struct(self, xmlTextReader, reader);
448
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
384
449
  }
385
450
 
386
451
  /*
@@ -392,28 +457,80 @@ static VALUE state(VALUE self)
392
457
  static VALUE read_more(VALUE self)
393
458
  {
394
459
  xmlTextReaderPtr reader;
460
+ xmlErrorPtr error;
461
+ VALUE error_list;
462
+ int ret;
463
+
395
464
  Data_Get_Struct(self, xmlTextReader, reader);
396
465
 
397
- VALUE error_list = rb_funcall(self, rb_intern("errors"), 0);
466
+ error_list = rb_funcall(self, rb_intern("errors"), 0);
398
467
 
399
468
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
400
- int ret = xmlTextReaderRead(reader);
469
+ ret = xmlTextReaderRead(reader);
401
470
  xmlSetStructuredErrorFunc(NULL, NULL);
402
471
 
403
472
  if(ret == 1) return self;
404
473
  if(ret == 0) return Qnil;
405
474
 
406
- xmlErrorPtr error = xmlGetLastError();
475
+ error = xmlGetLastError();
407
476
  if(error)
408
- rb_funcall(rb_mKernel, rb_intern("raise"), 1,
409
- Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
410
- );
477
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
411
478
  else
412
479
  rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
413
480
 
414
481
  return Qnil;
415
482
  }
416
483
 
484
+ /*
485
+ * call-seq:
486
+ * inner_xml
487
+ *
488
+ * Read the contents of the current node, including child nodes and markup.
489
+ * Returns a utf-8 encoded string.
490
+ */
491
+ static VALUE inner_xml(VALUE self)
492
+ {
493
+ xmlTextReaderPtr reader;
494
+ xmlChar* value;
495
+ VALUE str;
496
+
497
+ Data_Get_Struct(self, xmlTextReader, reader);
498
+
499
+ value = xmlTextReaderReadInnerXml(reader);
500
+
501
+ str = Qnil;
502
+ if(value) {
503
+ str = NOKOGIRI_STR_NEW2((char*)value);
504
+ xmlFree(value);
505
+ }
506
+
507
+ return str;
508
+ }
509
+
510
+ /*
511
+ * call-seq:
512
+ * outer_xml
513
+ *
514
+ * Read the current node and its contents, including child nodes and markup.
515
+ * Returns a utf-8 encoded string.
516
+ */
517
+ static VALUE outer_xml(VALUE self)
518
+ {
519
+ xmlTextReaderPtr reader;
520
+ xmlChar *value;
521
+ VALUE str = Qnil;
522
+
523
+ Data_Get_Struct(self, xmlTextReader, reader);
524
+
525
+ value = xmlTextReaderReadOuterXml(reader);
526
+
527
+ if(value) {
528
+ str = NOKOGIRI_STR_NEW2((char*)value);
529
+ xmlFree(value);
530
+ }
531
+ return str;
532
+ }
533
+
417
534
  /*
418
535
  * call-seq:
419
536
  * from_memory(string, url = nil, encoding = nil, options = 0)
@@ -423,21 +540,22 @@ static VALUE read_more(VALUE self)
423
540
  static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
424
541
  {
425
542
  VALUE rb_buffer, rb_url, encoding, rb_options;
426
-
543
+ xmlTextReaderPtr reader;
427
544
  const char * c_url = NULL;
428
545
  const char * c_encoding = NULL;
429
- int c_options = 0;
546
+ int c_options = 0;
547
+ VALUE rb_reader, args[3];
430
548
 
431
549
  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
432
550
 
433
- rb_buffer = StringValue(rb_buffer) ;
551
+ if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
434
552
  if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
435
553
  if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
436
- if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
554
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
437
555
 
438
- xmlTextReaderPtr reader = xmlReaderForMemory(
556
+ reader = xmlReaderForMemory(
439
557
  StringValuePtr(rb_buffer),
440
- RSTRING_LEN(rb_buffer),
558
+ (int)RSTRING_LEN(rb_buffer),
441
559
  c_url,
442
560
  c_encoding,
443
561
  c_options
@@ -448,12 +566,78 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
448
566
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
449
567
  }
450
568
 
451
- VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
452
- rb_funcall(rb_reader, rb_intern("initialize"), 2, rb_url, encoding);
569
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
570
+ args[0] = rb_buffer;
571
+ args[1] = rb_url;
572
+ args[2] = encoding;
573
+ rb_obj_call_init(rb_reader, 3, args);
453
574
 
454
575
  return rb_reader;
455
576
  }
456
577
 
578
+ /*
579
+ * call-seq:
580
+ * from_io(io, url = nil, encoding = nil, options = 0)
581
+ *
582
+ * Create a new reader that parses +io+
583
+ */
584
+ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
585
+ {
586
+ VALUE rb_io, rb_url, encoding, rb_options;
587
+ xmlTextReaderPtr reader;
588
+ const char * c_url = NULL;
589
+ const char * c_encoding = NULL;
590
+ int c_options = 0;
591
+ VALUE rb_reader, args[3];
592
+
593
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
594
+
595
+ if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
596
+ if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
597
+ if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
598
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
599
+
600
+ reader = xmlReaderForIO(
601
+ (xmlInputReadCallback)io_read_callback,
602
+ (xmlInputCloseCallback)io_close_callback,
603
+ (void *)rb_io,
604
+ c_url,
605
+ c_encoding,
606
+ c_options
607
+ );
608
+
609
+ if(reader == NULL) {
610
+ xmlFreeTextReader(reader);
611
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
612
+ }
613
+
614
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
615
+ args[0] = rb_io;
616
+ args[1] = rb_url;
617
+ args[2] = encoding;
618
+ rb_obj_call_init(rb_reader, 3, args);
619
+
620
+ return rb_reader;
621
+ }
622
+
623
+ /*
624
+ * call-seq:
625
+ * reader.empty_element? # => true or false
626
+ *
627
+ * Returns true if the current node is empty, otherwise false.
628
+ */
629
+ static VALUE empty_element_p(VALUE self)
630
+ {
631
+ xmlTextReaderPtr reader;
632
+
633
+ Data_Get_Struct(self, xmlTextReader, reader);
634
+
635
+ if(xmlTextReaderIsEmptyElement(reader))
636
+ return Qtrue;
637
+
638
+ return Qfalse;
639
+ }
640
+
457
641
  VALUE cNokogiriXmlReader;
458
642
 
459
643
  void init_xml_reader()
@@ -471,8 +655,13 @@ void init_xml_reader()
471
655
  cNokogiriXmlReader = klass;
472
656
 
473
657
  rb_define_singleton_method(klass, "from_memory", from_memory, -1);
658
+ rb_define_singleton_method(klass, "from_io", from_io, -1);
659
+
474
660
  rb_define_method(klass, "read", read_more, 0);
661
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
662
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
475
663
  rb_define_method(klass, "state", state, 0);
664
+ rb_define_method(klass, "node_type", node_type, 0);
476
665
  rb_define_method(klass, "name", name, 0);
477
666
  rb_define_method(klass, "local_name", local_name, 0);
478
667
  rb_define_method(klass, "namespace_uri", namespace_uri, 0);
@@ -485,8 +674,11 @@ void init_xml_reader()
485
674
  rb_define_method(klass, "attribute", reader_attribute, 1);
486
675
  rb_define_method(klass, "namespaces", namespaces, 0);
487
676
  rb_define_method(klass, "attribute_at", attribute_at, 1);
488
- rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
677
+ rb_define_method(klass, "empty_element?", empty_element_p, 0);
489
678
  rb_define_method(klass, "attributes?", attributes_eh, 0);
490
679
  rb_define_method(klass, "value?", value_eh, 0);
491
680
  rb_define_method(klass, "default?", default_eh, 0);
681
+ rb_define_method(klass, "base_uri", base_uri, 0);
682
+
683
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
492
684
  }