nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309)
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -25,6 +25,43 @@ static int has_attributes(xmlTextReaderPtr reader)
25
25
  return(0);
26
26
  }
27
27
 
28
+ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
29
+ {
30
+ xmlNsPtr ns;
31
+ static char buffer[XMLNS_BUFFER_LEN] ;
32
+ char *key ;
33
+ size_t keylen ;
34
+
35
+ if (node->type != XML_ELEMENT_NODE) return ;
36
+
37
+ ns = node->nsDef;
38
+ while (ns != NULL) {
39
+
40
+ keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
41
+ if (keylen > XMLNS_BUFFER_LEN) {
42
+ key = (char*)malloc(keylen) ;
43
+ } else {
44
+ key = buffer ;
45
+ }
46
+
47
+ if (ns->prefix) {
48
+ sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
49
+ } else {
50
+ sprintf(key, "%s", XMLNS_PREFIX);
51
+ }
52
+
53
+ rb_hash_aset(attr_hash,
54
+ NOKOGIRI_STR_NEW2(key),
55
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
56
+ );
57
+ if (key != buffer) {
58
+ free(key);
59
+ }
60
+ ns = ns->next ;
61
+ }
62
+ }
63
+
64
+
28
65
  /*
29
66
  * call-seq:
30
67
  * default?
@@ -34,8 +71,10 @@ static int has_attributes(xmlTextReaderPtr reader)
34
71
  static VALUE default_eh(VALUE self)
35
72
  {
36
73
  xmlTextReaderPtr reader;
74
+ int eh;
75
+
37
76
  Data_Get_Struct(self, xmlTextReader, reader);
38
- int eh = xmlTextReaderIsDefault(reader);
77
+ eh = xmlTextReaderIsDefault(reader);
39
78
  if(eh == 0) return Qfalse;
40
79
  if(eh == 1) return Qtrue;
41
80
 
@@ -51,8 +90,10 @@ static VALUE default_eh(VALUE self)
51
90
  static VALUE value_eh(VALUE self)
52
91
  {
53
92
  xmlTextReaderPtr reader;
93
+ int eh;
94
+
54
95
  Data_Get_Struct(self, xmlTextReader, reader);
55
- int eh = xmlTextReaderHasValue(reader);
96
+ eh = xmlTextReaderHasValue(reader);
56
97
  if(eh == 0) return Qfalse;
57
98
  if(eh == 1) return Qtrue;
58
99
 
@@ -68,8 +109,10 @@ static VALUE value_eh(VALUE self)
68
109
  static VALUE attributes_eh(VALUE self)
69
110
  {
70
111
  xmlTextReaderPtr reader;
112
+ int eh;
113
+
71
114
  Data_Get_Struct(self, xmlTextReader, reader);
72
- int eh = has_attributes(reader);
115
+ eh = has_attributes(reader);
73
116
  if(eh == 0) return Qfalse;
74
117
  if(eh == 1) return Qtrue;
75
118
 
@@ -78,13 +121,14 @@ static VALUE attributes_eh(VALUE self)
78
121
 
79
122
  /*
80
123
  * call-seq:
81
- * attributes
124
+ * namespaces
82
125
  *
83
- * Get a Hash of attributes for this node
126
+ * Get a hash of namespaces for this Node
84
127
  */
85
- static VALUE attributes(VALUE self)
128
+ static VALUE namespaces(VALUE self)
86
129
  {
87
130
  xmlTextReaderPtr reader;
131
+ xmlNodePtr ptr;
88
132
  VALUE attr ;
89
133
 
90
134
  Data_Get_Struct(self, xmlTextReader, reader);
@@ -94,10 +138,36 @@ static VALUE attributes(VALUE self)
94
138
  if (! has_attributes(reader))
95
139
  return attr ;
96
140
 
97
- xmlNodePtr ptr = xmlTextReaderExpand(reader);
141
+ ptr = xmlTextReaderExpand(reader);
98
142
  if(ptr == NULL) return Qnil;
99
143
 
100
144
  Nokogiri_xml_node_namespaces(ptr, attr);
145
+
146
+ return attr ;
147
+ }
148
+
149
+ /*
150
+ * call-seq:
151
+ * attribute_nodes
152
+ *
153
+ * Get a list of attributes for this Node
154
+ */
155
+ static VALUE attribute_nodes(VALUE self)
156
+ {
157
+ xmlTextReaderPtr reader;
158
+ xmlNodePtr ptr;
159
+ VALUE attr ;
160
+
161
+ Data_Get_Struct(self, xmlTextReader, reader);
162
+
163
+ attr = rb_ary_new() ;
164
+
165
+ if (! has_attributes(reader))
166
+ return attr ;
167
+
168
+ ptr = xmlTextReaderExpand(reader);
169
+ if(ptr == NULL) return Qnil;
170
+
101
171
  Nokogiri_xml_node_properties(ptr, attr);
102
172
 
103
173
  return attr ;
@@ -112,18 +182,21 @@ static VALUE attributes(VALUE self)
112
182
  static VALUE attribute_at(VALUE self, VALUE index)
113
183
  {
114
184
  xmlTextReaderPtr reader;
185
+ xmlChar *value;
186
+ VALUE rb_value;
187
+
115
188
  Data_Get_Struct(self, xmlTextReader, reader);
116
189
 
117
- if(index == Qnil) return Qnil;
118
- index = rb_funcall(index, rb_intern("to_i"), 0);
190
+ if(NIL_P(index)) return Qnil;
191
+ index = rb_Integer(index);
119
192
 
120
- xmlChar * value = xmlTextReaderGetAttributeNo(
193
+ value = xmlTextReaderGetAttributeNo(
121
194
  reader,
122
- NUM2INT(index)
195
+ (int)NUM2INT(index)
123
196
  );
124
197
  if(value == NULL) return Qnil;
125
198
 
126
- VALUE rb_value = rb_str_new2((const char *)value);
199
+ rb_value = NOKOGIRI_STR_NEW2(value);
127
200
  xmlFree(value);
128
201
  return rb_value;
129
202
  }
@@ -138,27 +211,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
138
211
  {
139
212
  xmlTextReaderPtr reader;
140
213
  xmlChar *value ;
214
+ VALUE rb_value;
215
+
141
216
  Data_Get_Struct(self, xmlTextReader, reader);
142
217
 
143
- if(name == Qnil) return Qnil;
218
+ if(NIL_P(name)) return Qnil;
144
219
  name = StringValue(name) ;
145
220
 
146
- value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
147
- if(value == NULL) {
148
- /* this section is an attempt to workaround older versions of libxml that
149
- don't handle namespaces properly in all attribute-and-friends functions */
150
- xmlChar *prefix = NULL ;
151
- xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
152
- if (localname != NULL) {
153
- value = xmlTextReaderLookupNamespace(reader, localname);
154
- free(localname) ;
155
- } else {
156
- value = xmlTextReaderLookupNamespace(reader, prefix);
157
- }
158
- }
221
+ value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
159
222
  if(value == NULL) return Qnil;
160
223
 
161
- VALUE rb_value = rb_str_new2((const char *)value);
224
+ rb_value = NOKOGIRI_STR_NEW2(value);
162
225
  xmlFree(value);
163
226
  return rb_value;
164
227
  }
@@ -172,11 +235,13 @@ static VALUE reader_attribute(VALUE self, VALUE name)
172
235
  static VALUE attribute_count(VALUE self)
173
236
  {
174
237
  xmlTextReaderPtr reader;
238
+ int count;
239
+
175
240
  Data_Get_Struct(self, xmlTextReader, reader);
176
- int count = xmlTextReaderAttributeCount(reader);
241
+ count = xmlTextReaderAttributeCount(reader);
177
242
  if(count == -1) return Qnil;
178
243
 
179
- return INT2NUM(count);
244
+ return INT2NUM((long)count);
180
245
  }
181
246
 
182
247
  /*
@@ -188,27 +253,13 @@ static VALUE attribute_count(VALUE self)
188
253
  static VALUE depth(VALUE self)
189
254
  {
190
255
  xmlTextReaderPtr reader;
191
- Data_Get_Struct(self, xmlTextReader, reader);
192
- int depth = xmlTextReaderDepth(reader);
193
- if(depth == -1) return Qnil;
256
+ int depth;
194
257
 
195
- return INT2NUM(depth);
196
- }
197
-
198
- /*
199
- * call-seq:
200
- * encoding
201
- *
202
- * Get the encoding for the document
203
- */
204
- static VALUE encoding(VALUE self)
205
- {
206
- xmlTextReaderPtr reader;
207
258
  Data_Get_Struct(self, xmlTextReader, reader);
208
- const char * encoding = (const char *)xmlTextReaderConstEncoding(reader);
209
- if(encoding == NULL) return Qnil;
259
+ depth = xmlTextReaderDepth(reader);
260
+ if(depth == -1) return Qnil;
210
261
 
211
- return rb_str_new2(encoding);
262
+ return INT2NUM((long)depth);
212
263
  }
213
264
 
214
265
  /*
@@ -220,11 +271,13 @@ static VALUE encoding(VALUE self)
220
271
  static VALUE xml_version(VALUE self)
221
272
  {
222
273
  xmlTextReaderPtr reader;
274
+ const char *version;
275
+
223
276
  Data_Get_Struct(self, xmlTextReader, reader);
224
- const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
277
+ version = (const char *)xmlTextReaderConstXmlVersion(reader);
225
278
  if(version == NULL) return Qnil;
226
279
 
227
- return rb_str_new2(version);
280
+ return NOKOGIRI_STR_NEW2(version);
228
281
  }
229
282
 
230
283
  /*
@@ -236,27 +289,31 @@ static VALUE xml_version(VALUE self)
236
289
  static VALUE lang(VALUE self)
237
290
  {
238
291
  xmlTextReaderPtr reader;
292
+ const char *lang;
293
+
239
294
  Data_Get_Struct(self, xmlTextReader, reader);
240
- const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
295
+ lang = (const char *)xmlTextReaderConstXmlLang(reader);
241
296
  if(lang == NULL) return Qnil;
242
297
 
243
- return rb_str_new2(lang);
298
+ return NOKOGIRI_STR_NEW2(lang);
244
299
  }
245
300
 
246
301
  /*
247
302
  * call-seq:
248
303
  * value
249
304
  *
250
- * Get the text value of the node if present
305
+ * Get the text value of the node if present. Returns a utf-8 encoded string.
251
306
  */
252
307
  static VALUE value(VALUE self)
253
308
  {
254
309
  xmlTextReaderPtr reader;
310
+ const char *value;
311
+
255
312
  Data_Get_Struct(self, xmlTextReader, reader);
256
- const char * value = (const char *)xmlTextReaderConstValue(reader);
313
+ value = (const char *)xmlTextReaderConstValue(reader);
257
314
  if(value == NULL) return Qnil;
258
315
 
259
- return rb_str_new2(value);
316
+ return NOKOGIRI_STR_NEW2(value);
260
317
  }
261
318
 
262
319
  /*
@@ -268,11 +325,13 @@ static VALUE value(VALUE self)
268
325
  static VALUE prefix(VALUE self)
269
326
  {
270
327
  xmlTextReaderPtr reader;
328
+ const char *prefix;
329
+
271
330
  Data_Get_Struct(self, xmlTextReader, reader);
272
- const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
331
+ prefix = (const char *)xmlTextReaderConstPrefix(reader);
273
332
  if(prefix == NULL) return Qnil;
274
333
 
275
- return rb_str_new2(prefix);
334
+ return NOKOGIRI_STR_NEW2(prefix);
276
335
  }
277
336
 
278
337
  /*
@@ -284,11 +343,13 @@ static VALUE prefix(VALUE self)
284
343
  static VALUE namespace_uri(VALUE self)
285
344
  {
286
345
  xmlTextReaderPtr reader;
346
+ const char *uri;
347
+
287
348
  Data_Get_Struct(self, xmlTextReader, reader);
288
- const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
349
+ uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
289
350
  if(uri == NULL) return Qnil;
290
351
 
291
- return rb_str_new2(uri);
352
+ return NOKOGIRI_STR_NEW2(uri);
292
353
  }
293
354
 
294
355
  /*
@@ -300,27 +361,49 @@ static VALUE namespace_uri(VALUE self)
300
361
  static VALUE local_name(VALUE self)
301
362
  {
302
363
  xmlTextReaderPtr reader;
364
+ const char *name;
365
+
303
366
  Data_Get_Struct(self, xmlTextReader, reader);
304
- const char * name = (const char *)xmlTextReaderConstLocalName(reader);
367
+ name = (const char *)xmlTextReaderConstLocalName(reader);
305
368
  if(name == NULL) return Qnil;
306
369
 
307
- return rb_str_new2(name);
370
+ return NOKOGIRI_STR_NEW2(name);
308
371
  }
309
372
 
310
373
  /*
311
374
  * call-seq:
312
375
  * name
313
376
  *
314
- * Get the name of the node
377
+ * Get the name of the node. Returns a utf-8 encoded string.
315
378
  */
316
379
  static VALUE name(VALUE self)
317
380
  {
318
381
  xmlTextReaderPtr reader;
382
+ const char *name;
383
+
319
384
  Data_Get_Struct(self, xmlTextReader, reader);
320
- const char * name = (const char *)xmlTextReaderConstName(reader);
385
+ name = (const char *)xmlTextReaderConstName(reader);
321
386
  if(name == NULL) return Qnil;
322
387
 
323
- return rb_str_new2(name);
388
+ return NOKOGIRI_STR_NEW2(name);
389
+ }
390
+
391
+ /*
392
+ * call-seq:
393
+ * base_uri
394
+ *
395
+ * Get the xml:base of the node
396
+ */
397
+ static VALUE base_uri(VALUE self)
398
+ {
399
+ xmlTextReaderPtr reader;
400
+ const char * base_uri;
401
+
402
+ Data_Get_Struct(self, xmlTextReader, reader);
403
+ base_uri = (const char *)xmlTextReaderBaseUri(reader);
404
+ if (base_uri == NULL) return Qnil;
405
+
406
+ return NOKOGIRI_STR_NEW2(base_uri);
324
407
  }
325
408
 
326
409
  /*
@@ -333,7 +416,20 @@ static VALUE state(VALUE self)
333
416
  {
334
417
  xmlTextReaderPtr reader;
335
418
  Data_Get_Struct(self, xmlTextReader, reader);
336
- return INT2NUM(xmlTextReaderReadState(reader));
419
+ return INT2NUM((long)xmlTextReaderReadState(reader));
420
+ }
421
+
422
+ /*
423
+ * call-seq:
424
+ * node_type
425
+ *
426
+ * Get the type of the reader's current node
427
+ */
428
+ static VALUE node_type(VALUE self)
429
+ {
430
+ xmlTextReaderPtr reader;
431
+ Data_Get_Struct(self, xmlTextReader, reader);
432
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
337
433
  }
338
434
 
339
435
  /*
@@ -345,13 +441,78 @@ static VALUE state(VALUE self)
345
441
  static VALUE read_more(VALUE self)
346
442
  {
347
443
  xmlTextReaderPtr reader;
444
+ xmlErrorPtr error;
445
+ VALUE error_list;
446
+ int ret;
447
+
348
448
  Data_Get_Struct(self, xmlTextReader, reader);
349
449
 
350
- int ret = xmlTextReaderRead(reader);
450
+ error_list = rb_funcall(self, rb_intern("errors"), 0);
451
+
452
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
453
+ ret = xmlTextReaderRead(reader);
454
+ xmlSetStructuredErrorFunc(NULL, NULL);
455
+
351
456
  if(ret == 1) return self;
352
457
  if(ret == 0) return Qnil;
353
458
 
354
- rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
459
+ error = xmlGetLastError();
460
+ if(error)
461
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
462
+ else
463
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
464
+
465
+ return Qnil;
466
+ }
467
+
468
+ /*
469
+ * call-seq:
470
+ * inner_xml
471
+ *
472
+ * Read the contents of the current node, including child nodes and markup.
473
+ * Returns a utf-8 encoded string.
474
+ */
475
+ static VALUE inner_xml(VALUE self)
476
+ {
477
+ xmlTextReaderPtr reader;
478
+ xmlChar* value;
479
+ VALUE str;
480
+
481
+ Data_Get_Struct(self, xmlTextReader, reader);
482
+
483
+ value = xmlTextReaderReadInnerXml(reader);
484
+
485
+ str = Qnil;
486
+ if(value) {
487
+ str = NOKOGIRI_STR_NEW2((char*)value);
488
+ xmlFree(value);
489
+ }
490
+
491
+ return str;
492
+ }
493
+
494
+ /*
495
+ * call-seq:
496
+ * outer_xml
497
+ *
498
+ * Read the current node and its contents, including child nodes and markup.
499
+ * Returns a utf-8 encoded string.
500
+ */
501
+ static VALUE outer_xml(VALUE self)
502
+ {
503
+ xmlTextReaderPtr reader;
504
+ xmlChar *value;
505
+ VALUE str = Qnil;
506
+
507
+ Data_Get_Struct(self, xmlTextReader, reader);
508
+
509
+ value = xmlTextReaderReadOuterXml(reader);
510
+
511
+ if(value) {
512
+ str = NOKOGIRI_STR_NEW2((char*)value);
513
+ xmlFree(value);
514
+ }
515
+ return str;
355
516
  }
356
517
 
357
518
  /*
@@ -363,21 +524,22 @@ static VALUE read_more(VALUE self)
363
524
  static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
364
525
  {
365
526
  VALUE rb_buffer, rb_url, encoding, rb_options;
366
-
527
+ xmlTextReaderPtr reader;
367
528
  const char * c_url = NULL;
368
529
  const char * c_encoding = NULL;
369
- int c_options = 0;
530
+ int c_options = 0;
531
+ VALUE rb_reader, args[3];
370
532
 
371
533
  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
372
534
 
373
- rb_buffer = StringValue(rb_buffer) ;
374
- if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
375
- if (RTEST(encoding)) c_encoding = StringValuePtr(rb_url);
376
- if (RTEST(rb_options)) c_options = NUM2INT(rb_options);
535
+ if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
536
+ if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
537
+ if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
538
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
377
539
 
378
- xmlTextReaderPtr reader = xmlReaderForMemory(
540
+ reader = xmlReaderForMemory(
379
541
  StringValuePtr(rb_buffer),
380
- NUM2INT(rb_funcall(rb_buffer, rb_intern("length"), 0)),
542
+ (int)RSTRING_LEN(rb_buffer),
381
543
  c_url,
382
544
  c_encoding,
383
545
  c_options
@@ -388,7 +550,76 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
388
550
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
389
551
  }
390
552
 
391
- return Data_Wrap_Struct(klass, NULL, dealloc, reader);
553
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
554
+ args[0] = rb_buffer;
555
+ args[1] = rb_url;
556
+ args[2] = encoding;
557
+ rb_obj_call_init(rb_reader, 3, args);
558
+
559
+ return rb_reader;
560
+ }
561
+
562
+ /*
563
+ * call-seq:
564
+ * from_io(io, url = nil, encoding = nil, options = 0)
565
+ *
566
+ * Create a new reader that parses +io+
567
+ */
568
+ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
569
+ {
570
+ VALUE rb_io, rb_url, encoding, rb_options;
571
+ xmlTextReaderPtr reader;
572
+ const char * c_url = NULL;
573
+ const char * c_encoding = NULL;
574
+ int c_options = 0;
575
+ VALUE rb_reader, args[3];
576
+
577
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
578
+
579
+ if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
580
+ if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
581
+ if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
582
+ if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
583
+
584
+ reader = xmlReaderForIO(
585
+ (xmlInputReadCallback)io_read_callback,
586
+ (xmlInputCloseCallback)io_close_callback,
587
+ (void *)rb_io,
588
+ c_url,
589
+ c_encoding,
590
+ c_options
591
+ );
592
+
593
+ if(reader == NULL) {
594
+ xmlFreeTextReader(reader);
595
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
596
+ }
597
+
598
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
599
+ args[0] = rb_io;
600
+ args[1] = rb_url;
601
+ args[2] = encoding;
602
+ rb_obj_call_init(rb_reader, 3, args);
603
+
604
+ return rb_reader;
605
+ }
606
+
607
+ /*
608
+ * call-seq:
609
+ * reader.empty_element? # => true or false
610
+ *
611
+ * Returns true if the current node is empty, otherwise false.
612
+ */
613
+ static VALUE empty_element_p(VALUE self)
614
+ {
615
+ xmlTextReaderPtr reader;
616
+
617
+ Data_Get_Struct(self, xmlTextReader, reader);
618
+
619
+ if(xmlTextReaderIsEmptyElement(reader))
620
+ return Qtrue;
621
+
622
+ return Qfalse;
392
623
  }
393
624
 
394
625
  VALUE cNokogiriXmlReader;
@@ -408,8 +639,13 @@ void init_xml_reader()
408
639
  cNokogiriXmlReader = klass;
409
640
 
410
641
  rb_define_singleton_method(klass, "from_memory", from_memory, -1);
642
+ rb_define_singleton_method(klass, "from_io", from_io, -1);
643
+
411
644
  rb_define_method(klass, "read", read_more, 0);
645
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
646
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
412
647
  rb_define_method(klass, "state", state, 0);
648
+ rb_define_method(klass, "node_type", node_type, 0);
413
649
  rb_define_method(klass, "name", name, 0);
414
650
  rb_define_method(klass, "local_name", local_name, 0);
415
651
  rb_define_method(klass, "namespace_uri", namespace_uri, 0);
@@ -417,13 +653,16 @@ void init_xml_reader()
417
653
  rb_define_method(klass, "value", value, 0);
418
654
  rb_define_method(klass, "lang", lang, 0);
419
655
  rb_define_method(klass, "xml_version", xml_version, 0);
420
- rb_define_method(klass, "encoding", encoding, 0);
421
656
  rb_define_method(klass, "depth", depth, 0);
422
657
  rb_define_method(klass, "attribute_count", attribute_count, 0);
423
658
  rb_define_method(klass, "attribute", reader_attribute, 1);
659
+ rb_define_method(klass, "namespaces", namespaces, 0);
424
660
  rb_define_method(klass, "attribute_at", attribute_at, 1);
425
- rb_define_method(klass, "attributes", attributes, 0);
661
+ rb_define_method(klass, "empty_element?", empty_element_p, 0);
426
662
  rb_define_method(klass, "attributes?", attributes_eh, 0);
427
663
  rb_define_method(klass, "value?", value_eh, 0);
428
664
  rb_define_method(klass, "default?", default_eh, 0);
665
+ rb_define_method(klass, "base_uri", base_uri, 0);
666
+
667
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
429
668
  }
@@ -1,7 +1,7 @@
1
1
  #ifndef NOKOGIRI_XML_READER
2
2
  #define NOKOGIRI_XML_READER
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
5
 
6
6
  void init_xml_reader();
7
7