libxml-ruby 2.8.0 → 3.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY +859 -775
  3. data/LICENSE +20 -20
  4. data/MANIFEST +166 -166
  5. data/README.rdoc +217 -184
  6. data/Rakefile +98 -78
  7. data/ext/libxml/extconf.rb +61 -116
  8. data/ext/libxml/libxml.c +80 -76
  9. data/ext/libxml/ruby_libxml.h +67 -75
  10. data/ext/libxml/ruby_xml.c +937 -893
  11. data/ext/libxml/ruby_xml.h +10 -10
  12. data/ext/libxml/ruby_xml_attr.c +333 -333
  13. data/ext/libxml/ruby_xml_attr.h +12 -12
  14. data/ext/libxml/ruby_xml_attr_decl.c +153 -153
  15. data/ext/libxml/ruby_xml_attr_decl.h +11 -11
  16. data/ext/libxml/ruby_xml_attributes.c +275 -275
  17. data/ext/libxml/ruby_xml_attributes.h +15 -15
  18. data/ext/libxml/ruby_xml_cbg.c +85 -85
  19. data/ext/libxml/ruby_xml_document.c +1123 -1147
  20. data/ext/libxml/ruby_xml_document.h +11 -11
  21. data/ext/libxml/ruby_xml_dtd.c +248 -268
  22. data/ext/libxml/ruby_xml_dtd.h +9 -9
  23. data/ext/libxml/ruby_xml_encoding.c +250 -260
  24. data/ext/libxml/ruby_xml_encoding.h +16 -19
  25. data/ext/libxml/ruby_xml_error.c +996 -996
  26. data/ext/libxml/ruby_xml_error.h +12 -12
  27. data/ext/libxml/ruby_xml_html_parser.c +89 -92
  28. data/ext/libxml/ruby_xml_html_parser.h +10 -10
  29. data/ext/libxml/ruby_xml_html_parser_context.c +337 -338
  30. data/ext/libxml/ruby_xml_html_parser_context.h +10 -10
  31. data/ext/libxml/ruby_xml_html_parser_options.c +46 -46
  32. data/ext/libxml/ruby_xml_html_parser_options.h +10 -10
  33. data/ext/libxml/ruby_xml_input_cbg.c +191 -191
  34. data/ext/libxml/ruby_xml_input_cbg.h +20 -20
  35. data/ext/libxml/ruby_xml_io.c +47 -50
  36. data/ext/libxml/ruby_xml_io.h +10 -10
  37. data/ext/libxml/ruby_xml_namespace.c +154 -153
  38. data/ext/libxml/ruby_xml_namespace.h +10 -10
  39. data/ext/libxml/ruby_xml_namespaces.c +293 -293
  40. data/ext/libxml/ruby_xml_namespaces.h +9 -9
  41. data/ext/libxml/ruby_xml_node.c +1406 -1452
  42. data/ext/libxml/ruby_xml_node.h +13 -11
  43. data/ext/libxml/ruby_xml_parser.c +91 -94
  44. data/ext/libxml/ruby_xml_parser.h +12 -12
  45. data/ext/libxml/ruby_xml_parser_context.c +999 -1001
  46. data/ext/libxml/ruby_xml_parser_context.h +10 -10
  47. data/ext/libxml/ruby_xml_parser_options.c +66 -66
  48. data/ext/libxml/ruby_xml_parser_options.h +12 -12
  49. data/ext/libxml/ruby_xml_reader.c +1239 -1228
  50. data/ext/libxml/ruby_xml_reader.h +17 -17
  51. data/ext/libxml/ruby_xml_relaxng.c +110 -111
  52. data/ext/libxml/ruby_xml_relaxng.h +10 -10
  53. data/ext/libxml/ruby_xml_sax2_handler.c +326 -328
  54. data/ext/libxml/ruby_xml_sax2_handler.h +10 -10
  55. data/ext/libxml/ruby_xml_sax_parser.c +116 -120
  56. data/ext/libxml/ruby_xml_sax_parser.h +10 -10
  57. data/ext/libxml/ruby_xml_schema.c +350 -301
  58. data/ext/libxml/ruby_xml_schema.h +806 -809
  59. data/ext/libxml/ruby_xml_schema_attribute.c +61 -109
  60. data/ext/libxml/ruby_xml_schema_attribute.h +15 -15
  61. data/ext/libxml/ruby_xml_schema_element.c +69 -94
  62. data/ext/libxml/ruby_xml_schema_element.h +14 -14
  63. data/ext/libxml/ruby_xml_schema_facet.c +46 -52
  64. data/ext/libxml/ruby_xml_schema_facet.h +13 -13
  65. data/ext/libxml/ruby_xml_schema_type.c +214 -259
  66. data/ext/libxml/ruby_xml_schema_type.h +9 -9
  67. data/ext/libxml/ruby_xml_version.h +9 -9
  68. data/ext/libxml/ruby_xml_writer.c +1133 -1137
  69. data/ext/libxml/ruby_xml_writer.h +10 -10
  70. data/ext/libxml/ruby_xml_xinclude.c +16 -16
  71. data/ext/libxml/ruby_xml_xinclude.h +11 -11
  72. data/ext/libxml/ruby_xml_xpath.c +194 -188
  73. data/ext/libxml/ruby_xml_xpath.h +13 -13
  74. data/ext/libxml/ruby_xml_xpath_context.c +360 -361
  75. data/ext/libxml/ruby_xml_xpath_context.h +9 -9
  76. data/ext/libxml/ruby_xml_xpath_expression.c +81 -81
  77. data/ext/libxml/ruby_xml_xpath_expression.h +10 -10
  78. data/ext/libxml/ruby_xml_xpath_object.c +338 -335
  79. data/ext/libxml/ruby_xml_xpath_object.h +17 -17
  80. data/ext/libxml/ruby_xml_xpointer.c +99 -99
  81. data/ext/libxml/ruby_xml_xpointer.h +11 -11
  82. data/ext/vc/libxml_ruby.sln +17 -15
  83. data/lib/libxml/node.rb +2 -78
  84. data/lib/libxml/parser.rb +0 -266
  85. data/lib/libxml/sax_parser.rb +0 -17
  86. data/lib/libxml/schema/attribute.rb +19 -19
  87. data/lib/libxml/schema/element.rb +19 -27
  88. data/lib/libxml/schema/type.rb +21 -29
  89. data/lib/libxml/schema.rb +47 -66
  90. data/lib/libxml-ruby.rb +30 -0
  91. data/lib/libxml.rb +3 -33
  92. data/libxml-ruby.gemspec +48 -44
  93. data/script/benchmark/depixelate +634 -634
  94. data/script/benchmark/hamlet.xml +9054 -9054
  95. data/script/benchmark/parsecount +170 -170
  96. data/script/benchmark/throughput +41 -41
  97. data/script/test +6 -6
  98. data/setup.rb +0 -1
  99. data/test/c14n/given/example-1.xml +14 -14
  100. data/test/c14n/given/example-2.xml +11 -11
  101. data/test/c14n/given/example-3.xml +18 -18
  102. data/test/c14n/given/example-4.xml +9 -9
  103. data/test/c14n/given/example-5.xml +12 -12
  104. data/test/c14n/given/example-6.xml +2 -2
  105. data/test/c14n/given/example-7.xml +11 -11
  106. data/test/c14n/given/example-8.xml +11 -11
  107. data/test/c14n/given/example-8.xpath +9 -9
  108. data/test/c14n/result/1-1-without-comments/example-1 +3 -3
  109. data/test/c14n/result/1-1-without-comments/example-2 +10 -10
  110. data/test/c14n/result/1-1-without-comments/example-3 +13 -13
  111. data/test/c14n/result/1-1-without-comments/example-4 +8 -8
  112. data/test/c14n/result/1-1-without-comments/example-5 +2 -2
  113. data/test/c14n/result/with-comments/example-1 +5 -5
  114. data/test/c14n/result/with-comments/example-2 +10 -10
  115. data/test/c14n/result/with-comments/example-3 +13 -13
  116. data/test/c14n/result/with-comments/example-4 +8 -8
  117. data/test/c14n/result/with-comments/example-5 +3 -3
  118. data/test/c14n/result/without-comments/example-1 +3 -3
  119. data/test/c14n/result/without-comments/example-2 +10 -10
  120. data/test/c14n/result/without-comments/example-3 +13 -13
  121. data/test/c14n/result/without-comments/example-4 +8 -8
  122. data/test/c14n/result/without-comments/example-5 +2 -2
  123. data/test/model/atom.xml +12 -12
  124. data/test/model/bands.iso-8859-1.xml +4 -4
  125. data/test/model/bands.utf-8.xml +4 -4
  126. data/test/model/bands.xml +4 -4
  127. data/test/model/books.xml +153 -153
  128. data/test/model/cwm_1_0.xml +11336 -0
  129. data/test/model/merge_bug_data.xml +58 -58
  130. data/test/model/ruby-lang.html +238 -238
  131. data/test/model/rubynet.xml +79 -79
  132. data/test/model/shiporder.rnc +28 -28
  133. data/test/model/shiporder.rng +86 -86
  134. data/test/model/shiporder.xml +22 -22
  135. data/test/model/shiporder.xsd +44 -40
  136. data/test/model/shiporder_bad.xsd +40 -0
  137. data/test/model/shiporder_import.xsd +45 -0
  138. data/test/model/soap.xml +27 -27
  139. data/test/model/xinclude.xml +4 -4
  140. data/test/{tc_attr.rb → test_attr.rb} +23 -25
  141. data/test/{tc_attr_decl.rb → test_attr_decl.rb} +13 -14
  142. data/test/{tc_attributes.rb → test_attributes.rb} +11 -18
  143. data/test/{tc_canonicalize.rb → test_canonicalize.rb} +36 -41
  144. data/test/test_deprecated_require.rb +12 -0
  145. data/test/{tc_document.rb → test_document.rb} +33 -27
  146. data/test/test_document_write.rb +146 -0
  147. data/test/{tc_dtd.rb → test_dtd.rb} +29 -29
  148. data/test/{tc_encoding.rb → test_encoding.rb} +129 -126
  149. data/test/{tc_encoding_sax.rb → test_encoding_sax.rb} +7 -6
  150. data/test/test_error.rb +178 -0
  151. data/test/test_helper.rb +4 -9
  152. data/test/test_html_parser.rb +162 -0
  153. data/test/test_html_parser_context.rb +23 -0
  154. data/test/test_namespace.rb +60 -0
  155. data/test/{tc_namespaces.rb → test_namespaces.rb} +34 -44
  156. data/test/{tc_node.rb → test_node.rb} +68 -47
  157. data/test/{tc_node_cdata.rb → test_node_cdata.rb} +12 -13
  158. data/test/{tc_node_comment.rb → test_node_comment.rb} +7 -8
  159. data/test/{tc_node_copy.rb → test_node_copy.rb} +4 -6
  160. data/test/{tc_node_edit.rb → test_node_edit.rb} +23 -41
  161. data/test/{tc_node_pi.rb → test_node_pi.rb} +37 -40
  162. data/test/{tc_node_text.rb → test_node_text.rb} +10 -12
  163. data/test/{tc_node_write.rb → test_node_write.rb} +18 -29
  164. data/test/test_node_xlink.rb +28 -0
  165. data/test/test_parser.rb +324 -0
  166. data/test/{tc_parser_context.rb → test_parser_context.rb} +42 -49
  167. data/test/{tc_properties.rb → test_properties.rb} +6 -7
  168. data/test/test_reader.rb +364 -0
  169. data/test/test_relaxng.rb +53 -0
  170. data/test/{tc_sax_parser.rb → test_sax_parser.rb} +44 -38
  171. data/test/test_schema.rb +231 -0
  172. data/test/test_suite.rb +38 -40
  173. data/test/{tc_traversal.rb → test_traversal.rb} +5 -6
  174. data/test/{tc_writer.rb → test_writer.rb} +468 -448
  175. data/test/{tc_xinclude.rb → test_xinclude.rb} +4 -5
  176. data/test/test_xml.rb +263 -0
  177. data/test/{tc_xpath.rb → test_xpath.rb} +31 -32
  178. data/test/{tc_xpath_context.rb → test_xpath_context.rb} +8 -9
  179. data/test/test_xpath_expression.rb +37 -0
  180. data/test/{tc_xpointer.rb → test_xpointer.rb} +16 -18
  181. metadata +122 -100
  182. data/lib/libxml/ns.rb +0 -22
  183. data/lib/libxml/properties.rb +0 -23
  184. data/lib/libxml/reader.rb +0 -29
  185. data/lib/libxml/xpath_object.rb +0 -16
  186. data/test/etc_doc_to_s.rb +0 -21
  187. data/test/ets_doc_file.rb +0 -17
  188. data/test/ets_doc_to_s.rb +0 -23
  189. data/test/ets_gpx.rb +0 -28
  190. data/test/ets_node_gc.rb +0 -23
  191. data/test/ets_test.xml +0 -2
  192. data/test/ets_tsr.rb +0 -11
  193. data/test/tc_deprecated_require.rb +0 -13
  194. data/test/tc_document_write.rb +0 -196
  195. data/test/tc_error.rb +0 -180
  196. data/test/tc_html_parser.rb +0 -153
  197. data/test/tc_html_parser_context.rb +0 -24
  198. data/test/tc_namespace.rb +0 -62
  199. data/test/tc_node_xlink.rb +0 -29
  200. data/test/tc_parser.rb +0 -381
  201. data/test/tc_reader.rb +0 -400
  202. data/test/tc_relaxng.rb +0 -54
  203. data/test/tc_schema.rb +0 -162
  204. data/test/tc_xml.rb +0 -226
  205. data/test/tc_xpath_expression.rb +0 -38
data/README.rdoc CHANGED
@@ -1,184 +1,217 @@
1
- = LibXML Ruby
2
-
3
- == Overview
4
- The libxml gem provides Ruby language bindings for GNOME's Libxml2
5
- XML toolkit. It is free software, released under the MIT License.
6
-
7
- We think libxml-ruby is the best XML library for Ruby because:
8
-
9
- * Speed - Its much faster than REXML and Hpricot
10
- * Features - It provides an amazing number of featues
11
- * Conformance - It passes all 1800+ tests from the OASIS XML Tests Suite
12
-
13
- == Requirements
14
- libxml-ruby requires Ruby 1.8.4 or higher. It is dependent on
15
- the following libraries to function properly:
16
-
17
- * libm (math routines: very standard)
18
- * libz (zlib)
19
- * libiconv
20
- * libxml2
21
-
22
- If you are running Linux or Unix you'll need a C compiler so the
23
- extension can be compiled when it is installed. If you are running
24
- Windows, then install the Windows specific RubyGem which
25
- includes an already built extension.
26
-
27
- == INSTALLATION
28
- The easiest way to install libxml-ruby is via Ruby Gems. To install:
29
-
30
- <tt>gem install libxml-ruby</tt>
31
-
32
- If you are running Windows, make sure to install the Win32 RubyGem
33
- which includes prebuilt extensions for Ruby 1.8, 1.9 and 2.0 preview. These
34
- extensions are built with MinGW32 against libxml2 version 2.9.0,
35
- iconv version 1.13 and zlib version 1.2.5. Note these binaries
36
- are available in the lib\libs directory. To use them, put them
37
- someplace on your path.
38
-
39
- The gem also includes a Microsoft VC++ 2012 solution (useful for debugging).
40
-
41
- libxml-ruby's source codes lives on Github at https://github.com/xml4r/libxml-ruby.
42
-
43
- == Getting Started
44
- Using libxml is easy. First decide what parser you want to use:
45
-
46
- * Generally you'll want to use the LibXML::XML::Parser which provides a tree based API.
47
- * For larger documents that don't fit into memory, or if you prefer an input based API, use the LibXML::XML::Reader.
48
- * To parse HTML files use LibXML::XML::HTMLParser.
49
- * If you are masochistic, then use the LibXML::XML::SaxParser, which provides a callback API.
50
-
51
- Once you have chosen a parser, choose a datasource. Libxml can parse files, strings, URIs
52
- and IO streams. For each data source you can specify an LibXML::XML::Encoding, a base uri and
53
- various parser options. For more information, refer the LibXML::XML::Parser.document,
54
- LibXML::XML::Parser.file, LibXML::XML::Parser.io or LibXML:::XML::Parser.string methods (the
55
- same methods are defined on all four parser classes).
56
-
57
- == Advanced Functionality
58
- Beyond the basics of parsing and processing XML and HTML documents,
59
- libxml provides a wealth of additional functionality.
60
-
61
- Most commonly, you'll want to use its LibXML::XML::XPath support, which makes
62
- it easy to find data inside a XML document. Although not as popular,
63
- LibXML::XML::XPointer provides another API for finding data inside an XML document.
64
-
65
- Often times you'll need to validate data before processing it. For example,
66
- if you accept user generated content submitted over the Web, you'll
67
- want to verify that it does not contain malicious code such as embedded scripts.
68
- This can be done using libxml's powerful set of validators:
69
-
70
- * DTDs (LibXML::XML::Dtd)
71
- * Relax Schemas (LibXML::XML::RelaxNG)
72
- * XML Schema (LibXML::XML::Schema)
73
-
74
- Finally, if you'd like to use XSL Transformations to process data,
75
- then install the libxslt gem which is available at
76
- https://github.com/xml4r/libxslt-ruby.
77
-
78
- == Usage
79
- For information about using libxml-ruby please refer to its documentation at
80
- http://xml4r.github.com/libxml-ruby/rdoc/index.html. Some tutorials are also
81
- available at https://github.com/xml4r/libxml-ruby/wiki.
82
-
83
- All libxml classes are in the LibXML::XML module. The easiest
84
- way to use libxml is to require 'xml'. This will mixin
85
- the LibXML module into the global namespace, allowing you to
86
- write code like this:
87
-
88
- require 'xml'
89
- document = XML::Document.new
90
-
91
- However, when creating an application or library you plan to
92
- redistribute, it is best to not add the LibXML module to the global
93
- namespace, in which case you can either write your code like this:
94
-
95
- require 'libxml'
96
- document = LibXML::XML::Document.new
97
-
98
- Or you can utilize a namespace for your own work and include LibXML into it.
99
- For example:
100
-
101
- require 'libxml'
102
-
103
- module MyApplication
104
- include LibXML
105
-
106
- class MyClass
107
- def some_method
108
- document = XML::Document.new
109
- end
110
- end
111
- end
112
-
113
- For simplicity's sake, the documentation uses the xml module in its examples.
114
-
115
- == Memory Management
116
- libxml-ruby automatically manages memory associated with the
117
- underlying libxml2 library. There is however one corner case that
118
- your code must handle. If a node is imported into a document, but not
119
- added to the document, a segmentation fault may occur on program termination.
120
-
121
- # Do NOT do this
122
- require 'xml'
123
- doc1 = XML::Document.string("test1")
124
- doc2 = XML::Document.string("test2")
125
- node = doc2.import(doc1.root)
126
-
127
- If doc2 is freed before node2 a segmentatin fault will occur since
128
- node2 references the document. To avoid this, simply make sure to add the
129
- node to the document:
130
-
131
- # DO this instead
132
- doc1 = XML::Document.string("test1")
133
- doc2 = XML::Document.string("test2")
134
- doc2.root << doc2.import(doc1.root)
135
-
136
- Alternatively, you can call node2.remove! to disassociate node2 from doc2.
137
-
138
- == Threading
139
- libxml-ruby fully supports native, background Ruby threads. This of course
140
- only applies to Ruby 1.9.x and higher since earlier versions of Ruby do not
141
- support native threads.
142
-
143
- == Performance
144
- In addition to being feature rich and conformation, the main reason
145
- people use libxml-ruby is for performance. Here are the results
146
- of a couple simple benchmarks recently blogged about on the
147
- Web (you can find them in the benchmark directory of the
148
- libxml distribution).
149
-
150
- From http://depixelate.com/2008/4/23/ruby-xml-parsing-benchmarks
151
-
152
- user system total real
153
- libxml 0.032000 0.000000 0.032000 ( 0.031000)
154
- Hpricot 0.640000 0.031000 0.671000 ( 0.890000)
155
- REXML 1.813000 0.047000 1.860000 ( 2.031000)
156
-
157
- From https://svn.concord.org/svn/projects/trunk/common/ruby/xml_benchmarks/
158
-
159
- user system total real
160
- libxml 0.641000 0.031000 0.672000 ( 0.672000)
161
- hpricot 5.359000 0.062000 5.421000 ( 5.516000)
162
- rexml 22.859000 0.047000 22.906000 ( 23.203000)
163
-
164
-
165
- == Documentation
166
- Documentation is available via rdoc, and is installed automatically with the
167
- gem.
168
-
169
- libxml-ruby's online documentation is generated using Hanna, which is a
170
- development gem dependency.
171
-
172
- Note that older versions of Rdoc, which ship with Ruby 1.8.x, will report
173
- a number of errors. To avoid them, install Rdoc 2.1 or higher. Once you have
174
- installed the gem, you'll have to disable the version of Rdoc that Ruby 1.8.x
175
- includes. An easy way to do that is rename the directory ruby/lib/ruby/1.8/rdoc to
176
- ruby/lib/ruby/1.8/rdoc_old.
177
-
178
- == Support
179
-
180
- If you have any questions about using libxml-ruby, please report them to
181
- Git Hub at https://github.com/xml4r/libxml-ruby/issues
182
-
183
- == License
184
- See LICENSE for license information.
1
+ = LibXML Ruby
2
+
3
+ == Overview
4
+ The libxml gem provides Ruby language bindings for GNOME's Libxml2
5
+ XML toolkit. It is free software, released under the MIT License.
6
+
7
+ We think libxml-ruby is the best XML library for Ruby because:
8
+
9
+ * Speed - It's much faster than REXML and Hpricot
10
+ * Features - It provides an amazing number of features
11
+ * Conformance - It passes all 1800+ tests from the OASIS XML Test Suite
12
+
13
+ == Requirements
14
+ libxml-ruby requires Ruby 1.8.7 or higher. It depends on libxml2 to
15
+ function properly. libxml2, in turn, depends on:
16
+
17
+ * libm (math routines: very standard)
18
+ * libz (zlib)
19
+ * libiconv
20
+
21
+ If you are running Linux or Unix you'll need a C compiler so the
22
+ extension can be compiled when it is installed. If you are running
23
+ Windows, then install the x64-mingw32 gem or build it yourself using
24
+ Devkit[http://rubyinstaller.org/add-ons/devkit/] or
25
+ msys2[https://msys2.github.io/].
26
+
27
+ == Installation
28
+ The easiest way to install libxml-ruby is via RubyGems. To install:
29
+
30
+ <tt>gem install libxml-ruby</tt>
31
+
32
+ If the extension compile process cannot find libxml2, you may need to indicate
33
+ the location of the libxml2 configuration utility as it is used to find the
34
+ required header and include files. (If you need to indicate a location for the
35
+ libxml2 library or header files different than reported by <tt>xml2-config</tt>,
36
+ see the additional configuration options.)
37
+
38
+ This may be done with RubyGems:
39
+
40
+ <tt>gem install libxml-ruby -- --with-xml2-config=/path/to/xml2-config</tt>
41
+
42
+ Or bundler:
43
+
44
+ <tt>bundle config build.libxml-ruby --with-xml2-config=/path/to/xml2-config</tt>
45
+
46
+ <tt>bundle install libxml-ruby</tt>
47
+
48
+ If you are running Windows, then install the libxml-ruby-x64-mingw32 gem.
49
+ The gem includes prebuilt extensions for Ruby 2.3. These
50
+ extensions are built using MinGW64 and libxml2 version 2.9.3,
51
+ iconv version 1.14 and zlib version 1.2.8. Note these binaries
52
+ are available in the <tt>lib\\libs</tt> directory. To use them, put them
53
+ on your <tt>PATH</tt>.
54
+
55
+ The gem also includes a Microsoft VC++ 2012 solution and XCode 5 project - these
56
+ are very useful for debugging.
57
+
58
+ libxml-ruby's source code lives on GitHub[https://github.com/xml4r/libxml-ruby].
59
+
60
+ == Getting Started
61
+ Using libxml is easy. First decide what parser you want to use:
62
+
63
+ * Generally you'll want to use the LibXML::XML::Parser which provides a tree based API.
64
+ * For larger documents that don't fit into memory, or if you prefer an input based API, use the LibXML::XML::Reader.
65
+ * To parse HTML files use LibXML::XML::HTMLParser.
66
+ * If you are masochistic, then use the LibXML::XML::SaxParser, which provides a callback API.
67
+
68
+ Once you have chosen a parser, choose a datasource. Libxml can parse files, strings, URIs
69
+ and IO streams. For each data source you can specify an LibXML::XML::Encoding, a base uri and
70
+ various parser options. For more information, refer to the LibXML::XML::Parser.document,
71
+ LibXML::XML::Parser.file, LibXML::XML::Parser.io or LibXML::XML::Parser.string methods (the
72
+ same methods are defined on all four parser classes).
73
+
74
+ == Advanced Functionality
75
+ Beyond the basics of parsing and processing XML and HTML documents,
76
+ libxml provides a wealth of additional functionality.
77
+
78
+ Most commonly, you'll want to use its LibXML::XML::XPath support, which makes
79
+ it easy to find data inside an XML document. Although not as popular,
80
+ LibXML::XML::XPointer provides another API for finding data inside an XML document.
81
+
82
+ Often times you'll need to validate data before processing it. For example,
83
+ if you accept user generated content submitted over the Web, you'll
84
+ want to verify that it does not contain malicious code such as embedded scripts.
85
+ This can be done using libxml's powerful set of validators:
86
+
87
+ * DTDs (LibXML::XML::Dtd)
88
+ * Relax Schemas (LibXML::XML::RelaxNG)
89
+ * XML Schema (LibXML::XML::Schema)
90
+
91
+ Finally, if you'd like to use XSL Transformations to process data, then install
92
+ the {libxslt gem}[https://github.com/xml4r/libxslt-rubygem].
93
+
94
+ == Usage
95
+ For information about using libxml-ruby please refer to its
96
+ documentation[http://xml4r.github.io/libxml-ruby]. Some tutorials are also
97
+ available[https://github.com/xml4r/libxml-ruby/wiki].
98
+
99
+ All libxml classes are in the LibXML::XML module. The easiest
100
+ way to use libxml is to <tt>require 'xml'</tt>. This will mixin
101
+ the LibXML module into the global namespace, allowing you to
102
+ write code like this:
103
+
104
+ require 'xml'
105
+ document = XML::Document.new
106
+
107
+ However, when creating an application or library you plan to
108
+ redistribute, it is best to not add the LibXML module to the global
109
+ namespace, in which case you can either write your code like this:
110
+
111
+ require 'libxml'
112
+ document = LibXML::XML::Document.new
113
+
114
+ Or you can utilize a namespace for your own work and include LibXML into it.
115
+ For example:
116
+
117
+ require 'libxml'
118
+
119
+ module MyApplication
120
+ include LibXML
121
+
122
+ class MyClass
123
+ def some_method
124
+ document = XML::Document.new
125
+ end
126
+ end
127
+ end
128
+
129
+ For simplicity's sake, the documentation uses the xml module in its examples.
130
+
131
+ == Threading
132
+ libxml-ruby fully supports native, background Ruby threads. This of course
133
+ only applies to Ruby 1.9.x and higher since earlier versions of Ruby do not
134
+ support native threads.
135
+
136
+ == Tests
137
+
138
+ To run tests you first need to build the shared library:
139
+
140
+ rake compile
141
+
142
+ Once you have built the shared library, you can then run tests using rake:
143
+
144
+ rake test
145
+
146
+ +Travis build status: {<img src="https://travis-ci.org/xml4r/libxml-ruby.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/xml4r/libxml-ruby]
147
+
148
+ == Performance
149
+
150
+ In addition to being feature rich and conformant, the main reason
151
+ people use libxml-ruby is for performance. Here are the results
152
+ of a couple simple benchmarks recently blogged about on the
153
+ Web (you can find them in the benchmark directory of the
154
+ libxml distribution).
155
+
156
+ From http://depixelate.com/2008/4/23/ruby-xml-parsing-benchmarks
157
+
158
+ user system total real
159
+ libxml 0.032000 0.000000 0.032000 ( 0.031000)
160
+ Hpricot 0.640000 0.031000 0.671000 ( 0.890000)
161
+ REXML 1.813000 0.047000 1.860000 ( 2.031000)
162
+
163
+ From https://svn.concord.org/svn/projects/trunk/common/ruby/xml_benchmarks/
164
+
165
+ user system total real
166
+ libxml 0.641000 0.031000 0.672000 ( 0.672000)
167
+ hpricot 5.359000 0.062000 5.421000 ( 5.516000)
168
+ rexml 22.859000 0.047000 22.906000 ( 23.203000)
169
+
170
+
171
+ == Documentation
172
+ Documentation is available via rdoc, and is installed automatically with the
173
+ gem.
174
+
175
+ libxml-ruby's {online
176
+ documentation}[https://xml4r.github.io/libxml-ruby/rdoc/index.html] is generated
177
+ using Hanna, which is a development gem dependency.
178
+
179
+ Note that older versions of Rdoc, which ship with Ruby 1.8.x, will report
180
+ a number of errors. To avoid them, install Rdoc 2.1 or higher. Once you have
181
+ installed the gem, you'll have to disable the version of Rdoc that Ruby 1.8.x
182
+ includes. An easy way to do that is rename the directory
183
+ <tt>ruby/lib/ruby/1.8/rdoc</tt> to
184
+ <tt>ruby/lib/ruby/1.8/rdoc_old</tt>.
185
+
186
+ == Support
187
+ If you have any questions about using libxml-ruby, please report an issue
188
+ on GitHub[https://github.com/xml4r/libxml-ruby/issues].
189
+
190
+ == Memory Management
191
+ libxml-ruby automatically manages memory associated with the
192
+ underlying libxml2 library. The bindings create a one-to-one mapping between
193
+ Ruby objects and libxml documents and libxml parent nodes (ie, nodes that do not
194
+ have a parent and do not belong to a document). In these cases,
195
+ the bindings manage the memory. They do this by installing a free
196
+ function and storing a back pointer to the Ruby object from the xmlnode
197
+ using the _private member on libxml structures. When the Ruby object
198
+ goes out of scope, the underlying libxml structure is freed. Libxml
199
+ itself then frees all child nodes (recursively).
200
+
201
+ For all other nodes (the vast majority), the bindings create temporary
202
+ Ruby objects that get freed once they go out of scope. Thus there can be
203
+ more than one Ruby object pointing to the same xml node. To mostly hide
204
+ this from a programmer on the Ruby side, the <tt>#eql?</tt> and <tt>#==</tt> methods are
205
+ overridden to check if two Ruby objects wrap the same xmlnode. If they do,
206
+ then the methods return true. During the mark phase, each of these temporary
207
+ objects marks its owning document, thereby keeping the Ruby document object
208
+ alive and thus the xmldoc tree.
209
+
210
+ In the sweep phase of the garbage collector, or when a program ends,
211
+ there is no order to how Ruby objects are freed. In fact, the Ruby document
212
+ object is almost always freed before any Ruby objects that wrap child nodes.
213
+ However, this is ok because those Ruby objects do not have a free function
214
+ and are no longer in scope (since if they were the document would not be freed).
215
+
216
+ == License
217
+ See LICENSE for license information.
data/Rakefile CHANGED
@@ -1,79 +1,99 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "rubygems"
4
- require "rake/extensiontask"
5
- require "rake/testtask"
6
- require "rubygems/package_task"
7
- require "rdoc/task"
8
- require "yaml"
9
-
10
- GEM_NAME = "libxml-ruby"
11
- SO_NAME = "libxml_ruby"
12
-
13
- # Read the spec file
14
- spec = Gem::Specification.load("#{GEM_NAME}.gemspec")
15
-
16
- # Setup compile tasks
17
- Rake::ExtensionTask.new do |ext|
18
- ext.gem_spec = spec
19
- ext.name = SO_NAME
20
- ext.ext_dir = "ext/libxml"
21
- ext.lib_dir = "lib/#{RUBY_VERSION.sub(/\.\d$/, '')}"
22
- ext.config_options << "--with-xml2-include=C:/MinGW/local/include/libxml2"
23
- ext.config_options << "--with-zlib-dir=C:/MinGW/local"
24
- end
25
-
26
- # Setup generic gem
27
- Gem::PackageTask.new(spec) do |pkg|
28
- pkg.package_dir = 'pkg'
29
- pkg.need_tar = false
30
- end
31
-
32
- # Setup Windows Gem
33
- if RUBY_PLATFORM.match(/win32|mingw32/)
34
- binaries = (FileList['lib/**/*.so',
35
- 'lib/**/*dll'])
36
-
37
- # Windows specification
38
- win_spec = spec.clone
39
- win_spec.platform = Gem::Platform::CURRENT
40
- win_spec.files += binaries.to_a
41
- win_spec.instance_variable_set(:@cache_file, nil)
42
-
43
- # Unset extensions
44
- win_spec.extensions = nil
45
-
46
- # Rake task to build the windows package
47
- Gem::PackageTask.new(win_spec) do |pkg|
48
- pkg.package_dir = 'pkg'
49
- pkg.need_tar = false
50
- end
51
- end
52
-
53
- # RDoc Task
54
- desc 'Generate rdoc documentation'
55
- RDoc::Task.new("rdoc") do |rdoc|
56
- rdoc.rdoc_dir = 'website/_site/rdoc'
57
- rdoc.title = 'LibXML'
58
- rdoc.generator = 'hanna'
59
-
60
- # Show source inline with line numbers
61
- rdoc.options << '--line-numbers'
62
- rdoc.options << '--charset=utf-8'
63
- # Make the readme file the start page for the generated html
64
- rdoc.main = 'README.rdoc'
65
- rdoc.rdoc_files.include('doc/*.rdoc',
66
- 'ext/**/libxml.c',
67
- 'ext/**/ruby_xml.c',
68
- 'ext/**/*.c',
69
- 'lib/**/*.rb',
70
- 'README.rdoc',
71
- 'HISTORY',
72
- 'LICENSE')
73
- end
74
-
75
- # Test Task
76
- Rake::TestTask.new do |t|
77
- t.libs << "test"
78
- t.verbose = true
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require "rake/extensiontask"
5
+ require "rake/testtask"
6
+ require "rubygems/package_task"
7
+ require "rdoc/task"
8
+ require "yaml"
9
+
10
+ GEM_NAME = "libxml-ruby"
11
+ SO_NAME = "libxml_ruby"
12
+
13
+ # Read the spec file
14
+ spec = Gem::Specification.load("#{GEM_NAME}.gemspec")
15
+
16
+ task :default => [:test]
17
+
18
+ # Setup compile tasks
19
+ Rake::ExtensionTask.new do |ext|
20
+ ext.gem_spec = spec
21
+ ext.name = SO_NAME
22
+ ext.ext_dir = "ext/libxml"
23
+ ext.lib_dir = "lib/#{RUBY_VERSION.sub(/\.\d$/, '')}"
24
+ if RUBY_PLATFORM.match(/mswin|mingw/)
25
+ ext.config_options <<
26
+ if (dir = ENV['WINDOWS_XML2_INCLUDE'])
27
+ "--with-xml2-include=#{dir}"
28
+ else
29
+ case RUBY_PLATFORM
30
+ when 'i386-mingw32'
31
+ '--with-xml2-include=C:/msys64/mingw32/include/libxml2'
32
+ when 'x64-mingw32'
33
+ '--with-xml2-include=C:/msys64/mingw64/include/libxml2'
34
+ when 'x64-mingw-ucrt'
35
+ '--with-xml2-include=C:/msys64/ucrt64/include/libxml2'
36
+ else
37
+ raise "Unknown Windows Ruby, please set ENV['WINDOWS_XML2_INCLUDE']"
38
+ end
39
+ end
40
+ else
41
+ ext.config_options << '--with-xml2-include=/usr/include/libxml2'
42
+ end
43
+ end
44
+
45
+ # Setup generic gem
46
+ Gem::PackageTask.new(spec) do |pkg|
47
+ pkg.package_dir = 'pkg'
48
+ pkg.need_tar = false
49
+ end
50
+
51
+ # Setup Windows Gem
52
+ if RUBY_PLATFORM.match(/mswin|mingw/)
53
+ binaries = (FileList['lib/**/*.so',
54
+ 'lib/**/*dll'])
55
+
56
+ # Windows specification
57
+ win_spec = spec.clone
58
+ win_spec.platform = Gem::Platform::CURRENT
59
+ win_spec.files += binaries.to_a
60
+ win_spec.instance_variable_set(:@cache_file, nil)
61
+
62
+ # Unset extensions
63
+ win_spec.extensions = nil
64
+
65
+ # Rake task to build the windows package
66
+ Gem::PackageTask.new(win_spec) do |pkg|
67
+ pkg.package_dir = 'pkg'
68
+ pkg.need_tar = false
69
+ end
70
+ end
71
+
72
+ # RDoc Task
73
+ desc 'Generate rdoc documentation'
74
+ RDoc::Task.new("rdoc") do |rdoc|
75
+ rdoc.rdoc_dir = 'rdoc'
76
+ rdoc.title = 'LibXML'
77
+ rdoc.generator = 'hanna'
78
+
79
+ # Show source inline with line numbers
80
+ rdoc.options << '--line-numbers'
81
+ rdoc.options << '--charset=utf-8'
82
+ # Make the readme file the start page for the generated html
83
+ rdoc.main = 'README.rdoc'
84
+ rdoc.rdoc_files.include('doc/*.rdoc',
85
+ 'ext/**/libxml.c',
86
+ 'ext/**/ruby_xml.c',
87
+ 'ext/**/*.c',
88
+ 'lib/**/*.rb',
89
+ 'README.rdoc',
90
+ 'HISTORY',
91
+ 'LICENSE')
92
+ end
93
+
94
+ # Test Task
95
+ Rake::TestTask.new do |t|
96
+ t.libs << "test"
97
+ t.test_files = FileList['test/test*.rb'] - ['test/test_suite.rb']
98
+ t.verbose = true
79
99
  end