rxerces 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +309 -0
- data/ext/rxerces/extconf.rb +32 -0
- data/ext/rxerces/rxerces.cpp +591 -0
- data/ext/rxerces/rxerces.h +8 -0
- data/lib/rxerces/rxerces.bundle +0 -0
- data/lib/rxerces/version.rb +3 -0
- data/lib/rxerces.rb +48 -0
- metadata +92 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 71254540124db9069ed4366fe61e6efa20d0d74fe4c92f5f8b51793bca16099d
|
|
4
|
+
data.tar.gz: ba91d5747e0394f5cd5149921af75d838e97815f0dc8b2b09528ac775530461d
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 6e26d2111e1ef02ed72fdac2e532d38201dfba9de4b1521bbf22046c2a23a1a821f0ad42368d47520f61139caa0aca365169399abf6ce79b2fa6fd337e0e0026
|
|
7
|
+
data.tar.gz: 6897cb5b3f9755c4cab71735c4ca5eeaf5a3e85917c67d6a4e5903adb3312067de51961262ec7470ca7b91c1984007d58090f7078081a8400692358be870acc7
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 RXerces Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
# RXerces
|
|
2
|
+
|
|
3
|
+
A Ruby XML library with a Nokogiri-compatible API, powered by Apache Xerces-C instead of libxml2.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
RXerces provides a familiar Nokogiri-like interface for XML parsing and manipulation, but uses the robust Apache Xerces-C XML parser under the hood. This allows Ruby developers to leverage Xerces-C's performance and standards compliance while maintaining compatibility with existing Nokogiri-based code.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- ✅ Nokogiri-compatible API
|
|
12
|
+
- ✅ Powered by Apache Xerces-C
|
|
13
|
+
- ✅ Parse XML documents
|
|
14
|
+
- ✅ Navigate and manipulate DOM trees
|
|
15
|
+
- ✅ Read and write node attributes
|
|
16
|
+
- ✅ Query nodes with XPath (basic support)
|
|
17
|
+
- ✅ Serialize documents back to XML strings
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
### Prerequisites
|
|
22
|
+
|
|
23
|
+
You need to have Xerces-C installed on your system:
|
|
24
|
+
|
|
25
|
+
**macOS (Homebrew):**
|
|
26
|
+
```bash
|
|
27
|
+
brew install xerces-c
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**Ubuntu/Debian:**
|
|
31
|
+
```bash
|
|
32
|
+
sudo apt-get install libxerces-c-dev
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**Fedora/RHEL:**
|
|
36
|
+
```bash
|
|
37
|
+
sudo yum install xerces-c-devel
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Install the Gem
|
|
41
|
+
|
|
42
|
+
Add this line to your application's Gemfile:
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
gem 'rxerces'
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
And then execute:
|
|
49
|
+
```bash
|
|
50
|
+
bundle install
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Or install it yourself as:
|
|
54
|
+
```bash
|
|
55
|
+
gem install rxerces
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Usage
|
|
59
|
+
|
|
60
|
+
### Basic Parsing
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
require 'rxerces'
|
|
64
|
+
|
|
65
|
+
# Parse XML string
|
|
66
|
+
xml = '<root><person name="Alice">Hello</person></root>'
|
|
67
|
+
doc = RXerces.XML(xml)
|
|
68
|
+
|
|
69
|
+
# Access root element
|
|
70
|
+
root = doc.root
|
|
71
|
+
puts root.name # => "root"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Nokogiri Compatibility
|
|
75
|
+
|
|
76
|
+
RXerces provides a `Nokogiri` module for drop-in compatibility:
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
require 'rxerces'
|
|
80
|
+
|
|
81
|
+
# Use Nokogiri syntax
|
|
82
|
+
doc = Nokogiri.XML('<root><child>text</child></root>')
|
|
83
|
+
puts doc.root.name # => "root"
|
|
84
|
+
|
|
85
|
+
# Classes are aliased
|
|
86
|
+
Nokogiri::XML::Document == RXerces::XML::Document # => true
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Working with Nodes
|
|
90
|
+
|
|
91
|
+
```ruby
|
|
92
|
+
# Parse XML
|
|
93
|
+
xml = <<-XML
|
|
94
|
+
<library>
|
|
95
|
+
<book id="1" title="1984">
|
|
96
|
+
<author>George Orwell</author>
|
|
97
|
+
<year>1949</year>
|
|
98
|
+
</book>
|
|
99
|
+
<book id="2" title="Brave New World">
|
|
100
|
+
<author>Aldous Huxley</author>
|
|
101
|
+
<year>1932</year>
|
|
102
|
+
</book>
|
|
103
|
+
</library>
|
|
104
|
+
XML
|
|
105
|
+
|
|
106
|
+
doc = RXerces.XML(xml)
|
|
107
|
+
root = doc.root
|
|
108
|
+
|
|
109
|
+
# Get attributes
|
|
110
|
+
book = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
111
|
+
puts book['id'] # => "1"
|
|
112
|
+
puts book['title'] # => "1984"
|
|
113
|
+
|
|
114
|
+
# Set attributes
|
|
115
|
+
book['isbn'] = '978-0451524935'
|
|
116
|
+
puts book['isbn'] # => "978-0451524935"
|
|
117
|
+
|
|
118
|
+
# Get text content
|
|
119
|
+
author = book.children.find { |n| n.name == 'author' }
|
|
120
|
+
puts author.text # => "George Orwell"
|
|
121
|
+
|
|
122
|
+
# Set text content
|
|
123
|
+
author.text = "Eric Arthur Blair"
|
|
124
|
+
puts author.text # => "Eric Arthur Blair"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Navigating the DOM
|
|
128
|
+
|
|
129
|
+
```ruby
|
|
130
|
+
# Get all children
|
|
131
|
+
root.children.each do |child|
|
|
132
|
+
puts "#{child.name}: #{child.class}"
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Find specific elements
|
|
136
|
+
books = root.children.select { |n| n.is_a?(RXerces::XML::Element) && n.name == 'book' }
|
|
137
|
+
books.each do |book|
|
|
138
|
+
puts "Book ID: #{book['id']}"
|
|
139
|
+
end
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Serialization
|
|
143
|
+
|
|
144
|
+
```ruby
|
|
145
|
+
# Convert document back to XML string
|
|
146
|
+
xml_string = doc.to_xml
|
|
147
|
+
puts xml_string
|
|
148
|
+
|
|
149
|
+
# or use to_s
|
|
150
|
+
puts doc.to_s
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### XPath Queries
|
|
154
|
+
|
|
155
|
+
RXerces supports XPath queries using Xerces-C's XPath implementation:
|
|
156
|
+
|
|
157
|
+
```ruby
|
|
158
|
+
xml = <<-XML
|
|
159
|
+
<library>
|
|
160
|
+
<book>
|
|
161
|
+
<title>1984</title>
|
|
162
|
+
<author>George Orwell</author>
|
|
163
|
+
</book>
|
|
164
|
+
<book>
|
|
165
|
+
<title>Brave New World</title>
|
|
166
|
+
<author>Aldous Huxley</author>
|
|
167
|
+
</book>
|
|
168
|
+
</library>
|
|
169
|
+
XML
|
|
170
|
+
|
|
171
|
+
doc = RXerces.XML(xml)
|
|
172
|
+
|
|
173
|
+
# Find all book elements
|
|
174
|
+
books = doc.xpath('//book')
|
|
175
|
+
puts books.length # => 2
|
|
176
|
+
|
|
177
|
+
# Find all titles
|
|
178
|
+
titles = doc.xpath('//title')
|
|
179
|
+
titles.each do |title|
|
|
180
|
+
puts title.text.strip
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Use path expressions
|
|
184
|
+
authors = doc.xpath('/library/book/author')
|
|
185
|
+
puts authors.length # => 2
|
|
186
|
+
|
|
187
|
+
# Query from a specific node
|
|
188
|
+
first_book = books[0]
|
|
189
|
+
title = first_book.xpath('.//title').first
|
|
190
|
+
puts title.text # => "1984"
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
**Note on XPath Support**: Xerces-C implements the XML Schema XPath subset, not full XPath 1.0. Supported features include:
|
|
194
|
+
- Basic path expressions (`/`, `//`, `.`, `..`)
|
|
195
|
+
- Element selection by name
|
|
196
|
+
- Descendant and child axes
|
|
197
|
+
|
|
198
|
+
Not supported:
|
|
199
|
+
- Attribute predicates (`[@attribute="value"]`)
|
|
200
|
+
- XPath functions (`last()`, `position()`, `text()`)
|
|
201
|
+
- Comparison operators in predicates
|
|
202
|
+
|
|
203
|
+
For more complex queries, you can combine basic XPath with Ruby's `select` and `find` methods.
|
|
204
|
+
|
|
205
|
+
## API Reference
|
|
206
|
+
|
|
207
|
+
### RXerces Module
|
|
208
|
+
|
|
209
|
+
- `RXerces.XML(string)` - Parse XML string and return Document
|
|
210
|
+
- `RXerces.parse(string)` - Alias for `XML`
|
|
211
|
+
|
|
212
|
+
### RXerces::XML::Document
|
|
213
|
+
|
|
214
|
+
- `.parse(string)` - Parse XML string (class method)
|
|
215
|
+
- `#root` - Get root element
|
|
216
|
+
- `#to_s` / `#to_xml` - Serialize to XML string
|
|
217
|
+
- `#xpath(path)` - Query with XPath (returns NodeSet)
|
|
218
|
+
|
|
219
|
+
### RXerces::XML::Node
|
|
220
|
+
|
|
221
|
+
- `#name` - Get node name
|
|
222
|
+
- `#text` / `#content` - Get text content
|
|
223
|
+
- `#text=` / `#content=` - Set text content
|
|
224
|
+
- `#[attribute]` - Get attribute value
|
|
225
|
+
- `#[attribute]=` - Set attribute value
|
|
226
|
+
- `#children` - Get array of child nodes
|
|
227
|
+
- `#xpath(path)` - Query descendants with XPath
|
|
228
|
+
|
|
229
|
+
### RXerces::XML::Element
|
|
230
|
+
|
|
231
|
+
Inherits all methods from `Node`. Represents element nodes.
|
|
232
|
+
|
|
233
|
+
### RXerces::XML::Text
|
|
234
|
+
|
|
235
|
+
Inherits all methods from `Node`. Represents text nodes.
|
|
236
|
+
|
|
237
|
+
### RXerces::XML::NodeSet
|
|
238
|
+
|
|
239
|
+
- `#length` / `#size` - Get number of nodes
|
|
240
|
+
- `#[]` - Access node by index
|
|
241
|
+
- `#each` - Iterate over nodes (Enumerable)
|
|
242
|
+
- `#to_a` - Convert to array
|
|
243
|
+
|
|
244
|
+
## Development
|
|
245
|
+
|
|
246
|
+
### Building the Extension
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
bundle install
|
|
250
|
+
bundle exec rake compile
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Running Tests
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
bundle exec rspec
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Running Tests with Compilation
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
bundle exec rake
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Implementation Notes
|
|
266
|
+
|
|
267
|
+
- Uses Apache Xerces-C 3.x for XML parsing
|
|
268
|
+
- C++ extension compiled with Ruby's native extension API
|
|
269
|
+
- XPath support is basic (full XPath requires additional implementation)
|
|
270
|
+
- Memory management handled by Ruby's GC and Xerces-C's DOM
|
|
271
|
+
|
|
272
|
+
## Differences from Nokogiri
|
|
273
|
+
|
|
274
|
+
While RXerces aims for API compatibility with Nokogiri, there are some differences:
|
|
275
|
+
|
|
276
|
+
1. **Parser Backend**: Uses Xerces-C instead of libxml2
|
|
277
|
+
2. **XPath**: Basic XPath support (Xerces-C's XML Schema XPath subset, not full XPath 1.0)
|
|
278
|
+
3. **Features**: Subset of Nokogiri's full feature set
|
|
279
|
+
4. **Performance**: Different performance characteristics due to Xerces-C
|
|
280
|
+
|
|
281
|
+
## Contributing
|
|
282
|
+
|
|
283
|
+
1. Fork it
|
|
284
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
285
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
286
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
287
|
+
5. Create new Pull Request
|
|
288
|
+
|
|
289
|
+
## License
|
|
290
|
+
|
|
291
|
+
MIT License - see LICENSE file for details
|
|
292
|
+
|
|
293
|
+
## Credits
|
|
294
|
+
|
|
295
|
+
- Built with [Apache Xerces-C](https://xerces.apache.org/xerces-c/)
|
|
296
|
+
- API inspired by [Nokogiri](https://nokogiri.org/)
|
|
297
|
+
|
|
298
|
+
## Misc
|
|
299
|
+
This library was almost entirely written using AI (Claude Sonnet 4.5). It
|
|
300
|
+
was mainly a reaction to the lack of maintainers for libxml2, and the generally
|
|
301
|
+
sorry state of that library. Since Nokogiri uses it under the hood,
|
|
302
|
+
I thought it best to create an alternative.
|
|
303
|
+
|
|
304
|
+
## Copyright
|
|
305
|
+
(C) 2025, Daniel J. Berger
|
|
306
|
+
All Rights Reserved
|
|
307
|
+
|
|
308
|
+
## Author
|
|
309
|
+
* Daniel J. Berger
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Build configuration for the RXerces native extension.
#
# Adds the C++11 flag, looks for Apache Xerces-C (asking Homebrew on macOS),
# and generates the Makefile for the rxerces/rxerces extension.
require 'mkmf'

# The extension source is C++11.
$CXXFLAGS << " -std=c++11"

# On macOS, ask Homebrew where Xerces-C is installed and add its include and
# lib directories to the search paths. A missing brew or formula yields an
# empty prefix and is silently skipped.
if RUBY_PLATFORM =~ /darwin/
  homebrew_prefix = `brew --prefix xerces-c 2>/dev/null`.chomp
  if !homebrew_prefix.empty? && File.directory?(homebrew_prefix)
    $INCFLAGS << " -I#{homebrew_prefix}/include"
    $LDFLAGS << " -L#{homebrew_prefix}/lib"
  end
end

# The library is mandatory: stop with install hints on stderr and a non-zero
# exit status when it cannot be linked.
unless have_library('xerces-c')
  abort [
    "Xerces-C library not found. Please install it:",
    "  macOS: brew install xerces-c",
    "  Ubuntu/Debian: sudo apt-get install libxerces-c-dev",
    "  Fedora/RHEL: sudo yum install xerces-c-devel"
  ].join("\n")
end

# The header probe is advisory only: the library was already found above, so
# a failed probe produces a warning and the build continues.
# NOTE(review): presumably the probe can miss because mkmf compiles it as C
# while this is a C++ header -- confirm.
unless find_header('xercesc/util/PlatformUtils.hpp')
  warn "Warning: Xerces-C headers not automatically detected, but library is present."
  warn "Proceeding with compilation..."
end

create_makefile('rxerces/rxerces')
|
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
#include "rxerces.h"
|
|
2
|
+
#include <xercesc/util/PlatformUtils.hpp>
|
|
3
|
+
#include <xercesc/parsers/XercesDOMParser.hpp>
|
|
4
|
+
#include <xercesc/dom/DOM.hpp>
|
|
5
|
+
#include <xercesc/util/XMLString.hpp>
|
|
6
|
+
#include <xercesc/framework/MemBufInputSource.hpp>
|
|
7
|
+
#include <xercesc/framework/MemBufFormatTarget.hpp>
|
|
8
|
+
#include <xercesc/util/XercesDefs.hpp>
|
|
9
|
+
#include <xercesc/dom/DOMXPathResult.hpp>
|
|
10
|
+
#include <xercesc/dom/DOMXPathExpression.hpp>
|
|
11
|
+
#include <sstream>
|
|
12
|
+
|
|
13
|
+
using namespace xercesc;
|
|
14
|
+
|
|
15
|
+
VALUE rb_mRXerces;
|
|
16
|
+
VALUE rb_mXML;
|
|
17
|
+
VALUE rb_cDocument;
|
|
18
|
+
VALUE rb_cNode;
|
|
19
|
+
VALUE rb_cNodeSet;
|
|
20
|
+
VALUE rb_cElement;
|
|
21
|
+
VALUE rb_cText;
|
|
22
|
+
|
|
23
|
+
// Xerces initialization flag
|
|
24
|
+
static bool xerces_initialized = false;
|
|
25
|
+
|
|
26
|
+
// Helper class to manage XMLCh strings
|
|
27
|
+
class XStr {
|
|
28
|
+
public:
|
|
29
|
+
XStr(const char* const toTranscode) {
|
|
30
|
+
fUnicodeForm = XMLString::transcode(toTranscode);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
XStr(const std::string& toTranscode) {
|
|
34
|
+
fUnicodeForm = XMLString::transcode(toTranscode.c_str());
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
~XStr() {
|
|
38
|
+
XMLString::release(&fUnicodeForm);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const XMLCh* unicodeForm() const {
|
|
42
|
+
return fUnicodeForm;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
private:
|
|
46
|
+
XMLCh* fUnicodeForm;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// Helper to convert XMLCh to char*
|
|
50
|
+
class CharStr {
|
|
51
|
+
public:
|
|
52
|
+
CharStr(const XMLCh* const toTranscode) {
|
|
53
|
+
fLocalForm = XMLString::transcode(toTranscode);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
~CharStr() {
|
|
57
|
+
XMLString::release(&fLocalForm);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const char* localForm() const {
|
|
61
|
+
return fLocalForm;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
private:
|
|
65
|
+
char* fLocalForm;
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
// Wrapper structure for DOMDocument
|
|
69
|
+
typedef struct {
|
|
70
|
+
DOMDocument* doc;
|
|
71
|
+
XercesDOMParser* parser;
|
|
72
|
+
} DocumentWrapper;
|
|
73
|
+
|
|
74
|
+
// Wrapper structure for DOMNode
|
|
75
|
+
typedef struct {
|
|
76
|
+
DOMNode* node;
|
|
77
|
+
VALUE doc_ref; // Keep reference to parent document
|
|
78
|
+
} NodeWrapper;
|
|
79
|
+
|
|
80
|
+
// Wrapper structure for NodeSet (array of nodes)
|
|
81
|
+
typedef struct {
|
|
82
|
+
VALUE nodes_array;
|
|
83
|
+
} NodeSetWrapper;
|
|
84
|
+
|
|
85
|
+
// Memory management functions
|
|
86
|
+
static void document_free(void* ptr) {
|
|
87
|
+
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
88
|
+
if (wrapper) {
|
|
89
|
+
if (wrapper->parser) {
|
|
90
|
+
delete wrapper->parser;
|
|
91
|
+
}
|
|
92
|
+
// Document is owned by parser, so don't delete it separately
|
|
93
|
+
xfree(wrapper);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
static void node_free(void* ptr) {
|
|
98
|
+
NodeWrapper* wrapper = (NodeWrapper*)ptr;
|
|
99
|
+
if (wrapper) {
|
|
100
|
+
// Don't delete node - it's owned by the document
|
|
101
|
+
xfree(wrapper);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
static void nodeset_free(void* ptr) {
|
|
106
|
+
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
107
|
+
if (wrapper) {
|
|
108
|
+
xfree(wrapper);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Size callback for Document objects: reports the wrapper struct size only.
static size_t document_size(const void* ptr) {
    (void)ptr;  // the size does not depend on the instance
    return sizeof(DocumentWrapper);
}
|
|
115
|
+
|
|
116
|
+
// Size callback for Node objects: reports the wrapper struct size only.
static size_t node_size(const void* ptr) {
    (void)ptr;  // the size does not depend on the instance
    return sizeof(NodeWrapper);
}
|
|
119
|
+
|
|
120
|
+
// Size callback for NodeSet objects: reports the wrapper struct size only.
static size_t nodeset_size(const void* ptr) {
    (void)ptr;  // the size does not depend on the instance
    return sizeof(NodeSetWrapper);
}
|
|
123
|
+
|
|
124
|
+
// Typed-data descriptor for RXerces::XML::Document.
static const rb_data_type_t document_type = {
    "RXerces::XML::Document",     // wrap_struct_name
    {
        0,                        // dmark: none
        document_free,            // dfree
        document_size             // dsize
    },
    0,                            // parent
    0,                            // data
    RUBY_TYPED_FREE_IMMEDIATELY   // flags
};
|
|
130
|
+
|
|
131
|
+
// Typed-data descriptor shared by RXerces::XML::Node, Element and Text.
static const rb_data_type_t node_type = {
    "RXerces::XML::Node",         // wrap_struct_name
    {
        0,                        // dmark: none
        node_free,                // dfree
        node_size                 // dsize
    },
    0,                            // parent
    0,                            // data
    RUBY_TYPED_FREE_IMMEDIATELY   // flags
};
|
|
137
|
+
|
|
138
|
+
static const rb_data_type_t nodeset_type = {
|
|
139
|
+
"RXerces::XML::NodeSet",
|
|
140
|
+
{0, nodeset_free, nodeset_size},
|
|
141
|
+
0, 0,
|
|
142
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
|
143
|
+
};
|
|
144
|
+
|
|
145
|
+
// Helper to create Ruby Node object from DOMNode
|
|
146
|
+
static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
147
|
+
if (!node) {
|
|
148
|
+
return Qnil;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
NodeWrapper* wrapper = ALLOC(NodeWrapper);
|
|
152
|
+
wrapper->node = node;
|
|
153
|
+
wrapper->doc_ref = doc_ref;
|
|
154
|
+
|
|
155
|
+
VALUE rb_node;
|
|
156
|
+
|
|
157
|
+
switch (node->getNodeType()) {
|
|
158
|
+
case DOMNode::ELEMENT_NODE:
|
|
159
|
+
rb_node = TypedData_Wrap_Struct(rb_cElement, &node_type, wrapper);
|
|
160
|
+
break;
|
|
161
|
+
case DOMNode::TEXT_NODE:
|
|
162
|
+
rb_node = TypedData_Wrap_Struct(rb_cText, &node_type, wrapper);
|
|
163
|
+
break;
|
|
164
|
+
default:
|
|
165
|
+
rb_node = TypedData_Wrap_Struct(rb_cNode, &node_type, wrapper);
|
|
166
|
+
break;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// Keep reference to document to prevent GC
|
|
170
|
+
rb_iv_set(rb_node, "@document", doc_ref);
|
|
171
|
+
|
|
172
|
+
return rb_node;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// RXerces::XML::Document.parse(string)
|
|
176
|
+
static VALUE document_parse(VALUE klass, VALUE str) {
|
|
177
|
+
if (!xerces_initialized) {
|
|
178
|
+
try {
|
|
179
|
+
XMLPlatformUtils::Initialize();
|
|
180
|
+
xerces_initialized = true;
|
|
181
|
+
} catch (const XMLException& e) {
|
|
182
|
+
rb_raise(rb_eRuntimeError, "Xerces initialization failed");
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
Check_Type(str, T_STRING);
|
|
187
|
+
const char* xml_str = StringValueCStr(str);
|
|
188
|
+
|
|
189
|
+
XercesDOMParser* parser = new XercesDOMParser();
|
|
190
|
+
parser->setValidationScheme(XercesDOMParser::Val_Never);
|
|
191
|
+
parser->setDoNamespaces(false);
|
|
192
|
+
parser->setDoSchema(false);
|
|
193
|
+
|
|
194
|
+
try {
|
|
195
|
+
MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
|
|
196
|
+
parser->parse(input);
|
|
197
|
+
|
|
198
|
+
DOMDocument* doc = parser->getDocument();
|
|
199
|
+
|
|
200
|
+
DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
|
|
201
|
+
wrapper->doc = doc;
|
|
202
|
+
wrapper->parser = parser;
|
|
203
|
+
|
|
204
|
+
VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
|
|
205
|
+
return rb_doc;
|
|
206
|
+
} catch (const XMLException& e) {
|
|
207
|
+
CharStr message(e.getMessage());
|
|
208
|
+
delete parser;
|
|
209
|
+
rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
|
|
210
|
+
} catch (const DOMException& e) {
|
|
211
|
+
CharStr message(e.getMessage());
|
|
212
|
+
delete parser;
|
|
213
|
+
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
214
|
+
} catch (...) {
|
|
215
|
+
delete parser;
|
|
216
|
+
rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
return Qnil;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// document.root
|
|
223
|
+
static VALUE document_root(VALUE self) {
|
|
224
|
+
DocumentWrapper* wrapper;
|
|
225
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
226
|
+
|
|
227
|
+
if (!wrapper->doc) {
|
|
228
|
+
return Qnil;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
DOMElement* root = wrapper->doc->getDocumentElement();
|
|
232
|
+
return wrap_node(root, self);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// document.to_s / document.to_xml
|
|
236
|
+
static VALUE document_to_s(VALUE self) {
|
|
237
|
+
DocumentWrapper* wrapper;
|
|
238
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
239
|
+
|
|
240
|
+
if (!wrapper->doc) {
|
|
241
|
+
return rb_str_new_cstr("");
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
try {
|
|
245
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
|
|
246
|
+
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
247
|
+
|
|
248
|
+
XMLCh* xml_str = serializer->writeToString(wrapper->doc);
|
|
249
|
+
CharStr utf8_str(xml_str);
|
|
250
|
+
|
|
251
|
+
VALUE result = rb_str_new_cstr(utf8_str.localForm());
|
|
252
|
+
|
|
253
|
+
XMLString::release(&xml_str);
|
|
254
|
+
serializer->release();
|
|
255
|
+
|
|
256
|
+
return result;
|
|
257
|
+
} catch (...) {
|
|
258
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize document");
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return Qnil;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// document.xpath(path)
|
|
265
|
+
static VALUE document_xpath(VALUE self, VALUE path) {
|
|
266
|
+
DocumentWrapper* doc_wrapper;
|
|
267
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);
|
|
268
|
+
|
|
269
|
+
if (!doc_wrapper->doc) {
|
|
270
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
271
|
+
wrapper->nodes_array = rb_ary_new();
|
|
272
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
Check_Type(path, T_STRING);
|
|
276
|
+
const char* xpath_str = StringValueCStr(path);
|
|
277
|
+
|
|
278
|
+
try {
|
|
279
|
+
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
280
|
+
if (!root) {
|
|
281
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
282
|
+
wrapper->nodes_array = rb_ary_new();
|
|
283
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
|
|
287
|
+
DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
|
|
288
|
+
XStr(xpath_str).unicodeForm(), resolver);
|
|
289
|
+
|
|
290
|
+
DOMXPathResult* result = expression->evaluate(
|
|
291
|
+
doc_wrapper->doc->getDocumentElement(),
|
|
292
|
+
DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE,
|
|
293
|
+
NULL);
|
|
294
|
+
|
|
295
|
+
VALUE nodes_array = rb_ary_new();
|
|
296
|
+
XMLSize_t length = result->getSnapshotLength();
|
|
297
|
+
|
|
298
|
+
for (XMLSize_t i = 0; i < length; i++) {
|
|
299
|
+
result->snapshotItem(i);
|
|
300
|
+
DOMNode* node = result->getNodeValue();
|
|
301
|
+
if (node) {
|
|
302
|
+
rb_ary_push(nodes_array, wrap_node(node, self));
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
expression->release();
|
|
307
|
+
resolver->release();
|
|
308
|
+
result->release();
|
|
309
|
+
|
|
310
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
311
|
+
wrapper->nodes_array = nodes_array;
|
|
312
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
313
|
+
|
|
314
|
+
} catch (const DOMXPathException& e) {
|
|
315
|
+
CharStr message(e.getMessage());
|
|
316
|
+
rb_raise(rb_eRuntimeError, "XPath error: %s", message.localForm());
|
|
317
|
+
} catch (const DOMException& e) {
|
|
318
|
+
CharStr message(e.getMessage());
|
|
319
|
+
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
320
|
+
} catch (...) {
|
|
321
|
+
rb_raise(rb_eRuntimeError, "Unknown XPath error");
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
325
|
+
wrapper->nodes_array = rb_ary_new();
|
|
326
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
// node.name
|
|
330
|
+
static VALUE node_name(VALUE self) {
|
|
331
|
+
NodeWrapper* wrapper;
|
|
332
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
333
|
+
|
|
334
|
+
if (!wrapper->node) {
|
|
335
|
+
return Qnil;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
const XMLCh* name = wrapper->node->getNodeName();
|
|
339
|
+
CharStr utf8_name(name);
|
|
340
|
+
|
|
341
|
+
return rb_str_new_cstr(utf8_name.localForm());
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// node.text / node.content
|
|
345
|
+
static VALUE node_text(VALUE self) {
|
|
346
|
+
NodeWrapper* wrapper;
|
|
347
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
348
|
+
|
|
349
|
+
if (!wrapper->node) {
|
|
350
|
+
return rb_str_new_cstr("");
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
const XMLCh* content = wrapper->node->getTextContent();
|
|
354
|
+
if (!content) {
|
|
355
|
+
return rb_str_new_cstr("");
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
CharStr utf8_content(content);
|
|
359
|
+
return rb_str_new_cstr(utf8_content.localForm());
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// node.text = value
|
|
363
|
+
static VALUE node_text_set(VALUE self, VALUE text) {
|
|
364
|
+
NodeWrapper* wrapper;
|
|
365
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
366
|
+
|
|
367
|
+
if (!wrapper->node) {
|
|
368
|
+
return Qnil;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
Check_Type(text, T_STRING);
|
|
372
|
+
const char* text_str = StringValueCStr(text);
|
|
373
|
+
|
|
374
|
+
wrapper->node->setTextContent(XStr(text_str).unicodeForm());
|
|
375
|
+
|
|
376
|
+
return text;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// node[attribute_name]
|
|
380
|
+
static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
|
|
381
|
+
NodeWrapper* wrapper;
|
|
382
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
383
|
+
|
|
384
|
+
if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
385
|
+
return Qnil;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
Check_Type(attr_name, T_STRING);
|
|
389
|
+
const char* attr_str = StringValueCStr(attr_name);
|
|
390
|
+
|
|
391
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
392
|
+
const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
|
|
393
|
+
|
|
394
|
+
if (!value || XMLString::stringLen(value) == 0) {
|
|
395
|
+
return Qnil;
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
CharStr utf8_value(value);
|
|
399
|
+
return rb_str_new_cstr(utf8_value.localForm());
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// node[attribute_name] = value
|
|
403
|
+
static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
|
|
404
|
+
NodeWrapper* wrapper;
|
|
405
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
406
|
+
|
|
407
|
+
if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
408
|
+
return Qnil;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
Check_Type(attr_name, T_STRING);
|
|
412
|
+
Check_Type(attr_value, T_STRING);
|
|
413
|
+
|
|
414
|
+
const char* attr_str = StringValueCStr(attr_name);
|
|
415
|
+
const char* value_str = StringValueCStr(attr_value);
|
|
416
|
+
|
|
417
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
418
|
+
element->setAttribute(XStr(attr_str).unicodeForm(), XStr(value_str).unicodeForm());
|
|
419
|
+
|
|
420
|
+
return attr_value;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// node.children
|
|
424
|
+
static VALUE node_children(VALUE self) {
|
|
425
|
+
NodeWrapper* wrapper;
|
|
426
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
427
|
+
|
|
428
|
+
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
429
|
+
VALUE children = rb_ary_new();
|
|
430
|
+
|
|
431
|
+
if (!wrapper->node) {
|
|
432
|
+
return children;
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
436
|
+
XMLSize_t count = child_nodes->getLength();
|
|
437
|
+
|
|
438
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
439
|
+
DOMNode* child = child_nodes->item(i);
|
|
440
|
+
rb_ary_push(children, wrap_node(child, doc_ref));
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
return children;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// node.xpath(path)
|
|
447
|
+
static VALUE node_xpath(VALUE self, VALUE path) {
|
|
448
|
+
NodeWrapper* node_wrapper;
|
|
449
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, node_wrapper);
|
|
450
|
+
|
|
451
|
+
if (!node_wrapper->node) {
|
|
452
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
453
|
+
wrapper->nodes_array = rb_ary_new();
|
|
454
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
Check_Type(path, T_STRING);
|
|
458
|
+
const char* xpath_str = StringValueCStr(path);
|
|
459
|
+
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
460
|
+
|
|
461
|
+
try {
|
|
462
|
+
DOMDocument* doc = node_wrapper->node->getOwnerDocument();
|
|
463
|
+
if (!doc) {
|
|
464
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
465
|
+
wrapper->nodes_array = rb_ary_new();
|
|
466
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
|
|
470
|
+
DOMXPathExpression* expression = doc->createExpression(
|
|
471
|
+
XStr(xpath_str).unicodeForm(), resolver);
|
|
472
|
+
|
|
473
|
+
DOMXPathResult* result = expression->evaluate(
|
|
474
|
+
node_wrapper->node,
|
|
475
|
+
DOMXPathResult::ORDERED_NODE_SNAPSHOT_TYPE,
|
|
476
|
+
NULL);
|
|
477
|
+
|
|
478
|
+
VALUE nodes_array = rb_ary_new();
|
|
479
|
+
XMLSize_t length = result->getSnapshotLength();
|
|
480
|
+
|
|
481
|
+
for (XMLSize_t i = 0; i < length; i++) {
|
|
482
|
+
result->snapshotItem(i);
|
|
483
|
+
DOMNode* node = result->getNodeValue();
|
|
484
|
+
if (node) {
|
|
485
|
+
rb_ary_push(nodes_array, wrap_node(node, doc_ref));
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
expression->release();
|
|
490
|
+
resolver->release();
|
|
491
|
+
result->release();
|
|
492
|
+
|
|
493
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
494
|
+
wrapper->nodes_array = nodes_array;
|
|
495
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
496
|
+
|
|
497
|
+
} catch (const DOMXPathException& e) {
|
|
498
|
+
CharStr message(e.getMessage());
|
|
499
|
+
rb_raise(rb_eRuntimeError, "XPath error: %s", message.localForm());
|
|
500
|
+
} catch (const DOMException& e) {
|
|
501
|
+
CharStr message(e.getMessage());
|
|
502
|
+
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
503
|
+
} catch (...) {
|
|
504
|
+
rb_raise(rb_eRuntimeError, "Unknown XPath error");
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
508
|
+
wrapper->nodes_array = rb_ary_new();
|
|
509
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// nodeset.length / nodeset.size
|
|
513
|
+
static VALUE nodeset_length(VALUE self) {
|
|
514
|
+
NodeSetWrapper* wrapper;
|
|
515
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
516
|
+
|
|
517
|
+
return LONG2NUM(RARRAY_LEN(wrapper->nodes_array));
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
// nodeset[index]
|
|
521
|
+
static VALUE nodeset_at(VALUE self, VALUE index) {
|
|
522
|
+
NodeSetWrapper* wrapper;
|
|
523
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
524
|
+
|
|
525
|
+
return rb_ary_entry(wrapper->nodes_array, NUM2LONG(index));
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
// nodeset.each
|
|
529
|
+
static VALUE nodeset_each(VALUE self) {
|
|
530
|
+
NodeSetWrapper* wrapper;
|
|
531
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
532
|
+
|
|
533
|
+
if (!rb_block_given_p()) {
|
|
534
|
+
return rb_funcall(wrapper->nodes_array, rb_intern("each"), 0);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
long len = RARRAY_LEN(wrapper->nodes_array);
|
|
538
|
+
for (long i = 0; i < len; i++) {
|
|
539
|
+
rb_yield(rb_ary_entry(wrapper->nodes_array, i));
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
return self;
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
// nodeset.to_a
|
|
546
|
+
static VALUE nodeset_to_a(VALUE self) {
|
|
547
|
+
NodeSetWrapper* wrapper;
|
|
548
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
549
|
+
|
|
550
|
+
return rb_ary_dup(wrapper->nodes_array);
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
extern "C" void Init_rxerces(void) {
|
|
554
|
+
rb_mRXerces = rb_define_module("RXerces");
|
|
555
|
+
rb_mXML = rb_define_module_under(rb_mRXerces, "XML");
|
|
556
|
+
|
|
557
|
+
rb_cDocument = rb_define_class_under(rb_mXML, "Document", rb_cObject);
|
|
558
|
+
rb_undef_alloc_func(rb_cDocument);
|
|
559
|
+
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
560
|
+
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
561
|
+
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
562
|
+
rb_define_method(rb_cDocument, "to_xml", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
563
|
+
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
564
|
+
|
|
565
|
+
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
566
|
+
rb_undef_alloc_func(rb_cNode);
|
|
567
|
+
rb_define_method(rb_cNode, "name", RUBY_METHOD_FUNC(node_name), 0);
|
|
568
|
+
rb_define_method(rb_cNode, "text", RUBY_METHOD_FUNC(node_text), 0);
|
|
569
|
+
rb_define_method(rb_cNode, "content", RUBY_METHOD_FUNC(node_text), 0);
|
|
570
|
+
rb_define_method(rb_cNode, "text=", RUBY_METHOD_FUNC(node_text_set), 1);
|
|
571
|
+
rb_define_method(rb_cNode, "content=", RUBY_METHOD_FUNC(node_text_set), 1);
|
|
572
|
+
rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
|
|
573
|
+
rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
|
|
574
|
+
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
575
|
+
rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
|
|
576
|
+
|
|
577
|
+
rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
|
|
578
|
+
rb_undef_alloc_func(rb_cElement);
|
|
579
|
+
|
|
580
|
+
rb_cText = rb_define_class_under(rb_mXML, "Text", rb_cNode);
|
|
581
|
+
rb_undef_alloc_func(rb_cText);
|
|
582
|
+
|
|
583
|
+
rb_cNodeSet = rb_define_class_under(rb_mXML, "NodeSet", rb_cObject);
|
|
584
|
+
rb_undef_alloc_func(rb_cNodeSet);
|
|
585
|
+
rb_define_method(rb_cNodeSet, "length", RUBY_METHOD_FUNC(nodeset_length), 0);
|
|
586
|
+
rb_define_method(rb_cNodeSet, "size", RUBY_METHOD_FUNC(nodeset_length), 0);
|
|
587
|
+
rb_define_method(rb_cNodeSet, "[]", RUBY_METHOD_FUNC(nodeset_at), 1);
|
|
588
|
+
rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
|
|
589
|
+
rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
|
|
590
|
+
rb_include_module(rb_cNodeSet, rb_mEnumerable);
|
|
591
|
+
}
|
|
Binary file
|
data/lib/rxerces.rb
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require 'rxerces/rxerces'
|
|
2
|
+
require 'rxerces/version'
|
|
3
|
+
|
|
4
|
+
# Top-level namespace of the gem. Offers Nokogiri-style module-level entry
# points for parsing XML with the Xerces-C backed RXerces::XML::Document.
module RXerces
  class << self
    # Parses an XML string by delegating to RXerces::XML::Document.parse.
    #
    # @param string [String] XML source text
    # @return [RXerces::XML::Document] the parsed document
    def XML(string)
      RXerces::XML::Document.parse(string)
    end

    # RXerces.parse is a second name for RXerces.XML.
    alias_method :parse, :XML
  end
end
|
|
18
|
+
|
|
19
|
+
# Compatibility facade that publishes the RXerces parser and classes under
# Nokogiri's names, so code written against `Nokogiri::XML` / `Nokogiri.XML`
# reaches RXerces::XML::Document instead.
module Nokogiri
  # Counterpart of the Nokogiri::XML namespace.
  module XML
    class << self
      # Parses an XML string by delegating to RXerces::XML::Document.parse.
      #
      # @param string [String] XML source text
      # @return [RXerces::XML::Document] the parsed document
      def parse(string)
        RXerces::XML::Document.parse(string)
      end
    end

    # Each constant below refers to the RXerces class of the same name.
    Document = RXerces::XML::Document
    Node = RXerces::XML::Node
    Element = RXerces::XML::Element
    Text = RXerces::XML::Text
    NodeSet = RXerces::XML::NodeSet
  end

  class << self
    # Module-level parse entry point (`Nokogiri.XML(...)`).
    #
    # @param string [String] XML source text
    # @return [RXerces::XML::Document] the parsed document
    def XML(string)
      RXerces::XML::Document.parse(string)
    end

    # Nokogiri.parse is a second name for Nokogiri.XML.
    alias_method :parse, :XML
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: rxerces
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- RXerces Contributors
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rake
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '13.0'
|
|
19
|
+
type: :development
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '13.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: rake-compiler
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '1.2'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '1.2'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rspec
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '3.12'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '3.12'
|
|
54
|
+
description: A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
|
|
55
|
+
instead of libxml2
|
|
56
|
+
email:
|
|
57
|
+
- contributors@example.com
|
|
58
|
+
executables: []
|
|
59
|
+
extensions:
|
|
60
|
+
- ext/rxerces/extconf.rb
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- LICENSE
|
|
64
|
+
- README.md
|
|
65
|
+
- ext/rxerces/extconf.rb
|
|
66
|
+
- ext/rxerces/rxerces.cpp
|
|
67
|
+
- ext/rxerces/rxerces.h
|
|
68
|
+
- lib/rxerces.rb
|
|
69
|
+
- lib/rxerces/rxerces.bundle
|
|
70
|
+
- lib/rxerces/version.rb
|
|
71
|
+
homepage: https://github.com/example/rxerces
|
|
72
|
+
licenses:
|
|
73
|
+
- MIT
|
|
74
|
+
metadata: {}
|
|
75
|
+
rdoc_options: []
|
|
76
|
+
require_paths:
|
|
77
|
+
- lib
|
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 2.7.0
|
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
|
+
requirements:
|
|
85
|
+
- - ">="
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: '0'
|
|
88
|
+
requirements: []
|
|
89
|
+
rubygems_version: 3.6.9
|
|
90
|
+
specification_version: 4
|
|
91
|
+
summary: Nokogiri-compatible XML library using Xerces-C
|
|
92
|
+
test_files: []
|