rxerces 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +21 -0
- data/README.md +29 -1
- data/benchmarks/xpath_validation_cache_benchmark.rb +157 -0
- data/benchmarks/xpath_validation_micro_benchmark.rb +168 -0
- data/e +0 -0
- data/ext/rxerces/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/ext/rxerces/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data/ext/rxerces/rxerces.cpp +670 -22
- data/lib/rxerces/version.rb +1 -1
- data/lib/rxerces.rb +3 -2
- data/rxerces.gemspec +2 -1
- data/spec/document_spec.rb +184 -17
- data/spec/node_spec.rb +230 -58
- data/spec/nodeset_spec.rb +90 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/rxerces_spec.rb +58 -0
- data/spec/schema_spec.rb +28 -1
- data/spec/spec_helper.rb +5 -0
- data/spec/xpath_cache_spec.rb +409 -0
- data/spec/xpath_spec.rb +306 -18
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data.tar.gz.sig +0 -0
- metadata +24 -1
- metadata.gz.sig +0 -0
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
#include "rxerces.h"
|
|
2
|
+
#include <ruby/encoding.h>
|
|
2
3
|
#include <xercesc/util/PlatformUtils.hpp>
|
|
3
4
|
#include <xercesc/parsers/XercesDOMParser.hpp>
|
|
4
5
|
#include <xercesc/dom/DOM.hpp>
|
|
5
6
|
#include <xercesc/util/XMLString.hpp>
|
|
7
|
+
#include <xercesc/util/XMLUni.hpp>
|
|
6
8
|
#include <xercesc/framework/MemBufInputSource.hpp>
|
|
7
9
|
#include <xercesc/framework/MemBufFormatTarget.hpp>
|
|
8
10
|
#include <xercesc/util/XercesDefs.hpp>
|
|
@@ -10,8 +12,12 @@
|
|
|
10
12
|
#include <xercesc/dom/DOMXPathExpression.hpp>
|
|
11
13
|
#include <xercesc/sax/ErrorHandler.hpp>
|
|
12
14
|
#include <xercesc/sax/SAXParseException.hpp>
|
|
15
|
+
#include <xercesc/sax/SAXException.hpp>
|
|
13
16
|
#include <sstream>
|
|
14
17
|
#include <vector>
|
|
18
|
+
#include <mutex>
|
|
19
|
+
#include <list>
|
|
20
|
+
#include <unordered_map>
|
|
15
21
|
|
|
16
22
|
#ifdef HAVE_XALAN
|
|
17
23
|
#include <xalanc/XPath/XPathEvaluator.hpp>
|
|
@@ -50,6 +56,17 @@ static bool xerces_initialized = false;
|
|
|
50
56
|
#ifdef HAVE_XALAN
|
|
51
57
|
static bool xalan_initialized = false;
|
|
52
58
|
#endif
|
|
59
|
+
static std::mutex init_mutex;
|
|
60
|
+
|
|
61
|
+
// XPath validation cache with LRU eviction
|
|
62
|
+
// Uses a list for LRU ordering (front = most recently used)
|
|
63
|
+
// and a map for O(1) lookup of list iterators
|
|
64
|
+
static std::list<std::string>* xpath_cache_lru_list = nullptr;
|
|
65
|
+
static std::unordered_map<std::string, std::list<std::string>::iterator>* xpath_cache_map = nullptr;
|
|
66
|
+
static std::mutex xpath_cache_mutex;
|
|
67
|
+
static bool cache_xpath_validation = true; // Default: enabled
|
|
68
|
+
static size_t xpath_cache_max_size = 10000; // Max cached expressions
|
|
69
|
+
static size_t xpath_max_length = 10000; // Max XPath expression length
|
|
53
70
|
|
|
54
71
|
// Forward declarations
|
|
55
72
|
static std::string css_to_xpath(const char* css);
|
|
@@ -63,6 +80,12 @@ static void ensure_xerces_initialized() {
|
|
|
63
80
|
return;
|
|
64
81
|
}
|
|
65
82
|
|
|
83
|
+
std::lock_guard<std::mutex> lock(init_mutex);
|
|
84
|
+
|
|
85
|
+
if (xerces_initialized) {
|
|
86
|
+
return;
|
|
87
|
+
}
|
|
88
|
+
|
|
66
89
|
try {
|
|
67
90
|
XMLPlatformUtils::Initialize();
|
|
68
91
|
#ifdef HAVE_XALAN
|
|
@@ -80,6 +103,16 @@ static void ensure_xerces_initialized() {
|
|
|
80
103
|
|
|
81
104
|
// Cleanup function called at exit
|
|
82
105
|
static void cleanup_xerces() {
|
|
106
|
+
// Clean up XPath validation cache (LRU)
|
|
107
|
+
if (xpath_cache_lru_list) {
|
|
108
|
+
delete xpath_cache_lru_list;
|
|
109
|
+
xpath_cache_lru_list = nullptr;
|
|
110
|
+
}
|
|
111
|
+
if (xpath_cache_map) {
|
|
112
|
+
delete xpath_cache_map;
|
|
113
|
+
xpath_cache_map = nullptr;
|
|
114
|
+
}
|
|
115
|
+
|
|
83
116
|
#ifdef HAVE_XALAN
|
|
84
117
|
if (xalan_initialized) {
|
|
85
118
|
XPathEvaluator::terminate();
|
|
@@ -92,6 +125,167 @@ static void cleanup_xerces() {
|
|
|
92
125
|
}
|
|
93
126
|
}
|
|
94
127
|
|
|
128
|
+
// Validate XPath expression to prevent XPath injection attacks
|
|
129
|
+
static void validate_xpath_expression(const char* xpath_str) {
|
|
130
|
+
if (!xpath_str || strlen(xpath_str) == 0) {
|
|
131
|
+
rb_raise(rb_eArgError, "XPath expression cannot be empty");
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
std::string xpath(xpath_str);
|
|
135
|
+
|
|
136
|
+
// Check cache first if caching is enabled (LRU cache)
|
|
137
|
+
if (cache_xpath_validation) {
|
|
138
|
+
std::lock_guard<std::mutex> lock(xpath_cache_mutex);
|
|
139
|
+
if (!xpath_cache_lru_list) {
|
|
140
|
+
xpath_cache_lru_list = new std::list<std::string>();
|
|
141
|
+
}
|
|
142
|
+
if (!xpath_cache_map) {
|
|
143
|
+
xpath_cache_map = new std::unordered_map<std::string, std::list<std::string>::iterator>();
|
|
144
|
+
}
|
|
145
|
+
auto it = xpath_cache_map->find(xpath);
|
|
146
|
+
if (it != xpath_cache_map->end()) {
|
|
147
|
+
// Cache hit: move to front (most recently used)
|
|
148
|
+
xpath_cache_lru_list->splice(xpath_cache_lru_list->begin(), *xpath_cache_lru_list, it->second);
|
|
149
|
+
return; // Already validated
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
size_t len = xpath.length();
|
|
153
|
+
|
|
154
|
+
// Check for excessively long XPath expressions (potential DoS)
|
|
155
|
+
if (xpath_max_length > 0 && len > xpath_max_length) {
|
|
156
|
+
rb_raise(rb_eArgError, "XPath expression is too long (max %zu characters)", xpath_max_length);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Check for dangerous patterns that could indicate XPath injection
|
|
160
|
+
// These patterns are commonly used in XPath injection attacks
|
|
161
|
+
|
|
162
|
+
// 1. Check for unbalanced quotes which could break out of string literals
|
|
163
|
+
int single_quotes = 0;
|
|
164
|
+
int double_quotes = 0;
|
|
165
|
+
bool in_single_quote = false;
|
|
166
|
+
bool in_double_quote = false;
|
|
167
|
+
|
|
168
|
+
for (size_t i = 0; i < len; i++) {
|
|
169
|
+
char c = xpath[i];
|
|
170
|
+
|
|
171
|
+
// Track quote state
|
|
172
|
+
if (c == '\'' && !in_double_quote) {
|
|
173
|
+
in_single_quote = !in_single_quote;
|
|
174
|
+
single_quotes++;
|
|
175
|
+
} else if (c == '"' && !in_single_quote) {
|
|
176
|
+
in_double_quote = !in_double_quote;
|
|
177
|
+
double_quotes++;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// Unbalanced quotes are suspicious
|
|
182
|
+
if (single_quotes % 2 != 0 || double_quotes % 2 != 0) {
|
|
183
|
+
rb_raise(rb_eArgError, "XPath expression contains unbalanced quotes");
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// 2. Check for suspicious comment patterns that could be used to bypass validation
|
|
187
|
+
if (xpath.find("(:") != std::string::npos || xpath.find(":)") != std::string::npos) {
|
|
188
|
+
rb_raise(rb_eArgError, "XPath expression contains suspicious comment patterns");
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// 3. Check for null bytes which could truncate validation
|
|
192
|
+
if (xpath.find('\0') != std::string::npos) {
|
|
193
|
+
rb_raise(rb_eArgError, "XPath expression contains null bytes");
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// 4. Check for excessive nesting which could cause stack overflow
|
|
197
|
+
int bracket_depth = 0;
|
|
198
|
+
int paren_depth = 0;
|
|
199
|
+
const int MAX_DEPTH = 100;
|
|
200
|
+
|
|
201
|
+
for (size_t i = 0; i < len; i++) {
|
|
202
|
+
char c = xpath[i];
|
|
203
|
+
|
|
204
|
+
if (c == '[') bracket_depth++;
|
|
205
|
+
else if (c == ']') bracket_depth--;
|
|
206
|
+
else if (c == '(') paren_depth++;
|
|
207
|
+
else if (c == ')') paren_depth--;
|
|
208
|
+
|
|
209
|
+
if (bracket_depth > MAX_DEPTH || paren_depth > MAX_DEPTH) {
|
|
210
|
+
rb_raise(rb_eArgError, "XPath expression has excessive nesting depth");
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
if (bracket_depth < 0 || paren_depth < 0) {
|
|
214
|
+
rb_raise(rb_eArgError, "XPath expression has unbalanced brackets or parentheses");
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
if (bracket_depth != 0 || paren_depth != 0) {
|
|
219
|
+
rb_raise(rb_eArgError, "XPath expression has unbalanced brackets or parentheses");
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// 5. Check for suspicious function calls that could access system functions
|
|
223
|
+
// or perform dangerous operations
|
|
224
|
+
std::vector<std::string> dangerous_patterns = {
|
|
225
|
+
"document(", // Can access external documents
|
|
226
|
+
"doc(", // Can access external documents
|
|
227
|
+
"collection(", // Can access external collections
|
|
228
|
+
"unparsed-text(", // Can read arbitrary files
|
|
229
|
+
"system-property(", // Can leak system information
|
|
230
|
+
"environment-variable(", // Can leak environment variables
|
|
231
|
+
};
|
|
232
|
+
|
|
233
|
+
for (const auto& pattern : dangerous_patterns) {
|
|
234
|
+
if (xpath.find(pattern) != std::string::npos) {
|
|
235
|
+
rb_raise(rb_eArgError, "XPath expression contains potentially dangerous function: %s", pattern.c_str());
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
// 6. Check for encoded characters that could bypass validation
|
|
240
|
+
// Use specific patterns to avoid false positives (e.g., "Q&A" in text)
|
|
241
|
+
if (xpath.find("&#") != std::string::npos || // Numeric character reference (&#60;)
|
|
242
|
+
xpath.find("&#x") != std::string::npos || // Hex character reference (&#x3C;)
|
|
243
|
+
xpath.find("&#") != std::string::npos) { // Encoded entity reference
|
|
244
|
+
rb_raise(rb_eArgError, "XPath expression contains encoded characters");
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// 7. Detect potential boolean-based blind XPath injection patterns
|
|
248
|
+
// These patterns use 'or' with always-true conditions
|
|
249
|
+
std::vector<std::string> injection_patterns = {
|
|
250
|
+
"or 1=1",
|
|
251
|
+
"or '1'='1'",
|
|
252
|
+
"or \"1\"=\"1\"",
|
|
253
|
+
"or true()",
|
|
254
|
+
"and 1=0",
|
|
255
|
+
"and false()",
|
|
256
|
+
"or 'a'='a'",
|
|
257
|
+
"or \"a\"=\"a\"",
|
|
258
|
+
};
|
|
259
|
+
|
|
260
|
+
// Convert to lowercase for case-insensitive comparison
|
|
261
|
+
std::string xpath_lower = xpath;
|
|
262
|
+
std::transform(xpath_lower.begin(), xpath_lower.end(), xpath_lower.begin(), ::tolower);
|
|
263
|
+
|
|
264
|
+
for (const auto& pattern : injection_patterns) {
|
|
265
|
+
if (xpath_lower.find(pattern) != std::string::npos) {
|
|
266
|
+
rb_raise(rb_eArgError, "XPath expression contains suspicious injection pattern");
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Add to cache if caching is enabled (LRU eviction)
|
|
271
|
+
if (cache_xpath_validation) {
|
|
272
|
+
std::lock_guard<std::mutex> lock(xpath_cache_mutex);
|
|
273
|
+
if (xpath_cache_lru_list && xpath_cache_map) {
|
|
274
|
+
// If cache is full, evict least recently used (back of list)
|
|
275
|
+
if (xpath_cache_max_size > 0 && xpath_cache_map->size() >= xpath_cache_max_size) {
|
|
276
|
+
std::string& lru = xpath_cache_lru_list->back();
|
|
277
|
+
xpath_cache_map->erase(lru);
|
|
278
|
+
xpath_cache_lru_list->pop_back();
|
|
279
|
+
}
|
|
280
|
+
// Add new entry to front (most recently used)
|
|
281
|
+
if (xpath_cache_max_size > 0) {
|
|
282
|
+
xpath_cache_lru_list->push_front(xpath);
|
|
283
|
+
(*xpath_cache_map)[xpath] = xpath_cache_lru_list->begin();
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
95
289
|
// Helper class to manage XMLCh strings
|
|
96
290
|
class XStr {
|
|
97
291
|
public:
|
|
@@ -353,35 +547,110 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
|
353
547
|
return Qnil;
|
|
354
548
|
}
|
|
355
549
|
|
|
356
|
-
|
|
357
|
-
wrapper->node = node;
|
|
358
|
-
wrapper->doc_ref = doc_ref;
|
|
359
|
-
|
|
360
|
-
VALUE rb_node;
|
|
361
|
-
|
|
550
|
+
VALUE rb_class;
|
|
362
551
|
switch (node->getNodeType()) {
|
|
363
552
|
case DOMNode::ELEMENT_NODE:
|
|
364
|
-
|
|
553
|
+
rb_class = rb_cElement;
|
|
365
554
|
break;
|
|
366
555
|
case DOMNode::TEXT_NODE:
|
|
367
|
-
|
|
556
|
+
rb_class = rb_cText;
|
|
368
557
|
break;
|
|
369
558
|
default:
|
|
370
|
-
|
|
559
|
+
rb_class = rb_cNode;
|
|
371
560
|
break;
|
|
372
561
|
}
|
|
373
562
|
|
|
563
|
+
VALUE rb_node = TypedData_Wrap_Struct(rb_class, &node_type, NULL);
|
|
564
|
+
NodeWrapper* wrapper = ALLOC(NodeWrapper);
|
|
565
|
+
wrapper->node = node;
|
|
566
|
+
wrapper->doc_ref = doc_ref;
|
|
567
|
+
DATA_PTR(rb_node) = wrapper;
|
|
568
|
+
|
|
374
569
|
return rb_node;
|
|
375
570
|
}
|
|
376
571
|
|
|
377
|
-
// RXerces::XML::Document.parse(string)
|
|
378
|
-
|
|
572
|
+
// RXerces::XML::Document.parse(string, options = {})
|
|
573
|
+
// Validate options hash for document_parse - only allow known keys
|
|
574
|
+
static void validate_parse_options(VALUE options) {
|
|
575
|
+
if (NIL_P(options)) {
|
|
576
|
+
return;
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
Check_Type(options, T_HASH);
|
|
580
|
+
|
|
581
|
+
// Define allowed option keys
|
|
582
|
+
std::vector<const char*> allowed_keys = {
|
|
583
|
+
"allow_external_entities"
|
|
584
|
+
};
|
|
585
|
+
|
|
586
|
+
// Get all keys from the provided options hash
|
|
587
|
+
VALUE keys = rb_funcall(options, rb_intern("keys"), 0);
|
|
588
|
+
long keys_len = RARRAY_LEN(keys);
|
|
589
|
+
|
|
590
|
+
// Check each key against the allowed list
|
|
591
|
+
for (long i = 0; i < keys_len; i++) {
|
|
592
|
+
VALUE key = rb_ary_entry(keys, i);
|
|
593
|
+
|
|
594
|
+
// Convert symbol or string key to string for comparison
|
|
595
|
+
VALUE key_str;
|
|
596
|
+
if (TYPE(key) == T_SYMBOL) {
|
|
597
|
+
key_str = rb_sym_to_s(key);
|
|
598
|
+
} else if (TYPE(key) == T_STRING) {
|
|
599
|
+
key_str = key;
|
|
600
|
+
} else {
|
|
601
|
+
rb_raise(rb_eArgError, "Option keys must be symbols or strings");
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
const char* key_cstr = StringValueCStr(key_str);
|
|
605
|
+
bool found = false;
|
|
606
|
+
|
|
607
|
+
for (const auto& allowed : allowed_keys) {
|
|
608
|
+
if (strcmp(key_cstr, allowed) == 0) {
|
|
609
|
+
found = true;
|
|
610
|
+
break;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
if (!found) {
|
|
615
|
+
rb_raise(rb_eArgError, "Unknown option: %s. Allowed options are: allow_external_entities", key_cstr);
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static VALUE document_parse(int argc, VALUE* argv, VALUE klass) {
|
|
621
|
+
VALUE str, options;
|
|
622
|
+
rb_scan_args(argc, argv, "11", &str, &options);
|
|
623
|
+
|
|
379
624
|
ensure_xerces_initialized();
|
|
380
625
|
|
|
381
626
|
Check_Type(str, T_STRING);
|
|
382
627
|
const char* xml_str = StringValueCStr(str);
|
|
383
628
|
|
|
629
|
+
// Validate options hash before processing
|
|
630
|
+
validate_parse_options(options);
|
|
631
|
+
|
|
384
632
|
XercesDOMParser* parser = new XercesDOMParser();
|
|
633
|
+
|
|
634
|
+
// Check if external entities should be allowed (default: false for security)
|
|
635
|
+
bool allow_external = false;
|
|
636
|
+
if (!NIL_P(options)) {
|
|
637
|
+
VALUE allow_key = rb_intern("allow_external_entities");
|
|
638
|
+
VALUE allow_val = rb_hash_aref(options, ID2SYM(allow_key));
|
|
639
|
+
if (RTEST(allow_val)) {
|
|
640
|
+
allow_external = true;
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
if (allow_external) {
|
|
645
|
+
// Allow external entities (less secure)
|
|
646
|
+
parser->setLoadExternalDTD(true);
|
|
647
|
+
parser->setDisableDefaultEntityResolution(false);
|
|
648
|
+
} else {
|
|
649
|
+
// Security: Disable external entity processing to prevent XXE attacks
|
|
650
|
+
parser->setLoadExternalDTD(false);
|
|
651
|
+
parser->setDisableDefaultEntityResolution(true);
|
|
652
|
+
}
|
|
653
|
+
|
|
385
654
|
parser->setValidationScheme(XercesDOMParser::Val_Never);
|
|
386
655
|
parser->setDoNamespaces(true);
|
|
387
656
|
parser->setDoSchema(false);
|
|
@@ -485,8 +754,16 @@ static VALUE document_to_s(VALUE self) {
|
|
|
485
754
|
serializer->release();
|
|
486
755
|
|
|
487
756
|
return result;
|
|
757
|
+
} catch (const DOMException& e) {
|
|
758
|
+
CharStr message(e.getMessage());
|
|
759
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize document: %s", message.localForm());
|
|
760
|
+
} catch (const XMLException& e) {
|
|
761
|
+
CharStr message(e.getMessage());
|
|
762
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize document (XMLException): %s", message.localForm());
|
|
763
|
+
} catch (const std::exception& e) {
|
|
764
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize document (std::exception): %s", e.what());
|
|
488
765
|
} catch (...) {
|
|
489
|
-
rb_raise(rb_eRuntimeError, "Failed to serialize document");
|
|
766
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize document (unknown exception type)");
|
|
490
767
|
}
|
|
491
768
|
|
|
492
769
|
return Qnil;
|
|
@@ -608,9 +885,103 @@ static VALUE document_create_element(VALUE self, VALUE name) {
|
|
|
608
885
|
return Qnil;
|
|
609
886
|
}
|
|
610
887
|
|
|
888
|
+
// document.children - returns all children (elements, text, comments, etc.)
|
|
889
|
+
static VALUE document_children(VALUE self) {
|
|
890
|
+
DocumentWrapper* wrapper;
|
|
891
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
892
|
+
|
|
893
|
+
VALUE children = rb_ary_new();
|
|
894
|
+
|
|
895
|
+
if (!wrapper->doc) {
|
|
896
|
+
return children;
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
900
|
+
XMLSize_t count = child_nodes->getLength();
|
|
901
|
+
|
|
902
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
903
|
+
DOMNode* child = child_nodes->item(i);
|
|
904
|
+
rb_ary_push(children, wrap_node(child, self));
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
return children;
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
// document.element_children - returns only element children (no text nodes, comments, etc.)
|
|
911
|
+
static VALUE document_element_children(VALUE self) {
|
|
912
|
+
DocumentWrapper* wrapper;
|
|
913
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
914
|
+
|
|
915
|
+
VALUE children = rb_ary_new();
|
|
916
|
+
|
|
917
|
+
if (!wrapper->doc) {
|
|
918
|
+
return children;
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
922
|
+
XMLSize_t count = child_nodes->getLength();
|
|
923
|
+
|
|
924
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
925
|
+
DOMNode* child = child_nodes->item(i);
|
|
926
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
927
|
+
rb_ary_push(children, wrap_node(child, self));
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
return children;
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// document.first_element_child - returns first element child
|
|
935
|
+
static VALUE document_first_element_child(VALUE self) {
|
|
936
|
+
DocumentWrapper* wrapper;
|
|
937
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
938
|
+
|
|
939
|
+
if (!wrapper->doc) {
|
|
940
|
+
return Qnil;
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
944
|
+
XMLSize_t count = child_nodes->getLength();
|
|
945
|
+
|
|
946
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
947
|
+
DOMNode* child = child_nodes->item(i);
|
|
948
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
949
|
+
return wrap_node(child, self);
|
|
950
|
+
}
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
return Qnil;
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
// document.last_element_child - returns last element child
|
|
957
|
+
static VALUE document_last_element_child(VALUE self) {
|
|
958
|
+
DocumentWrapper* wrapper;
|
|
959
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
960
|
+
|
|
961
|
+
if (!wrapper->doc) {
|
|
962
|
+
return Qnil;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
966
|
+
XMLSize_t count = child_nodes->getLength();
|
|
967
|
+
|
|
968
|
+
// Search backwards for last element
|
|
969
|
+
for (XMLSize_t i = count; i > 0; i--) {
|
|
970
|
+
DOMNode* child = child_nodes->item(i - 1);
|
|
971
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
972
|
+
return wrap_node(child, self);
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
return Qnil;
|
|
977
|
+
}
|
|
978
|
+
|
|
611
979
|
#ifdef HAVE_XALAN
|
|
612
980
|
// Helper function to execute XPath using Xalan for full XPath 1.0 support
|
|
613
981
|
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
|
|
982
|
+
// Validate XPath expression before execution
|
|
983
|
+
validate_xpath_expression(xpath_str);
|
|
984
|
+
|
|
614
985
|
ensure_xerces_initialized();
|
|
615
986
|
|
|
616
987
|
try {
|
|
@@ -718,6 +1089,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
718
1089
|
Check_Type(path, T_STRING);
|
|
719
1090
|
const char* xpath_str = StringValueCStr(path);
|
|
720
1091
|
|
|
1092
|
+
// Validate XPath expression before execution
|
|
1093
|
+
validate_xpath_expression(xpath_str);
|
|
1094
|
+
|
|
721
1095
|
#ifdef HAVE_XALAN
|
|
722
1096
|
// Use Xalan for full XPath 1.0 support
|
|
723
1097
|
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
@@ -782,6 +1156,19 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
782
1156
|
#endif
|
|
783
1157
|
}
|
|
784
1158
|
|
|
1159
|
+
// document.at_xpath(path) - returns first matching node or nil
|
|
1160
|
+
static VALUE document_at_xpath(VALUE self, VALUE path) {
|
|
1161
|
+
VALUE nodeset = document_xpath(self, path);
|
|
1162
|
+
NodeSetWrapper* wrapper;
|
|
1163
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1164
|
+
|
|
1165
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
1166
|
+
return Qnil;
|
|
1167
|
+
}
|
|
1168
|
+
|
|
1169
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1170
|
+
}
|
|
1171
|
+
|
|
785
1172
|
// document.css(selector) - Convert CSS to XPath and execute
|
|
786
1173
|
static VALUE document_css(VALUE self, VALUE selector) {
|
|
787
1174
|
Check_Type(selector, T_STRING);
|
|
@@ -1104,6 +1491,65 @@ static VALUE node_element_children(VALUE self) {
|
|
|
1104
1491
|
return children;
|
|
1105
1492
|
}
|
|
1106
1493
|
|
|
1494
|
+
// node.first_element_child - returns first element child
|
|
1495
|
+
static VALUE node_first_element_child(VALUE self) {
|
|
1496
|
+
NodeWrapper* wrapper;
|
|
1497
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1498
|
+
|
|
1499
|
+
if (!wrapper->node) {
|
|
1500
|
+
return Qnil;
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1504
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
1505
|
+
XMLSize_t count = child_nodes->getLength();
|
|
1506
|
+
|
|
1507
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
1508
|
+
DOMNode* child = child_nodes->item(i);
|
|
1509
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1510
|
+
return wrap_node(child, doc_ref);
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
|
|
1514
|
+
return Qnil;
|
|
1515
|
+
}
|
|
1516
|
+
|
|
1517
|
+
// node.last_element_child - returns last element child
|
|
1518
|
+
static VALUE node_last_element_child(VALUE self) {
|
|
1519
|
+
NodeWrapper* wrapper;
|
|
1520
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1521
|
+
|
|
1522
|
+
if (!wrapper->node) {
|
|
1523
|
+
return Qnil;
|
|
1524
|
+
}
|
|
1525
|
+
|
|
1526
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1527
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
1528
|
+
XMLSize_t count = child_nodes->getLength();
|
|
1529
|
+
|
|
1530
|
+
// Search backwards for last element
|
|
1531
|
+
for (XMLSize_t i = count; i > 0; i--) {
|
|
1532
|
+
DOMNode* child = child_nodes->item(i - 1);
|
|
1533
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1534
|
+
return wrap_node(child, doc_ref);
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
|
|
1538
|
+
return Qnil;
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1541
|
+
// node.document - returns the document that owns this node
|
|
1542
|
+
static VALUE node_document(VALUE self) {
|
|
1543
|
+
NodeWrapper* wrapper;
|
|
1544
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1545
|
+
|
|
1546
|
+
if (!wrapper->node) {
|
|
1547
|
+
return Qnil;
|
|
1548
|
+
}
|
|
1549
|
+
|
|
1550
|
+
return wrapper->doc_ref;
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1107
1553
|
// node.parent
|
|
1108
1554
|
static VALUE node_parent(VALUE self) {
|
|
1109
1555
|
NodeWrapper* wrapper;
|
|
@@ -1231,6 +1677,37 @@ static VALUE node_attributes(VALUE self) {
|
|
|
1231
1677
|
return hash;
|
|
1232
1678
|
}
|
|
1233
1679
|
|
|
1680
|
+
// node.attribute_nodes - returns array of attribute nodes (only for element nodes)
|
|
1681
|
+
static VALUE node_attribute_nodes(VALUE self) {
|
|
1682
|
+
NodeWrapper* wrapper;
|
|
1683
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1684
|
+
|
|
1685
|
+
VALUE nodes_array = rb_ary_new();
|
|
1686
|
+
|
|
1687
|
+
if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
1688
|
+
return nodes_array;
|
|
1689
|
+
}
|
|
1690
|
+
|
|
1691
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
1692
|
+
DOMNamedNodeMap* attributes = element->getAttributes();
|
|
1693
|
+
|
|
1694
|
+
if (!attributes) {
|
|
1695
|
+
return nodes_array;
|
|
1696
|
+
}
|
|
1697
|
+
|
|
1698
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1699
|
+
XMLSize_t length = attributes->getLength();
|
|
1700
|
+
|
|
1701
|
+
for (XMLSize_t i = 0; i < length; i++) {
|
|
1702
|
+
DOMNode* attr = attributes->item(i);
|
|
1703
|
+
if (attr) {
|
|
1704
|
+
rb_ary_push(nodes_array, wrap_node(attr, doc_ref));
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
|
|
1708
|
+
return nodes_array;
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1234
1711
|
// node.next_sibling
|
|
1235
1712
|
static VALUE node_next_sibling(VALUE self) {
|
|
1236
1713
|
NodeWrapper* wrapper;
|
|
@@ -1330,7 +1807,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1330
1807
|
}
|
|
1331
1808
|
|
|
1332
1809
|
DOMNode* child_node = NULL;
|
|
1333
|
-
|
|
1810
|
+
VALUE doc_ref = wrapper->doc_ref; // Keep track of the Ruby document reference
|
|
1334
1811
|
|
|
1335
1812
|
// Check if child is a string or a node
|
|
1336
1813
|
if (TYPE(child) == T_STRING) {
|
|
@@ -1344,13 +1821,27 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1344
1821
|
NodeWrapper* child_wrapper;
|
|
1345
1822
|
if (rb_obj_is_kind_of(child, rb_cNode)) {
|
|
1346
1823
|
TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
|
|
1347
|
-
|
|
1824
|
+
DOMNode* original_child = child_wrapper->node;
|
|
1348
1825
|
|
|
1349
1826
|
// Check if child belongs to a different document
|
|
1350
|
-
DOMDocument* child_doc =
|
|
1827
|
+
DOMDocument* child_doc = original_child->getOwnerDocument();
|
|
1351
1828
|
if (child_doc && child_doc != doc) {
|
|
1352
|
-
|
|
1353
|
-
|
|
1829
|
+
// Automatically import the node from the other document
|
|
1830
|
+
// The second parameter 'true' means deep copy (include all descendants)
|
|
1831
|
+
try {
|
|
1832
|
+
child_node = doc->importNode(original_child, true);
|
|
1833
|
+
|
|
1834
|
+
// Update the child wrapper to point to the imported node
|
|
1835
|
+
// and the new document reference
|
|
1836
|
+
child_wrapper->node = child_node;
|
|
1837
|
+
child_wrapper->doc_ref = doc_ref;
|
|
1838
|
+
} catch (const DOMException& e) {
|
|
1839
|
+
CharStr message(e.getMessage());
|
|
1840
|
+
rb_raise(rb_eRuntimeError, "Failed to import node from different document: %s",
|
|
1841
|
+
message.localForm());
|
|
1842
|
+
}
|
|
1843
|
+
} else {
|
|
1844
|
+
child_node = original_child;
|
|
1354
1845
|
}
|
|
1355
1846
|
} else {
|
|
1356
1847
|
rb_raise(rb_eTypeError, "Argument must be a String or Node");
|
|
@@ -1589,6 +2080,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1589
2080
|
const char* xpath_str = StringValueCStr(path);
|
|
1590
2081
|
VALUE doc_ref = node_wrapper->doc_ref;
|
|
1591
2082
|
|
|
2083
|
+
// Validate XPath expression before execution
|
|
2084
|
+
validate_xpath_expression(xpath_str);
|
|
2085
|
+
|
|
1592
2086
|
#ifdef HAVE_XALAN
|
|
1593
2087
|
// Use Xalan for full XPath 1.0 support
|
|
1594
2088
|
return execute_xpath_with_xalan(node_wrapper->node, xpath_str, doc_ref);
|
|
@@ -1958,6 +2452,31 @@ static VALUE nodeset_text(VALUE self) {
|
|
|
1958
2452
|
}
|
|
1959
2453
|
|
|
1960
2454
|
// nodeset.inspect / nodeset.to_s - human-readable representation
|
|
2455
|
+
// Helper function to safely truncate UTF-8 strings using Ruby's built-in UTF-8 handling
|
|
2456
|
+
// Ruby's rb_str_substr operates on CHARACTER positions, not byte positions
|
|
2457
|
+
static std::string safe_truncate_utf8(const std::string& str, long max_chars) {
|
|
2458
|
+
if (str.empty()) {
|
|
2459
|
+
return str;
|
|
2460
|
+
}
|
|
2461
|
+
|
|
2462
|
+
// Create a Ruby string with UTF-8 encoding
|
|
2463
|
+
VALUE rb_str = rb_enc_str_new(str.c_str(), str.length(), rb_utf8_encoding());
|
|
2464
|
+
|
|
2465
|
+
// Get the character length (not byte length)
|
|
2466
|
+
long char_len = RSTRING_LEN(rb_str);
|
|
2467
|
+
|
|
2468
|
+
if (char_len <= max_chars) {
|
|
2469
|
+
return str;
|
|
2470
|
+
}
|
|
2471
|
+
|
|
2472
|
+
// Use Ruby's rb_str_substr which correctly handles multi-byte characters
|
|
2473
|
+
// Parameters: string, start position (in characters), length (in characters)
|
|
2474
|
+
VALUE truncated = rb_str_substr(rb_str, 0, max_chars);
|
|
2475
|
+
|
|
2476
|
+
// Convert back to C++ string
|
|
2477
|
+
return std::string(RSTRING_PTR(truncated), RSTRING_LEN(truncated));
|
|
2478
|
+
}
|
|
2479
|
+
|
|
1961
2480
|
static VALUE nodeset_inspect(VALUE self) {
|
|
1962
2481
|
NodeSetWrapper* wrapper;
|
|
1963
2482
|
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
@@ -2028,7 +2547,7 @@ static VALUE nodeset_inspect(VALUE self) {
|
|
|
2028
2547
|
textStr = textStr.substr(start, end - start + 1);
|
|
2029
2548
|
|
|
2030
2549
|
if (textStr.length() > 30) {
|
|
2031
|
-
textStr = textStr
|
|
2550
|
+
textStr = safe_truncate_utf8(textStr, 27) + "...";
|
|
2032
2551
|
}
|
|
2033
2552
|
|
|
2034
2553
|
result += ">";
|
|
@@ -2056,7 +2575,7 @@ static VALUE nodeset_inspect(VALUE self) {
|
|
|
2056
2575
|
textStr = textStr.substr(start, end - start + 1);
|
|
2057
2576
|
|
|
2058
2577
|
if (textStr.length() > 30) {
|
|
2059
|
-
textStr = textStr
|
|
2578
|
+
textStr = safe_truncate_utf8(textStr, 27) + "...";
|
|
2060
2579
|
}
|
|
2061
2580
|
|
|
2062
2581
|
result += "text(\"";
|
|
@@ -2078,7 +2597,10 @@ static VALUE nodeset_inspect(VALUE self) {
|
|
|
2078
2597
|
}
|
|
2079
2598
|
|
|
2080
2599
|
result += "]>";
|
|
2081
|
-
|
|
2600
|
+
VALUE rb_result = rb_str_new_cstr(result.c_str());
|
|
2601
|
+
// Ensure the string is marked as UTF-8 encoded
|
|
2602
|
+
rb_enc_associate(rb_result, rb_utf8_encoding());
|
|
2603
|
+
return rb_result;
|
|
2082
2604
|
}
|
|
2083
2605
|
|
|
2084
2606
|
// Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
|
|
@@ -2119,6 +2641,18 @@ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
|
|
|
2119
2641
|
|
|
2120
2642
|
try {
|
|
2121
2643
|
schemaParser->parse(schemaInput);
|
|
2644
|
+
} catch (const XMLException& e) {
|
|
2645
|
+
delete schemaParser;
|
|
2646
|
+
delete wrapper->schemaContent;
|
|
2647
|
+
xfree(wrapper);
|
|
2648
|
+
CharStr message(e.getMessage());
|
|
2649
|
+
rb_raise(rb_eRuntimeError, "Schema parsing failed: %s", message.localForm());
|
|
2650
|
+
} catch (const SAXException& e) {
|
|
2651
|
+
delete schemaParser;
|
|
2652
|
+
delete wrapper->schemaContent;
|
|
2653
|
+
xfree(wrapper);
|
|
2654
|
+
CharStr message(e.getMessage());
|
|
2655
|
+
rb_raise(rb_eRuntimeError, "Schema parsing failed: %s", message.localForm());
|
|
2122
2656
|
} catch (...) {
|
|
2123
2657
|
delete schemaParser;
|
|
2124
2658
|
delete wrapper->schemaContent;
|
|
@@ -2200,6 +2734,12 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2200
2734
|
validator->loadGrammar(schemaSource, Grammar::SchemaGrammarType, true);
|
|
2201
2735
|
validator->setExternalNoNamespaceSchemaLocation("schema.xsd");
|
|
2202
2736
|
validator->useCachedGrammarInParse(true);
|
|
2737
|
+
} catch (const XMLException& e) {
|
|
2738
|
+
CharStr message(e.getMessage());
|
|
2739
|
+
errorHandler.errors.push_back(std::string("Warning: Schema grammar could not be loaded: ") + message.localForm());
|
|
2740
|
+
} catch (const SAXException& e) {
|
|
2741
|
+
CharStr message(e.getMessage());
|
|
2742
|
+
errorHandler.errors.push_back(std::string("Warning: Schema grammar could not be loaded: ") + message.localForm());
|
|
2203
2743
|
} catch (...) {
|
|
2204
2744
|
// If grammar loading fails, just note it
|
|
2205
2745
|
errorHandler.errors.push_back("Warning: Schema grammar could not be loaded");
|
|
@@ -2251,25 +2791,129 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2251
2791
|
}
|
|
2252
2792
|
|
|
2253
2793
|
return Qnil;
|
|
2254
|
-
}
|
|
2794
|
+
}
|
|
2795
|
+
|
|
2796
|
+
// RXerces.cache_xpath_validation? - check if XPath validation caching is enabled
|
|
2797
|
+
static VALUE rxerces_cache_xpath_validation_p(VALUE self) {
|
|
2798
|
+
return cache_xpath_validation ? Qtrue : Qfalse;
|
|
2799
|
+
}
|
|
2800
|
+
|
|
2801
|
+
// RXerces.cache_xpath_validation = bool - enable/disable XPath validation caching
|
|
2802
|
+
static VALUE rxerces_set_cache_xpath_validation(VALUE self, VALUE val) {
|
|
2803
|
+
cache_xpath_validation = RTEST(val);
|
|
2804
|
+
return val;
|
|
2805
|
+
}
|
|
2806
|
+
|
|
2807
|
+
// RXerces.clear_xpath_validation_cache - clear the XPath validation cache
|
|
2808
|
+
static VALUE rxerces_clear_xpath_validation_cache(VALUE self) {
|
|
2809
|
+
std::lock_guard<std::mutex> lock(xpath_cache_mutex);
|
|
2810
|
+
if (xpath_cache_lru_list) {
|
|
2811
|
+
xpath_cache_lru_list->clear();
|
|
2812
|
+
}
|
|
2813
|
+
if (xpath_cache_map) {
|
|
2814
|
+
xpath_cache_map->clear();
|
|
2815
|
+
}
|
|
2816
|
+
return Qnil;
|
|
2817
|
+
}
|
|
2818
|
+
|
|
2819
|
+
// RXerces.xpath_validation_cache_size - return number of cached expressions
|
|
2820
|
+
static VALUE rxerces_xpath_validation_cache_size(VALUE self) {
|
|
2821
|
+
std::lock_guard<std::mutex> lock(xpath_cache_mutex);
|
|
2822
|
+
if (!xpath_cache_map) {
|
|
2823
|
+
return LONG2NUM(0);
|
|
2824
|
+
}
|
|
2825
|
+
return LONG2NUM((long)xpath_cache_map->size());
|
|
2826
|
+
}
|
|
2827
|
+
|
|
2828
|
+
// RXerces.xpath_validation_cache_max_size - get max cache size
|
|
2829
|
+
static VALUE rxerces_xpath_validation_cache_max_size(VALUE self) {
|
|
2830
|
+
return LONG2NUM((long)xpath_cache_max_size);
|
|
2831
|
+
}
|
|
2832
|
+
|
|
2833
|
+
// RXerces.xpath_validation_cache_max_size = n - set max cache size
|
|
2834
|
+
static VALUE rxerces_set_xpath_validation_cache_max_size(VALUE self, VALUE val) {
|
|
2835
|
+
// Validate input: must be a non-negative integer
|
|
2836
|
+
if (!RB_INTEGER_TYPE_P(val)) {
|
|
2837
|
+
rb_raise(rb_eTypeError, "xpath_validation_cache_max_size must be an Integer");
|
|
2838
|
+
}
|
|
2839
|
+
|
|
2840
|
+
long size = NUM2LONG(val);
|
|
2841
|
+
if (size < 0) {
|
|
2842
|
+
rb_raise(rb_eArgError, "xpath_validation_cache_max_size must be non-negative");
|
|
2843
|
+
}
|
|
2844
|
+
|
|
2845
|
+
xpath_cache_max_size = (size_t)size;
|
|
2846
|
+
return val;
|
|
2847
|
+
}
|
|
2848
|
+
|
|
2849
|
+
// RXerces.xalan_enabled? - check if Xalan is available
|
|
2850
|
+
static VALUE rxerces_xalan_enabled_p(VALUE self) {
|
|
2851
|
+
#ifdef HAVE_XALAN
|
|
2852
|
+
return Qtrue;
|
|
2853
|
+
#else
|
|
2854
|
+
return Qfalse;
|
|
2855
|
+
#endif
|
|
2856
|
+
}
|
|
2857
|
+
|
|
2858
|
+
// RXerces.xpath_max_length - get max XPath expression length
|
|
2859
|
+
static VALUE rxerces_xpath_max_length(VALUE self) {
|
|
2860
|
+
return LONG2NUM((long)xpath_max_length);
|
|
2861
|
+
}
|
|
2862
|
+
|
|
2863
|
+
// RXerces.xpath_max_length = n - set max XPath expression length (0 = no limit)
|
|
2864
|
+
static VALUE rxerces_set_xpath_max_length(VALUE self, VALUE val) {
|
|
2865
|
+
// Validate input: must be a non-negative integer
|
|
2866
|
+
if (!RB_INTEGER_TYPE_P(val)) {
|
|
2867
|
+
rb_raise(rb_eTypeError, "xpath_max_length must be an Integer");
|
|
2868
|
+
}
|
|
2869
|
+
|
|
2870
|
+
long size = NUM2LONG(val);
|
|
2871
|
+
if (size < 0) {
|
|
2872
|
+
rb_raise(rb_eArgError, "xpath_max_length must be non-negative");
|
|
2873
|
+
}
|
|
2874
|
+
|
|
2875
|
+
xpath_max_length = (size_t)size;
|
|
2876
|
+
return val;
|
|
2877
|
+
}
|
|
2878
|
+
|
|
2879
|
+
extern "C" void Init_rxerces(void) {
|
|
2255
2880
|
rb_mRXerces = rb_define_module("RXerces");
|
|
2881
|
+
|
|
2882
|
+
// Module-level configuration methods for XPath validation caching
|
|
2883
|
+
rb_define_singleton_method(rb_mRXerces, "cache_xpath_validation?", RUBY_METHOD_FUNC(rxerces_cache_xpath_validation_p), 0);
|
|
2884
|
+
rb_define_singleton_method(rb_mRXerces, "cache_xpath_validation=", RUBY_METHOD_FUNC(rxerces_set_cache_xpath_validation), 1);
|
|
2885
|
+
rb_define_singleton_method(rb_mRXerces, "clear_xpath_validation_cache", RUBY_METHOD_FUNC(rxerces_clear_xpath_validation_cache), 0);
|
|
2886
|
+
rb_define_singleton_method(rb_mRXerces, "xpath_validation_cache_size", RUBY_METHOD_FUNC(rxerces_xpath_validation_cache_size), 0);
|
|
2887
|
+
rb_define_singleton_method(rb_mRXerces, "xpath_validation_cache_max_size", RUBY_METHOD_FUNC(rxerces_xpath_validation_cache_max_size), 0);
|
|
2888
|
+
rb_define_singleton_method(rb_mRXerces, "xpath_validation_cache_max_size=", RUBY_METHOD_FUNC(rxerces_set_xpath_validation_cache_max_size), 1);
|
|
2889
|
+
rb_define_singleton_method(rb_mRXerces, "xpath_max_length", RUBY_METHOD_FUNC(rxerces_xpath_max_length), 0);
|
|
2890
|
+
rb_define_singleton_method(rb_mRXerces, "xpath_max_length=", RUBY_METHOD_FUNC(rxerces_set_xpath_max_length), 1);
|
|
2891
|
+
rb_define_singleton_method(rb_mRXerces, "xalan_enabled?", RUBY_METHOD_FUNC(rxerces_xalan_enabled_p), 0);
|
|
2892
|
+
|
|
2256
2893
|
rb_mXML = rb_define_module_under(rb_mRXerces, "XML");
|
|
2257
2894
|
|
|
2258
2895
|
rb_cDocument = rb_define_class_under(rb_mXML, "Document", rb_cObject);
|
|
2259
2896
|
rb_undef_alloc_func(rb_cDocument);
|
|
2260
|
-
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
2897
|
+
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), -1);
|
|
2261
2898
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
2262
2899
|
rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
|
|
2263
2900
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
2264
2901
|
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
2265
2902
|
rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
|
|
2266
2903
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
2904
|
+
rb_define_method(rb_cDocument, "at_xpath", RUBY_METHOD_FUNC(document_at_xpath), 1);
|
|
2905
|
+
rb_define_alias(rb_cDocument, "at", "at_xpath");
|
|
2267
2906
|
rb_define_method(rb_cDocument, "css", RUBY_METHOD_FUNC(document_css), 1);
|
|
2268
2907
|
rb_define_method(rb_cDocument, "at_css", RUBY_METHOD_FUNC(document_at_css), 1);
|
|
2269
2908
|
rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
|
|
2270
2909
|
rb_define_method(rb_cDocument, "text", RUBY_METHOD_FUNC(document_text), 0);
|
|
2271
2910
|
rb_define_alias(rb_cDocument, "content", "text");
|
|
2272
2911
|
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
2912
|
+
rb_define_method(rb_cDocument, "children", RUBY_METHOD_FUNC(document_children), 0);
|
|
2913
|
+
rb_define_method(rb_cDocument, "element_children", RUBY_METHOD_FUNC(document_element_children), 0);
|
|
2914
|
+
rb_define_alias(rb_cDocument, "elements", "element_children");
|
|
2915
|
+
rb_define_method(rb_cDocument, "first_element_child", RUBY_METHOD_FUNC(document_first_element_child), 0);
|
|
2916
|
+
rb_define_method(rb_cDocument, "last_element_child", RUBY_METHOD_FUNC(document_last_element_child), 0);
|
|
2273
2917
|
|
|
2274
2918
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
2275
2919
|
rb_undef_alloc_func(rb_cNode);
|
|
@@ -2288,9 +2932,13 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2288
2932
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
2289
2933
|
rb_define_method(rb_cNode, "element_children", RUBY_METHOD_FUNC(node_element_children), 0);
|
|
2290
2934
|
rb_define_alias(rb_cNode, "elements", "element_children");
|
|
2935
|
+
rb_define_method(rb_cNode, "first_element_child", RUBY_METHOD_FUNC(node_first_element_child), 0);
|
|
2936
|
+
rb_define_method(rb_cNode, "last_element_child", RUBY_METHOD_FUNC(node_last_element_child), 0);
|
|
2937
|
+
rb_define_method(rb_cNode, "document", RUBY_METHOD_FUNC(node_document), 0);
|
|
2291
2938
|
rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
|
|
2292
2939
|
rb_define_method(rb_cNode, "ancestors", RUBY_METHOD_FUNC(node_ancestors), -1);
|
|
2293
2940
|
rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
|
|
2941
|
+
rb_define_method(rb_cNode, "attribute_nodes", RUBY_METHOD_FUNC(node_attribute_nodes), 0);
|
|
2294
2942
|
rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
|
|
2295
2943
|
rb_define_method(rb_cNode, "next_element", RUBY_METHOD_FUNC(node_next_element), 0);
|
|
2296
2944
|
rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
|