rxerces 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +20 -0
- data/README.md +40 -5
- data/ext/rxerces/extconf.rb +42 -0
- data/ext/rxerces/rxerces.cpp +980 -8
- data/lib/rxerces/nokogiri.rb +26 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +5 -2
- data/spec/document_spec.rb +78 -0
- data/spec/node_spec.rb +434 -0
- data/spec/nodeset_spec.rb +59 -0
- data/spec/nokogiri_compatibility_spec.rb +44 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/xpath_spec.rb +252 -18
- data.tar.gz.sig +0 -0
- metadata +4 -3
- metadata.gz.sig +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 301821063fa998d1a4522029ef825aaa7bddc45370ed0c1bb904f8b31fd9f036
|
|
4
|
+
data.tar.gz: cabdd59089a44485d847b7ff3637197d2c24e38abb3e5c68423c397a9869e843
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9edd86524fc0ac49a51a9b2658d723a84f85c295d362e8dad500bcd8a47e95c3ad4b41d073f7bdcee0927bd38a6b1710537ee190693ec739de20e65b22e04d8a
|
|
7
|
+
data.tar.gz: 035ed74771b3d3e6125c1567d5452ac773f80a2f9ae220efd063358f7fcc93326f1beb027bf67e45ad042a950a8372b208426f07ab5320c58bb453f40c4d4c19
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/CHANGES.md
CHANGED
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
## 0.5.0 - 16-Dec-2025
|
|
2
|
+
* Implemented a real css method. Requires Xalan to be installed.
|
|
3
|
+
* Added text/content methods for most classes.
|
|
4
|
+
* Added a nicer inspect method for most classes.
|
|
5
|
+
* Added an HTML alias for XML, mainly for compatibility with Nokogiri,
|
|
6
|
+
but keep in mind that this library parses HTML as XML.
|
|
7
|
+
* Added the Node#ancestors method.
|
|
8
|
+
* Added the Node#has_attribute method.
|
|
9
|
+
* Added first, last, empty? and inner_html methods for Node.
|
|
10
|
+
* Added elements, next_element and previous_element for Node.
|
|
11
|
+
* Added the Document#at_css method.
|
|
12
|
+
|
|
13
|
+
## 0.4.0 - 15-Dec-2025
|
|
14
|
+
* Now uses Xalan if installed for xpath 1.0 compliance.
|
|
15
|
+
* Added Node#search.
|
|
16
|
+
* Added Node#at and Node#at_xpath.
|
|
17
|
+
* Added Document#encoding.
|
|
18
|
+
* Added Node#namespace.
|
|
19
|
+
* Added a placeholder css method for now; it's on the TODO list.
|
|
20
|
+
|
|
1
21
|
## 0.3.0 - 14-Dec-2025
|
|
2
22
|
* Added Node#parent.
|
|
3
23
|
* Added Element#attributes.
|
data/README.md
CHANGED
|
@@ -39,6 +39,28 @@ sudo apt-get install libxerces-c-dev
|
|
|
39
39
|
sudo yum install xerces-c-devel
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
+
### Xalan
|
|
43
|
+
|
|
44
|
+
For XPath 1.0 compliance, you will need to install the Xalan library. Note
|
|
45
|
+
that this is optional, and rxerces will default to using the XPath support
|
|
46
|
+
from Xerces, which is more limited.
|
|
47
|
+
|
|
48
|
+
**Ubuntu/Debian:**
|
|
49
|
+
```bash
|
|
50
|
+
sudo apt-get install libxalan-c-dev
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Fedora/RHEL:**
|
|
54
|
+
```bash
|
|
55
|
+
sudo yum install xalan-c-devel
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Note that macOS, contrary to what the documentation currently says, does not
|
|
59
|
+
have a brew package for Xalan. You will either need to use MacPorts or clone
|
|
60
|
+
and build the code manually. I found that it required some tweaking to work:
|
|
61
|
+
|
|
62
|
+
https://github.com/apache/xalan-c/pull/44
|
|
63
|
+
|
|
42
64
|
### Install the Gem
|
|
43
65
|
|
|
44
66
|
Add this line to your application's Gemfile:
|
|
@@ -80,16 +102,27 @@ RXerces provides optional Nokogiri compatibility. Require `rxerces/nokogiri` to
|
|
|
80
102
|
```ruby
|
|
81
103
|
require 'rxerces/nokogiri'
|
|
82
104
|
|
|
83
|
-
#
|
|
105
|
+
# Parse XML with Nokogiri syntax
|
|
84
106
|
doc = Nokogiri.XML('<root><child>text</child></root>')
|
|
85
107
|
puts doc.root.name # => "root"
|
|
86
108
|
|
|
87
|
-
#
|
|
88
|
-
|
|
109
|
+
# Parse HTML with Nokogiri syntax
|
|
110
|
+
html_doc = Nokogiri.HTML('<html><body><h1>Hello</h1></body></html>')
|
|
111
|
+
puts html_doc.root.name # => "html"
|
|
112
|
+
|
|
113
|
+
# Alternative syntax
|
|
114
|
+
xml_doc = Nokogiri::XML.parse('<root>text</root>')
|
|
115
|
+
html_doc = Nokogiri::HTML.parse('<html>...</html>')
|
|
116
|
+
|
|
117
|
+
# Classes are aliased for both XML and HTML
|
|
118
|
+
Nokogiri::XML::Document == RXerces::XML::Document # => true
|
|
119
|
+
Nokogiri::HTML::Document == RXerces::XML::Document # => true
|
|
89
120
|
```
|
|
90
121
|
|
|
91
122
|
**Note:** If you don't need Nokogiri compatibility, just `require 'rxerces'` and use the `RXerces` module directly.
|
|
92
123
|
|
|
124
|
+
**HTML Parsing Note:** Since RXerces uses Xerces-C (an XML parser), `Nokogiri::HTML` parses HTML as XML. This means it won't perform HTML-specific error correction or tag fixing like Nokogiri does with libxml2's HTML parser. For well-formed HTML/XHTML documents, this works fine.
|
|
125
|
+
|
|
93
126
|
### Working with Nodes
|
|
94
127
|
|
|
95
128
|
```ruby
|
|
@@ -156,7 +189,7 @@ puts doc.to_s
|
|
|
156
189
|
|
|
157
190
|
### XPath Queries
|
|
158
191
|
|
|
159
|
-
RXerces supports XPath queries using Xerces-C's XPath implementation:
|
|
192
|
+
RXerces supports XPath queries using Xerces-C's XPath implementation by default:
|
|
160
193
|
|
|
161
194
|
```ruby
|
|
162
195
|
xml = <<-XML
|
|
@@ -206,6 +239,8 @@ Not supported:
|
|
|
206
239
|
|
|
207
240
|
For more complex queries, you can combine basic XPath with Ruby's `select` and `find` methods.
|
|
208
241
|
|
|
242
|
+
For full XPath 1.0 support, install the Xalan library.
|
|
243
|
+
|
|
209
244
|
## API Reference
|
|
210
245
|
|
|
211
246
|
### RXerces Module
|
|
@@ -270,7 +305,7 @@ bundle exec rake
|
|
|
270
305
|
|
|
271
306
|
- Uses Apache Xerces-C 3.x for XML parsing
|
|
272
307
|
- C++ extension compiled with Ruby's native extension API
|
|
273
|
-
- XPath support is basic (full XPath requires
|
|
308
|
+
- XPath support is basic by default (full XPath requires Xalan)
|
|
274
309
|
- Memory management handled by Ruby's GC and Xerces-C's DOM
|
|
275
310
|
|
|
276
311
|
## Differences from Nokogiri
|
data/ext/rxerces/extconf.rb
CHANGED
|
@@ -29,4 +29,46 @@ unless find_header('xercesc/util/PlatformUtils.hpp')
|
|
|
29
29
|
puts "Proceeding with compilation..."
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
# Check for Xalan-C (optional for enhanced XPath support)
|
|
33
|
+
# Use dir_config which handles --with-xalan-dir and --with-xalan-include/lib
|
|
34
|
+
dir_config('xalan')
|
|
35
|
+
|
|
36
|
+
# Also try to auto-detect in common locations
|
|
37
|
+
if RUBY_PLATFORM =~ /darwin/
|
|
38
|
+
homebrew_xalan = `brew --prefix xalan-c 2>/dev/null`.chomp
|
|
39
|
+
if !homebrew_xalan.empty? && File.directory?(homebrew_xalan)
|
|
40
|
+
$INCFLAGS << " -I#{homebrew_xalan}/include" unless $INCFLAGS.include?("-I#{homebrew_xalan}/include")
|
|
41
|
+
$LDFLAGS << " -L#{homebrew_xalan}/lib" unless $LDFLAGS.include?("-L#{homebrew_xalan}/lib")
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Try standard locations
|
|
46
|
+
xalan_found_prefix = nil
|
|
47
|
+
['/usr/local', '/opt/local', '/usr'].each do |prefix|
|
|
48
|
+
if File.directory?("#{prefix}/include/xalanc")
|
|
49
|
+
$INCFLAGS << " -I#{prefix}/include" unless $INCFLAGS.include?("-I#{prefix}/include")
|
|
50
|
+
$LDFLAGS << " -L#{prefix}/lib" unless $LDFLAGS.include?("-L#{prefix}/lib")
|
|
51
|
+
xalan_found_prefix = prefix
|
|
52
|
+
break
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Check for Xalan libraries
|
|
57
|
+
# Note: We skip the header check because Xalan C++ headers require C++ compilation
|
|
58
|
+
# which mkmf's find_header doesn't handle well. The library check is sufficient.
|
|
59
|
+
if have_library('xalanMsg') && have_library('xalan-c')
|
|
60
|
+
$CXXFLAGS << " -DHAVE_XALAN"
|
|
61
|
+
# Add rpath so the dynamic libraries can be found at runtime
|
|
62
|
+
if xalan_found_prefix
|
|
63
|
+
$LDFLAGS << " -Wl,-rpath,#{xalan_found_prefix}/lib"
|
|
64
|
+
end
|
|
65
|
+
puts "Xalan-C found: Full XPath 1.0 support enabled"
|
|
66
|
+
else
|
|
67
|
+
puts "Xalan-C not found: Using Xerces XPath subset"
|
|
68
|
+
puts "For full XPath 1.0 support, install Xalan-C:"
|
|
69
|
+
puts " macOS: Build from source (no homebrew formula available)"
|
|
70
|
+
puts " Linux: May be available via package manager"
|
|
71
|
+
puts " Or specify: --with-xalan-dir=/path/to/xalan"
|
|
72
|
+
end
|
|
73
|
+
|
|
32
74
|
create_makefile('rxerces/rxerces')
|