moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "handler"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module SAX
|
|
7
|
+
# Block-based SAX handler with DSL
|
|
8
|
+
#
|
|
9
|
+
# Provides a convenient block-based API for simple SAX parsing cases
|
|
10
|
+
# without requiring a full class definition.
|
|
11
|
+
#
|
|
12
|
+
# @example Block-based parsing
|
|
13
|
+
# context.sax_parse(xml) do
|
|
14
|
+
# start_element { |name, attrs| puts "Element: #{name}" }
|
|
15
|
+
# characters { |text| puts "Text: #{text}" }
|
|
16
|
+
# end_element { |name| puts "End: #{name}" }
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# @example Collecting results in a local variable captured by the block
|
|
20
|
+
# books = []
|
|
21
|
+
# context.sax_parse(xml) do
|
|
22
|
+
# start_element do |name, attrs|
|
|
23
|
+
# books << { id: attrs["id"] } if name == "book"
|
|
24
|
+
# end
|
|
25
|
+
# end
|
|
26
|
+
#
|
|
27
|
+
class BlockHandler < Handler
|
|
28
|
+
# Create a new block handler
|
|
29
|
+
#
|
|
30
|
+
# @param block [Proc] Block containing DSL calls
|
|
31
|
+
# @yield DSL context for defining handlers
|
|
32
|
+
def initialize(&block)
|
|
33
|
+
super()
|
|
34
|
+
@handlers = {}
|
|
35
|
+
instance_eval(&block) if block
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Define handler for document start event
|
|
39
|
+
#
|
|
40
|
+
# @yield Block to execute when document parsing begins
|
|
41
|
+
# @yieldreturn [void]
|
|
42
|
+
# @return [void]
|
|
43
|
+
def start_document(&block)
|
|
44
|
+
@handlers[:start_document] = block
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Define handler for document end event
|
|
48
|
+
#
|
|
49
|
+
# @yield Block to execute when document parsing completes
|
|
50
|
+
# @yieldreturn [void]
|
|
51
|
+
# @return [void]
|
|
52
|
+
def end_document(&block)
|
|
53
|
+
@handlers[:end_document] = block
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Define handler for element start event
|
|
57
|
+
#
|
|
58
|
+
# @yield Block to execute when opening tag is encountered
|
|
59
|
+
# @yieldparam name [String] Element name
|
|
60
|
+
# @yieldparam attributes [Hash<String, String>] Element attributes
|
|
61
|
+
# @yieldparam namespaces [Hash<String, String>] Namespace declarations
|
|
62
|
+
# @yieldreturn [void]
|
|
63
|
+
# @return [void]
|
|
64
|
+
def start_element(&block)
|
|
65
|
+
@handlers[:start_element] = block
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Define handler for element end event
|
|
69
|
+
#
|
|
70
|
+
# @yield Block to execute when closing tag is encountered
|
|
71
|
+
# @yieldparam name [String] Element name
|
|
72
|
+
# @yieldreturn [void]
|
|
73
|
+
# @return [void]
|
|
74
|
+
def end_element(&block)
|
|
75
|
+
@handlers[:end_element] = block
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Define handler for character data event
|
|
79
|
+
#
|
|
80
|
+
# @yield Block to execute when character data is encountered
|
|
81
|
+
# @yieldparam text [String] Character data
|
|
82
|
+
# @yieldreturn [void]
|
|
83
|
+
# @return [void]
|
|
84
|
+
def characters(&block)
|
|
85
|
+
@handlers[:characters] = block
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Define handler for CDATA section event
|
|
89
|
+
#
|
|
90
|
+
# @yield Block to execute when CDATA section is encountered
|
|
91
|
+
# @yieldparam text [String] CDATA content
|
|
92
|
+
# @yieldreturn [void]
|
|
93
|
+
# @return [void]
|
|
94
|
+
def cdata(&block)
|
|
95
|
+
@handlers[:cdata] = block
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Define handler for comment event
|
|
99
|
+
#
|
|
100
|
+
# @yield Block to execute when comment is encountered
|
|
101
|
+
# @yieldparam text [String] Comment content
|
|
102
|
+
# @yieldreturn [void]
|
|
103
|
+
# @return [void]
|
|
104
|
+
def comment(&block)
|
|
105
|
+
@handlers[:comment] = block
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Define handler for processing instruction event
|
|
109
|
+
#
|
|
110
|
+
# @yield Block to execute when PI is encountered
|
|
111
|
+
# @yieldparam target [String] PI target
|
|
112
|
+
# @yieldparam data [String] PI data
|
|
113
|
+
# @yieldreturn [void]
|
|
114
|
+
# @return [void]
|
|
115
|
+
def processing_instruction(&block)
|
|
116
|
+
@handlers[:processing_instruction] = block
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Define handler for error event
|
|
120
|
+
#
|
|
121
|
+
# @yield Block to execute when error occurs
|
|
122
|
+
# @yieldparam error [Moxml::ParseError] The error
|
|
123
|
+
# @yieldreturn [void]
|
|
124
|
+
# @return [void]
|
|
125
|
+
def error(&block)
|
|
126
|
+
@handlers[:error] = block
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Define handler for warning event
|
|
130
|
+
#
|
|
131
|
+
# @yield Block to execute when warning occurs
|
|
132
|
+
# @yieldparam message [String] Warning message
|
|
133
|
+
# @yieldreturn [void]
|
|
134
|
+
# @return [void]
|
|
135
|
+
def warning(&block)
|
|
136
|
+
@handlers[:warning] = block
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# @private
|
|
140
|
+
def on_start_document
|
|
141
|
+
@handlers[:start_document]&.call
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# @private
|
|
145
|
+
def on_end_document
|
|
146
|
+
@handlers[:end_document]&.call
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# @private
|
|
150
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
151
|
+
@handlers[:start_element]&.call(name, attributes, namespaces)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# @private
|
|
155
|
+
def on_end_element(name)
|
|
156
|
+
@handlers[:end_element]&.call(name)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# @private
|
|
160
|
+
def on_characters(text)
|
|
161
|
+
@handlers[:characters]&.call(text)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# @private
|
|
165
|
+
def on_cdata(text)
|
|
166
|
+
@handlers[:cdata]&.call(text)
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# @private
|
|
170
|
+
def on_comment(text)
|
|
171
|
+
@handlers[:comment]&.call(text)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# @private
|
|
175
|
+
def on_processing_instruction(target, data)
|
|
176
|
+
@handlers[:processing_instruction]&.call(target, data)
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# @private
|
|
180
|
+
def on_error(error)
|
|
181
|
+
if @handlers[:error]
|
|
182
|
+
@handlers[:error].call(error)
|
|
183
|
+
else
|
|
184
|
+
super
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# @private
|
|
189
|
+
def on_warning(message)
|
|
190
|
+
@handlers[:warning]&.call(message)
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "handler"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module SAX
|
|
7
|
+
# Element-focused SAX handler with stack tracking
|
|
8
|
+
#
|
|
9
|
+
# Extends the base Handler with utilities for tracking element context:
|
|
10
|
+
# - Element stack (current hierarchy)
|
|
11
|
+
# - Current path (array of element names from root)
|
|
12
|
+
# - Helper methods for checking context
|
|
13
|
+
#
|
|
14
|
+
# @example Using element context
|
|
15
|
+
# class MyHandler < Moxml::SAX::ElementHandler
|
|
16
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
17
|
+
# super # Important: call super to update stack
|
|
18
|
+
#
|
|
19
|
+
# if path_matches?(%r{/library/book/title$})
|
|
20
|
+
# puts "Found title at: #{current_path.join('/')}"
|
|
21
|
+
# end
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
class ElementHandler < Handler
|
|
26
|
+
# @return [Array<String>] Stack of currently open elements
|
|
27
|
+
attr_reader :element_stack
|
|
28
|
+
|
|
29
|
+
# @return [Array<String>] Current path from root to current element
|
|
30
|
+
attr_reader :current_path
|
|
31
|
+
|
|
32
|
+
def initialize
|
|
33
|
+
super
|
|
34
|
+
@element_stack = []
|
|
35
|
+
@current_path = []
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Tracks element on stack before calling super
|
|
39
|
+
#
|
|
40
|
+
# @param name [String] Element name
|
|
41
|
+
# @param attributes [Hash] Element attributes
|
|
42
|
+
# @param namespaces [Hash] Namespace declarations
|
|
43
|
+
# @return [void]
|
|
44
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
45
|
+
@element_stack.push(name)
|
|
46
|
+
@current_path.push(name)
|
|
47
|
+
super
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Removes element from stack before calling super
|
|
51
|
+
#
|
|
52
|
+
# @param name [String] Element name
|
|
53
|
+
# @return [void]
|
|
54
|
+
def on_end_element(name)
|
|
55
|
+
@element_stack.pop
|
|
56
|
+
@current_path.pop
|
|
57
|
+
super
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Check if currently inside an element with the given name
|
|
61
|
+
#
|
|
62
|
+
# @param name [String] Element name to check
|
|
63
|
+
# @return [Boolean] true if inside the element
|
|
64
|
+
# @example
|
|
65
|
+
# in_element?("book") # true if inside any <book> element
|
|
66
|
+
def in_element?(name)
|
|
67
|
+
@element_stack.include?(name)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Get the name of the current (innermost) element
|
|
71
|
+
#
|
|
72
|
+
# @return [String, nil] Current element name, or nil if at document level
|
|
73
|
+
# @example
|
|
74
|
+
# current_element # => "title"
|
|
75
|
+
def current_element
|
|
76
|
+
@element_stack.last
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Get the name of the parent element
|
|
80
|
+
#
|
|
81
|
+
# @return [String, nil] Parent element name, or nil if no parent
|
|
82
|
+
# @example
|
|
83
|
+
# parent_element # => "book"
|
|
84
|
+
def parent_element
|
|
85
|
+
@element_stack[-2]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Get current depth in the document tree
|
|
89
|
+
#
|
|
90
|
+
# @return [Integer] Current nesting level (0 at document root)
|
|
91
|
+
# @example
|
|
92
|
+
# depth # => 3 (e.g., /library/book/title)
|
|
93
|
+
def depth
|
|
94
|
+
@element_stack.length
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Check whether the current element path matches a pattern
#
# The path is rendered as "/" followed by the names in @current_path
# joined with "/" (so an empty path is rendered as "/").
#
# @param pattern [String, Regexp] A Regexp is tested against the path
#   string; anything else is converted with #to_s and must equal the
#   path string exactly
# @return [Boolean] true if the path matches
# @example
#   path_matches?(%r{book/title$})        # true if at /*/book/title
#   path_matches?("/library/book/title")  # exact path match
def path_matches?(pattern)
  path_str = "/#{@current_path.join('/')}"

  # String#match? already answers true/false and never nil, so its result
  # is returned as-is; a nil check on it would report every Regexp as a match.
  return path_str.match?(pattern) if pattern.is_a?(Regexp)

  path_str == pattern.to_s
end
|
|
112
|
+
|
|
113
|
+
# Get the full path as a string
|
|
114
|
+
#
|
|
115
|
+
# @param separator [String] Path separator (default: "/")
|
|
116
|
+
# @return [String] Full path string
|
|
117
|
+
# @example
|
|
118
|
+
# path_string # => "/library/book/title"
|
|
119
|
+
def path_string(separator = "/")
|
|
120
|
+
separator + @current_path.join(separator)
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module SAX
|
|
5
|
+
# Abstract base class for SAX event handlers
|
|
6
|
+
#
|
|
7
|
+
# This class defines the interface for handling SAX parsing events.
|
|
8
|
+
# Subclass this and override the event methods you need to handle.
|
|
9
|
+
#
|
|
10
|
+
# All event methods have default implementations that do nothing,
|
|
11
|
+
# so you only need to override the events you care about.
|
|
12
|
+
#
|
|
13
|
+
# @example Create a custom handler
|
|
14
|
+
# class BookHandler < Moxml::SAX::Handler
|
|
15
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
16
|
+
# puts "Found element: #{name}"
|
|
17
|
+
# end
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
class Handler
|
|
21
|
+
# Called when parsing begins
|
|
22
|
+
#
|
|
23
|
+
# @return [void]
|
|
24
|
+
def on_start_document
|
|
25
|
+
# Override in subclass if needed
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Called when parsing completes successfully
|
|
29
|
+
#
|
|
30
|
+
# @return [void]
|
|
31
|
+
def on_end_document
|
|
32
|
+
# Override in subclass if needed
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Called when an opening tag is encountered
|
|
36
|
+
#
|
|
37
|
+
# @param name [String] Element name (with namespace prefix if present)
|
|
38
|
+
# @param attributes [Hash<String, String>] Element attributes
|
|
39
|
+
# @param namespaces [Hash<String, String>] Namespace declarations on this element
|
|
40
|
+
# @return [void]
|
|
41
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
42
|
+
# Override in subclass if needed
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Called when a closing tag is encountered
|
|
46
|
+
#
|
|
47
|
+
# @param name [String] Element name
|
|
48
|
+
# @return [void]
|
|
49
|
+
def on_end_element(name)
|
|
50
|
+
# Override in subclass if needed
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Called when character data is encountered
|
|
54
|
+
#
|
|
55
|
+
# Note: This may be called multiple times for a single text node
|
|
56
|
+
# if the parser breaks it into chunks. Concatenate if needed.
|
|
57
|
+
#
|
|
58
|
+
# @param text [String] Character data
|
|
59
|
+
# @return [void]
|
|
60
|
+
def on_characters(text)
|
|
61
|
+
# Override in subclass if needed
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Called when a CDATA section is encountered
|
|
65
|
+
#
|
|
66
|
+
# @param text [String] CDATA content
|
|
67
|
+
# @return [void]
|
|
68
|
+
def on_cdata(text)
|
|
69
|
+
# Override in subclass if needed
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Called when a comment is encountered
|
|
73
|
+
#
|
|
74
|
+
# @param text [String] Comment content
|
|
75
|
+
# @return [void]
|
|
76
|
+
def on_comment(text)
|
|
77
|
+
# Override in subclass if needed
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Called when a processing instruction is encountered
|
|
81
|
+
#
|
|
82
|
+
# @param target [String] PI target
|
|
83
|
+
# @param data [String] PI data/content
|
|
84
|
+
# @return [void]
|
|
85
|
+
def on_processing_instruction(target, data)
|
|
86
|
+
# Override in subclass if needed
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Called when a fatal parsing error occurs
|
|
90
|
+
#
|
|
91
|
+
# Default implementation raises the error.
|
|
92
|
+
# Override to handle errors differently.
|
|
93
|
+
#
|
|
94
|
+
# @param error [Moxml::ParseError] The parsing error
|
|
95
|
+
# @return [void]
|
|
96
|
+
# @raise [Moxml::ParseError] By default
|
|
97
|
+
def on_error(error)
|
|
98
|
+
raise error
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Called when a non-fatal warning occurs
|
|
102
|
+
#
|
|
103
|
+
# Default implementation ignores warnings.
|
|
104
|
+
# Override to handle warnings (e.g., log them).
|
|
105
|
+
#
|
|
106
|
+
# @param message [String] Warning message
|
|
107
|
+
# @return [void]
|
|
108
|
+
def on_warning(message)
|
|
109
|
+
# Override in subclass if needed
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
data/lib/moxml/sax.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "sax/handler"
|
|
4
|
+
require_relative "sax/element_handler"
|
|
5
|
+
require_relative "sax/block_handler"
|
|
6
|
+
|
|
7
|
+
module Moxml
|
|
8
|
+
# SAX (Simple API for XML) parsing interface
|
|
9
|
+
#
|
|
10
|
+
# Provides event-driven XML parsing across all Moxml adapters.
|
|
11
|
+
# SAX parsing is memory-efficient and suitable for processing large XML files.
|
|
12
|
+
#
|
|
13
|
+
# @example Class-based handler
|
|
14
|
+
# class MyHandler < Moxml::SAX::Handler
|
|
15
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
16
|
+
# puts "Started element: #{name}"
|
|
17
|
+
# end
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
# context = Moxml.new
|
|
21
|
+
# context.sax_parse(xml_string, MyHandler.new)
|
|
22
|
+
#
|
|
23
|
+
# @example Block-based handler
|
|
24
|
+
# context.sax_parse(xml_string) do
|
|
25
|
+
# start_element { |name, attrs| puts "Element: #{name}" }
|
|
26
|
+
# characters { |text| puts "Text: #{text}" }
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
module SAX
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/moxml/version.rb
CHANGED
|
@@ -10,19 +10,19 @@ module Moxml
|
|
|
10
10
|
basic: {
|
|
11
11
|
"<" => "<",
|
|
12
12
|
">" => ">",
|
|
13
|
-
"&" => "&"
|
|
13
|
+
"&" => "&",
|
|
14
14
|
},
|
|
15
15
|
quotes: {
|
|
16
16
|
"'" => "'",
|
|
17
|
-
'"' => """
|
|
17
|
+
'"' => """,
|
|
18
18
|
},
|
|
19
19
|
full: {
|
|
20
20
|
"<" => "<",
|
|
21
21
|
">" => ">",
|
|
22
22
|
"'" => "'",
|
|
23
23
|
'"' => """,
|
|
24
|
-
"&" => "&"
|
|
25
|
-
}
|
|
24
|
+
"&" => "&",
|
|
25
|
+
},
|
|
26
26
|
}.freeze
|
|
27
27
|
MODES = MAPPINGS.keys.freeze
|
|
28
28
|
|
data/lib/moxml/xml_utils.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require_relative "xml_utils/encoder"
|
|
4
4
|
|
|
5
5
|
# Ruby 3.3+ requires the URI module to be explicitly required
|
|
6
|
-
require "uri" unless defined?(
|
|
6
|
+
require "uri" unless defined?(URI)
|
|
7
7
|
|
|
8
8
|
module Moxml
|
|
9
9
|
module XmlUtils
|
|
@@ -47,17 +47,20 @@ module Moxml
|
|
|
47
47
|
# Validate an XML element name.
#
# A name is accepted when it is a String that starts with an ASCII letter
# or underscore and continues with word characters, "-", "." or ":".
#
# @param name [Object] Candidate element name
# @return [nil] when the name is valid
# @raise [ValidationError] when the name is not a String or is malformed
def validate_element_name(name)
  # \A and \z anchor the whole string. ^ and $ anchor per line, which would
  # accept multi-line input as long as any single line looked like a name.
  return if name.is_a?(String) && name.match?(/\A[a-zA-Z_][\w\-.:]*\z/)

  raise ValidationError, "Invalid XML element name: #{name}"
end
|
|
52
52
|
|
|
53
53
|
# Validate a processing-instruction target.
#
# A target is accepted when it is a String that starts with an ASCII letter
# or underscore and continues with word characters, "-" or "." (no colon).
#
# @param target [Object] Candidate PI target
# @return [nil] when the target is valid
# @raise [ValidationError] when the target is not a String or is malformed
def validate_pi_target(target)
  # \A and \z anchor the whole string. ^ and $ anchor per line, which would
  # accept multi-line input as long as any single line looked like a target.
  return if target.is_a?(String) && target.match?(/\A[a-zA-Z_][\w\-.]*\z/)

  raise ValidationError,
        "Invalid XML processing instruction target: #{target}"
end
|
|
58
59
|
|
|
59
60
|
def validate_uri(uri)
|
|
60
|
-
|
|
61
|
+
if uri.empty? || uri.match?(/\A#{::URI::DEFAULT_PARSER.make_regexp}\z/)
|
|
62
|
+
return
|
|
63
|
+
end
|
|
61
64
|
|
|
62
65
|
raise ValidationError, "Invalid URI: #{uri}"
|
|
63
66
|
end
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
module AST
|
|
6
|
+
# Abstract base class for all XPath AST nodes
|
|
7
|
+
#
|
|
8
|
+
# All AST nodes must implement the #evaluate method which takes
|
|
9
|
+
# a context and returns a result (NodeSet, String, Number, or Boolean).
|
|
10
|
+
#
|
|
11
|
+
# @abstract Subclass and override {#evaluate} to implement
|
|
12
|
+
class Node
  attr_reader :type, :children, :value

  # Build an AST node.
  #
  # @param type [Symbol] Node type
  # @param children [Array] Child nodes; coerced with Kernel#Array, so nil
  #   becomes an empty list
  # @param value [Object] Optional payload for leaf nodes
  def initialize(type = :node, children = [], value = nil)
    @type = type
    @children = Array(children)
    @value = value
  end

  # Evaluate this AST node in the given context.
  #
  # The base implementation always raises; subclasses supply the behavior.
  #
  # @param context [Moxml::XPath::Context] Evaluation context
  # @return [Moxml::NodeSet, String, Numeric, Boolean] Result of evaluation
  # @raise [NotImplementedError] if not overridden by subclass
  def evaluate(context)
    # ::NotImplementedError pins the core Ruby class regardless of any
    # same-named constant visible from the enclosing namespaces.
    raise ::NotImplementedError,
          "#{self.class}#evaluate must be implemented by subclass"
  end

  # Whether this node represents a constant value.
  #
  # @return [Boolean] always false in the base class
  def constant?
    false
  end

  # Result type of this node.
  #
  # @return [Symbol] :unknown in the base class; subclasses are expected to
  #   answer one of :node_set, :string, :number, :boolean
  def result_type
    :unknown
  end

  # Debug representation.
  #
  # Shows the value when one is set, otherwise the number of children when
  # there are any, otherwise just the type.
  #
  # @return [String] Debug representation
  def inspect
    # nil?/empty? rather than truthiness: a stored `false` value or a child
    # list holding only falsy entries is still information worth showing.
    if !@value.nil?
      "#<#{self.class.name} @type=#{@type} @value=#{@value.inspect}>"
    elsif !@children.empty?
      "#<#{self.class.name} @type=#{@type} children=#{@children.size}>"
    else
      "#<#{self.class.name} @type=#{@type}>"
    end
  end

  alias to_s inspect

  # Factory methods for creating specific node types

  # Create an absolute path node (starts with / or //).
  # The descendant-or-self marker is stored as the first child.
  def self.absolute_path(descendant_or_self, *steps)
    new(:absolute_path, [descendant_or_self, *steps])
  end

  # Create a relative path node
  def self.relative_path(*steps)
    new(:relative_path, steps)
  end

  # Create a path node
  def self.path(*steps)
    new(:path, steps)
  end

  # Create an axis node; children are the axis name, the node test and
  # then any predicates, in that order.
  def self.axis(axis_name, node_test, *predicates)
    new(:axis, [axis_name, node_test, *predicates])
  end

  # Create a node test; the value is a Hash with :namespace and :name.
  def self.test(namespace, name)
    new(:test, [], { namespace: namespace, name: name })
  end

  # Create a wildcard test
  def self.wildcard
    new(:wildcard)
  end

  # Create a predicate node wrapping a single condition
  def self.predicate(condition)
    new(:predicate, [condition])
  end

  # Create a function call node; arguments are the children, the function
  # name is the value.
  def self.function(name, *args)
    new(:function, args, name)
  end

  # Create a variable reference node
  def self.variable(name)
    new(:variable, [], name)
  end

  # Create a literal string node
  def self.string(value)
    new(:string, [], value)
  end

  # Create a literal number node; the value is stored as a Float via #to_f.
  def self.number(value)
    new(:number, [], value.to_f)
  end

  # Create a binary operator node; operands are the children, the operator
  # is the value.
  def self.binary_op(operator, left, right)
    new(:binary_op, [left, right], operator)
  end

  # Create a unary operator node
  def self.unary_op(operator, operand)
    new(:unary_op, [operand], operator)
  end

  # Create a union node (|)
  def self.union(*expressions)
    new(:union, expressions)
  end

  # Create an attribute node
  def self.attribute(name)
    new(:attribute, [], name)
  end

  # Create a current node (.)
  def self.current
    new(:current)
  end

  # Create a parent node (..)
  def self.parent
    new(:parent)
  end

  # Create a node type test (text(), comment(), etc.)
  def self.node_type(type_name)
    new(:node_type, [], type_name)
  end
end
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
end
|