moxml 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +238 -40
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +45 -0
- data/docs/_guides/modifying-xml.adoc +293 -0
- data/docs/_guides/parsing-xml.adoc +231 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +98 -0
- data/docs/_pages/adapters/libxml.adoc +286 -0
- data/docs/_pages/adapters/nokogiri.adoc +252 -0
- data/docs/_pages/adapters/oga.adoc +292 -0
- data/docs/_pages/adapters/ox.adoc +55 -0
- data/docs/_pages/adapters/rexml.adoc +293 -0
- data/docs/_pages/best-practices.adoc +430 -0
- data/docs/_pages/compatibility.adoc +468 -0
- data/docs/_pages/configuration.adoc +251 -0
- data/docs/_pages/error-handling.adoc +350 -0
- data/docs/_pages/headed-ox-limitations.adoc +558 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +141 -0
- data/docs/_pages/node-api-reference.adoc +50 -0
- data/docs/_pages/performance.adoc +36 -0
- data/docs/_pages/quick-start.adoc +244 -0
- data/docs/_pages/thread-safety.adoc +29 -0
- data/docs/_references/document-api.adoc +408 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +268 -0
- data/docs/_tutorials/builder-pattern.adoc +343 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +325 -0
- data/docs/_tutorials/xpath-queries.adoc +359 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1548 -0
- data/lib/moxml/adapter/nokogiri.rb +121 -9
- data/lib/moxml/adapter/oga.rb +123 -12
- data/lib/moxml/adapter/ox.rb +282 -26
- data/lib/moxml/adapter/rexml.rb +127 -20
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +39 -1
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +39 -6
- data/lib/moxml/document_builder.rb +27 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +94 -3
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1768 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +176 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/lib/moxml/node.rb
CHANGED
|
@@ -31,7 +31,7 @@ module Moxml
|
|
|
31
31
|
def children
|
|
32
32
|
NodeSet.new(
|
|
33
33
|
adapter.children(@native).map { adapter.patch_node(_1, @native) },
|
|
34
|
-
context
|
|
34
|
+
context,
|
|
35
35
|
)
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -84,6 +84,92 @@ module Moxml
|
|
|
84
84
|
Node.wrap(adapter.at_xpath(@native, expression, namespaces), context)
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
# Convenience find methods (aliases for xpath methods)
|
|
88
|
+
def find(xpath_expression, namespaces = {})
|
|
89
|
+
at_xpath(xpath_expression, namespaces)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def find_all(xpath_expression, namespaces = {})
|
|
93
|
+
xpath(xpath_expression, namespaces).to_a
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Check if node has any children
|
|
97
|
+
def has_children?
|
|
98
|
+
!children.empty?
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Get first/last child
|
|
102
|
+
def first_child
|
|
103
|
+
children.first
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def last_child
|
|
107
|
+
children.last
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Returns the text content of this node.
#
# This base implementation always returns an empty string. Per the original
# comment, subclasses should override it (Element and Text are named as
# having their own implementations).
#
# NOTE(review): this method used to be defined twice in a row. Ruby keeps only
# the last definition of a method, so the earlier variant (which looked at
# `content` / text children) never ran and only produced a
# "method redefined" warning under `ruby -w`. The single definition kept here
# is the one that was already in effect, so runtime behavior is unchanged.
#
# @return [String] always ""
def text
  ""
end
|
|
129
|
+
|
|
130
|
+
# Reads an attribute value by name.
#
# Returns nil when the receiver does not respond to `attribute`, and also
# when the object returned by `attribute(name)` does not respond to `value`
# (which covers a nil lookup result).
#
# @param name [String] attribute name, passed straight to `attribute`
# @return [Object, nil] the looked-up object's `value`, or nil
def [](name)
  return unless respond_to?(:attribute)

  found = attribute(name)
  return unless found.respond_to?(:value)

  found.value
end
|
|
138
|
+
|
|
139
|
+
# Returns the namespace of this node
|
|
140
|
+
# Only applicable to Element nodes, returns nil for others
|
|
141
|
+
def namespace
|
|
142
|
+
return nil unless element?
|
|
143
|
+
|
|
144
|
+
ns = adapter.namespace(@native)
|
|
145
|
+
ns && Namespace.new(ns, context)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Returns all namespace definitions on this node
|
|
149
|
+
# Only applicable to Element nodes, returns empty array for others
|
|
150
|
+
def namespaces
|
|
151
|
+
return [] unless element?
|
|
152
|
+
|
|
153
|
+
adapter.namespace_definitions(@native).map do |ns|
|
|
154
|
+
Namespace.new(ns, context)
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Recursively yields all descendant nodes, depth first: each child is yielded
# before that child's own descendants. The receiver itself is not yielded.
# Per the original comment, this backs the XPath descendant and
# descendant-or-self axes.
#
# Children that do not respond to `each_node` are yielded but not descended
# into.
#
# @yieldparam node [Object] each descendant in turn
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever `children.each` returns
def each_node(&block)
  # Without this guard a block-less call hit `yield` and raised LocalJumpError.
  return enum_for(:each_node) unless block

  children.each do |child|
    yield child
    child.each_node(&block) if child.respond_to?(:each_node)
  end
end
|
|
166
|
+
|
|
167
|
+
# Clone the node (deep copy)
|
|
168
|
+
def clone
|
|
169
|
+
Node.wrap(adapter.dup(@native), context)
|
|
170
|
+
end
|
|
171
|
+
alias dup clone
|
|
172
|
+
|
|
87
173
|
def ==(other)
|
|
88
174
|
self.class == other.class && @native == other.native
|
|
89
175
|
end
|
|
@@ -106,6 +192,7 @@ module Moxml
|
|
|
106
192
|
when :document then Document
|
|
107
193
|
when :declaration then Declaration
|
|
108
194
|
when :doctype then Doctype
|
|
195
|
+
when :attribute then Attribute
|
|
109
196
|
else self
|
|
110
197
|
end
|
|
111
198
|
|
|
@@ -129,7 +216,11 @@ module Moxml
|
|
|
129
216
|
when String then Text.new(adapter.create_text(node), context)
|
|
130
217
|
when Node then node
|
|
131
218
|
else
|
|
132
|
-
raise
|
|
219
|
+
raise Moxml::DocumentStructureError.new(
|
|
220
|
+
"Invalid node type: #{node.class}. Expected String or Moxml::Node",
|
|
221
|
+
operation: "prepare_node",
|
|
222
|
+
state: "node_type: #{node.class}",
|
|
223
|
+
)
|
|
133
224
|
end
|
|
134
225
|
end
|
|
135
226
|
|
|
@@ -141,7 +232,7 @@ module Moxml
|
|
|
141
232
|
# Oga: <empty /> (with a space)
|
|
142
233
|
# Nokogiri: <empty/> (without a space)
|
|
143
234
|
# The expanded format is enforced to avoid this conflict
|
|
144
|
-
expand_empty: true
|
|
235
|
+
expand_empty: true,
|
|
145
236
|
}
|
|
146
237
|
end
|
|
147
238
|
end
|
data/lib/moxml/node_set.rb
CHANGED
|
@@ -52,6 +52,31 @@ module Moxml
|
|
|
52
52
|
self.class.new(nodes + other.nodes, context)
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
def <<(node)
|
|
56
|
+
# If it's a wrapped Moxml node, unwrap to native before storing
|
|
57
|
+
native_node = node.respond_to?(:native) ? node.native : node
|
|
58
|
+
@nodes << native_node
|
|
59
|
+
self
|
|
60
|
+
end
|
|
61
|
+
alias push <<
|
|
62
|
+
|
|
63
|
+
# Returns a new set of the same class containing each stored native node only
# once, where "the same node" means the same Ruby object (equal `object_id`),
# not merely an equal value. The first occurrence wins and the original order
# is kept; the receiver is not modified.
#
# Per the original comment, XPath operations such as descendant-or-self may
# yield the same native node more than once, which is why this exists.
#
# @return [NodeSet] a new instance of the receiver's class sharing `context`
def uniq_by_native
  distinct = @nodes.uniq { |native| native.object_id }
  self.class.new(distinct, context)
end
|
|
79
|
+
|
|
55
80
|
def ==(other)
|
|
56
81
|
self.class == other.class &&
|
|
57
82
|
length == other.length &&
|
|
@@ -68,5 +93,14 @@ module Moxml
|
|
|
68
93
|
each(&:remove)
|
|
69
94
|
self
|
|
70
95
|
end
|
|
96
|
+
|
|
97
|
+
# Delete a node from the set
|
|
98
|
+
# Accepts both wrapped Moxml nodes and native nodes
|
|
99
|
+
def delete(node)
|
|
100
|
+
# If it's a wrapped Moxml node, unwrap to native
|
|
101
|
+
native_node = node.respond_to?(:native) ? node.native : node
|
|
102
|
+
@nodes.delete(native_node)
|
|
103
|
+
self
|
|
104
|
+
end
|
|
71
105
|
end
|
|
72
106
|
end
|
data/lib/moxml/sax/block_handler.rb
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "handler"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module SAX
|
|
7
|
+
# Block-based SAX handler with DSL
|
|
8
|
+
#
|
|
9
|
+
# Provides a convenient block-based API for simple SAX parsing cases
|
|
10
|
+
# without requiring a full class definition.
|
|
11
|
+
#
|
|
12
|
+
# @example Block-based parsing
|
|
13
|
+
# context.sax_parse(xml) do
|
|
14
|
+
# start_element { |name, attrs| puts "Element: #{name}" }
|
|
15
|
+
# characters { |text| puts "Text: #{text}" }
|
|
16
|
+
# end_element { |name| puts "End: #{name}" }
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# @example Collecting results in a local variable captured by the block
|
|
20
|
+
# books = []
|
|
21
|
+
# context.sax_parse(xml) do
|
|
22
|
+
# start_element do |name, attrs|
|
|
23
|
+
# books << { id: attrs["id"] } if name == "book"
|
|
24
|
+
# end
|
|
25
|
+
# end
|
|
26
|
+
#
|
|
27
|
+
class BlockHandler < Handler
|
|
28
|
+
# Create a new block handler
|
|
29
|
+
#
|
|
30
|
+
# @param block [Proc] Block containing DSL calls
|
|
31
|
+
# @yield DSL context for defining handlers
|
|
32
|
+
def initialize(&block)
|
|
33
|
+
super()
|
|
34
|
+
@handlers = {}
|
|
35
|
+
instance_eval(&block) if block
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Define handler for document start event
|
|
39
|
+
#
|
|
40
|
+
# @yield Block to execute when document parsing begins
|
|
41
|
+
# @yieldreturn [void]
|
|
42
|
+
# @return [void]
|
|
43
|
+
def start_document(&block)
|
|
44
|
+
@handlers[:start_document] = block
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Define handler for document end event
|
|
48
|
+
#
|
|
49
|
+
# @yield Block to execute when document parsing completes
|
|
50
|
+
# @yieldreturn [void]
|
|
51
|
+
# @return [void]
|
|
52
|
+
def end_document(&block)
|
|
53
|
+
@handlers[:end_document] = block
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Define handler for element start event
|
|
57
|
+
#
|
|
58
|
+
# @yield Block to execute when opening tag is encountered
|
|
59
|
+
# @yieldparam name [String] Element name
|
|
60
|
+
# @yieldparam attributes [Hash<String, String>] Element attributes
|
|
61
|
+
# @yieldparam namespaces [Hash<String, String>] Namespace declarations
|
|
62
|
+
# @yieldreturn [void]
|
|
63
|
+
# @return [void]
|
|
64
|
+
def start_element(&block)
|
|
65
|
+
@handlers[:start_element] = block
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Define handler for element end event
|
|
69
|
+
#
|
|
70
|
+
# @yield Block to execute when closing tag is encountered
|
|
71
|
+
# @yieldparam name [String] Element name
|
|
72
|
+
# @yieldreturn [void]
|
|
73
|
+
# @return [void]
|
|
74
|
+
def end_element(&block)
|
|
75
|
+
@handlers[:end_element] = block
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Define handler for character data event
|
|
79
|
+
#
|
|
80
|
+
# @yield Block to execute when character data is encountered
|
|
81
|
+
# @yieldparam text [String] Character data
|
|
82
|
+
# @yieldreturn [void]
|
|
83
|
+
# @return [void]
|
|
84
|
+
def characters(&block)
|
|
85
|
+
@handlers[:characters] = block
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Define handler for CDATA section event
|
|
89
|
+
#
|
|
90
|
+
# @yield Block to execute when CDATA section is encountered
|
|
91
|
+
# @yieldparam text [String] CDATA content
|
|
92
|
+
# @yieldreturn [void]
|
|
93
|
+
# @return [void]
|
|
94
|
+
def cdata(&block)
|
|
95
|
+
@handlers[:cdata] = block
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Define handler for comment event
|
|
99
|
+
#
|
|
100
|
+
# @yield Block to execute when comment is encountered
|
|
101
|
+
# @yieldparam text [String] Comment content
|
|
102
|
+
# @yieldreturn [void]
|
|
103
|
+
# @return [void]
|
|
104
|
+
def comment(&block)
|
|
105
|
+
@handlers[:comment] = block
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Define handler for processing instruction event
|
|
109
|
+
#
|
|
110
|
+
# @yield Block to execute when PI is encountered
|
|
111
|
+
# @yieldparam target [String] PI target
|
|
112
|
+
# @yieldparam data [String] PI data
|
|
113
|
+
# @yieldreturn [void]
|
|
114
|
+
# @return [void]
|
|
115
|
+
def processing_instruction(&block)
|
|
116
|
+
@handlers[:processing_instruction] = block
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Define handler for error event
|
|
120
|
+
#
|
|
121
|
+
# @yield Block to execute when error occurs
|
|
122
|
+
# @yieldparam error [Moxml::ParseError] The error
|
|
123
|
+
# @yieldreturn [void]
|
|
124
|
+
# @return [void]
|
|
125
|
+
def error(&block)
|
|
126
|
+
@handlers[:error] = block
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Define handler for warning event
|
|
130
|
+
#
|
|
131
|
+
# @yield Block to execute when warning occurs
|
|
132
|
+
# @yieldparam message [String] Warning message
|
|
133
|
+
# @yieldreturn [void]
|
|
134
|
+
# @return [void]
|
|
135
|
+
def warning(&block)
|
|
136
|
+
@handlers[:warning] = block
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# @private
|
|
140
|
+
def on_start_document
|
|
141
|
+
@handlers[:start_document]&.call
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# @private
|
|
145
|
+
def on_end_document
|
|
146
|
+
@handlers[:end_document]&.call
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# @private
|
|
150
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
151
|
+
@handlers[:start_element]&.call(name, attributes, namespaces)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# @private
|
|
155
|
+
def on_end_element(name)
|
|
156
|
+
@handlers[:end_element]&.call(name)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# @private
|
|
160
|
+
def on_characters(text)
|
|
161
|
+
@handlers[:characters]&.call(text)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# @private
|
|
165
|
+
def on_cdata(text)
|
|
166
|
+
@handlers[:cdata]&.call(text)
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# @private
|
|
170
|
+
def on_comment(text)
|
|
171
|
+
@handlers[:comment]&.call(text)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# @private
|
|
175
|
+
def on_processing_instruction(target, data)
|
|
176
|
+
@handlers[:processing_instruction]&.call(target, data)
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# @private
|
|
180
|
+
def on_error(error)
|
|
181
|
+
if @handlers[:error]
|
|
182
|
+
@handlers[:error].call(error)
|
|
183
|
+
else
|
|
184
|
+
super
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# @private
|
|
189
|
+
def on_warning(message)
|
|
190
|
+
@handlers[:warning]&.call(message)
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
data/lib/moxml/sax/element_handler.rb
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "handler"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module SAX
|
|
7
|
+
# Element-focused SAX handler with stack tracking
|
|
8
|
+
#
|
|
9
|
+
# Extends the base Handler with utilities for tracking element context:
|
|
10
|
+
# - Element stack (current hierarchy)
|
|
11
|
+
# - Current path (array of element names from root)
|
|
12
|
+
# - Helper methods for checking context
|
|
13
|
+
#
|
|
14
|
+
# @example Using element context
|
|
15
|
+
# class MyHandler < Moxml::SAX::ElementHandler
|
|
16
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
17
|
+
# super # Important: call super to update stack
|
|
18
|
+
#
|
|
19
|
+
# if path_matches?(%r{/library/book/title$})
|
|
20
|
+
# puts "Found title at: #{current_path.join('/')}"
|
|
21
|
+
# end
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
class ElementHandler < Handler
|
|
26
|
+
# @return [Array<String>] Stack of currently open elements
|
|
27
|
+
attr_reader :element_stack
|
|
28
|
+
|
|
29
|
+
# @return [Array<String>] Current path from root to current element
|
|
30
|
+
attr_reader :current_path
|
|
31
|
+
|
|
32
|
+
def initialize
|
|
33
|
+
super
|
|
34
|
+
@element_stack = []
|
|
35
|
+
@current_path = []
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Tracks element on stack before calling super
|
|
39
|
+
#
|
|
40
|
+
# @param name [String] Element name
|
|
41
|
+
# @param attributes [Hash] Element attributes
|
|
42
|
+
# @param namespaces [Hash] Namespace declarations
|
|
43
|
+
# @return [void]
|
|
44
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
45
|
+
@element_stack.push(name)
|
|
46
|
+
@current_path.push(name)
|
|
47
|
+
super
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Removes element from stack before calling super
|
|
51
|
+
#
|
|
52
|
+
# @param name [String] Element name
|
|
53
|
+
# @return [void]
|
|
54
|
+
def on_end_element(name)
|
|
55
|
+
@element_stack.pop
|
|
56
|
+
@current_path.pop
|
|
57
|
+
super
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Check if currently inside an element with the given name
|
|
61
|
+
#
|
|
62
|
+
# @param name [String] Element name to check
|
|
63
|
+
# @return [Boolean] true if inside the element
|
|
64
|
+
# @example
|
|
65
|
+
# in_element?("book") # true if inside any <book> element
|
|
66
|
+
def in_element?(name)
|
|
67
|
+
@element_stack.include?(name)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Get the name of the current (innermost) element
|
|
71
|
+
#
|
|
72
|
+
# @return [String, nil] Current element name, or nil if at document level
|
|
73
|
+
# @example
|
|
74
|
+
# current_element # => "title"
|
|
75
|
+
def current_element
|
|
76
|
+
@element_stack.last
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Get the name of the parent element
|
|
80
|
+
#
|
|
81
|
+
# @return [String, nil] Parent element name, or nil if no parent
|
|
82
|
+
# @example
|
|
83
|
+
# parent_element # => "book"
|
|
84
|
+
def parent_element
|
|
85
|
+
@element_stack[-2]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Get current depth in the document tree
|
|
89
|
+
#
|
|
90
|
+
# @return [Integer] Current nesting level (0 at document root)
|
|
91
|
+
# @example
|
|
92
|
+
# depth # => 3 (e.g., /library/book/title)
|
|
93
|
+
def depth
|
|
94
|
+
@element_stack.length
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Check if current path matches a pattern
#
# The path under test is "/" followed by the entries of @current_path joined
# with "/" (so an empty path is "/").
#
# @param pattern [String, Regexp] Regexp is searched for within the path;
#   anything else is converted with `to_s` and must equal the whole path
# @return [Boolean] true if path matches
# @example
#   path_matches?(%r{book/title$})        # true if at /*/book/title
#   path_matches?("/library/book/title")  # exact path match
def path_matches?(pattern)
  path_str = "/#{@current_path.join('/')}"

  # String#match? already returns true/false. The previous
  # `!path_str.match?(pattern).nil?` was therefore true for every Regexp,
  # including ones that do not match.
  return path_str.match?(pattern) if pattern.is_a?(Regexp)

  path_str == pattern.to_s
end
|
|
112
|
+
|
|
113
|
+
# Get the full path as a string
|
|
114
|
+
#
|
|
115
|
+
# @param separator [String] Path separator (default: "/")
|
|
116
|
+
# @return [String] Full path string
|
|
117
|
+
# @example
|
|
118
|
+
# path_string # => "/library/book/title"
|
|
119
|
+
def path_string(separator = "/")
|
|
120
|
+
separator + @current_path.join(separator)
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
data/lib/moxml/sax/handler.rb
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module SAX
|
|
5
|
+
# Abstract base class for SAX event handlers
|
|
6
|
+
#
|
|
7
|
+
# This class defines the interface for handling SAX parsing events.
|
|
8
|
+
# Subclass this and override the event methods you need to handle.
|
|
9
|
+
#
|
|
10
|
+
# All event methods have default implementations that do nothing,
|
|
11
|
+
# so you only need to override the events you care about.
|
|
12
|
+
#
|
|
13
|
+
# @example Create a custom handler
|
|
14
|
+
# class BookHandler < Moxml::SAX::Handler
|
|
15
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
16
|
+
# puts "Found element: #{name}"
|
|
17
|
+
# end
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
class Handler
|
|
21
|
+
# Called when parsing begins
|
|
22
|
+
#
|
|
23
|
+
# @return [void]
|
|
24
|
+
def on_start_document
|
|
25
|
+
# Override in subclass if needed
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Called when parsing completes successfully
|
|
29
|
+
#
|
|
30
|
+
# @return [void]
|
|
31
|
+
def on_end_document
|
|
32
|
+
# Override in subclass if needed
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Called when an opening tag is encountered
|
|
36
|
+
#
|
|
37
|
+
# @param name [String] Element name (with namespace prefix if present)
|
|
38
|
+
# @param attributes [Hash<String, String>] Element attributes
|
|
39
|
+
# @param namespaces [Hash<String, String>] Namespace declarations on this element
|
|
40
|
+
# @return [void]
|
|
41
|
+
def on_start_element(name, attributes = {}, namespaces = {})
|
|
42
|
+
# Override in subclass if needed
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Called when a closing tag is encountered
|
|
46
|
+
#
|
|
47
|
+
# @param name [String] Element name
|
|
48
|
+
# @return [void]
|
|
49
|
+
def on_end_element(name)
|
|
50
|
+
# Override in subclass if needed
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Called when character data is encountered
|
|
54
|
+
#
|
|
55
|
+
# Note: This may be called multiple times for a single text node
|
|
56
|
+
# if the parser breaks it into chunks. Concatenate if needed.
|
|
57
|
+
#
|
|
58
|
+
# @param text [String] Character data
|
|
59
|
+
# @return [void]
|
|
60
|
+
def on_characters(text)
|
|
61
|
+
# Override in subclass if needed
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Called when a CDATA section is encountered
|
|
65
|
+
#
|
|
66
|
+
# @param text [String] CDATA content
|
|
67
|
+
# @return [void]
|
|
68
|
+
def on_cdata(text)
|
|
69
|
+
# Override in subclass if needed
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Called when a comment is encountered
|
|
73
|
+
#
|
|
74
|
+
# @param text [String] Comment content
|
|
75
|
+
# @return [void]
|
|
76
|
+
def on_comment(text)
|
|
77
|
+
# Override in subclass if needed
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Called when a processing instruction is encountered
|
|
81
|
+
#
|
|
82
|
+
# @param target [String] PI target
|
|
83
|
+
# @param data [String] PI data/content
|
|
84
|
+
# @return [void]
|
|
85
|
+
def on_processing_instruction(target, data)
|
|
86
|
+
# Override in subclass if needed
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Called when a fatal parsing error occurs
|
|
90
|
+
#
|
|
91
|
+
# Default implementation raises the error.
|
|
92
|
+
# Override to handle errors differently.
|
|
93
|
+
#
|
|
94
|
+
# @param error [Moxml::ParseError] The parsing error
|
|
95
|
+
# @return [void]
|
|
96
|
+
# @raise [Moxml::ParseError] By default
|
|
97
|
+
def on_error(error)
|
|
98
|
+
raise error
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Called when a non-fatal warning occurs
|
|
102
|
+
#
|
|
103
|
+
# Default implementation ignores warnings.
|
|
104
|
+
# Override to handle warnings (e.g., log them).
|
|
105
|
+
#
|
|
106
|
+
# @param message [String] Warning message
|
|
107
|
+
# @return [void]
|
|
108
|
+
def on_warning(message)
|
|
109
|
+
# Override in subclass if needed
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
data/lib/moxml/sax.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "sax/handler"
|
|
4
|
+
require_relative "sax/element_handler"
|
|
5
|
+
require_relative "sax/block_handler"
|
|
6
|
+
|
|
7
|
+
module Moxml
|
|
8
|
+
# SAX (Simple API for XML) parsing interface
|
|
9
|
+
#
|
|
10
|
+
# Provides event-driven XML parsing across all Moxml adapters.
|
|
11
|
+
# SAX parsing is memory-efficient and suitable for processing large XML files.
|
|
12
|
+
#
|
|
13
|
+
# @example Class-based handler
|
|
14
|
+
# class MyHandler < Moxml::SAX::Handler
|
|
15
|
+
# def on_start_element(name, attributes = {}, namespaces = {})
|
|
16
|
+
# puts "Started element: #{name}"
|
|
17
|
+
# end
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
# context = Moxml.new
|
|
21
|
+
# context.sax_parse(xml_string, MyHandler.new)
|
|
22
|
+
#
|
|
23
|
+
# @example Block-based handler
|
|
24
|
+
# context.sax_parse(xml_string) do
|
|
25
|
+
# start_element { |name, attrs| puts "Element: #{name}" }
|
|
26
|
+
# characters { |text| puts "Text: #{text}" }
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
module SAX
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/moxml/version.rb
CHANGED
|
data/lib/moxml/xml_utils/encoder.rb
CHANGED
|
@@ -10,19 +10,19 @@ module Moxml
|
|
|
10
10
|
basic: {
|
|
11
11
|
"<" => "<",
|
|
12
12
|
">" => ">",
|
|
13
|
-
"&" => "&"
|
|
13
|
+
"&" => "&",
|
|
14
14
|
},
|
|
15
15
|
quotes: {
|
|
16
16
|
"'" => "'",
|
|
17
|
-
'"' => """
|
|
17
|
+
'"' => """,
|
|
18
18
|
},
|
|
19
19
|
full: {
|
|
20
20
|
"<" => "<",
|
|
21
21
|
">" => ">",
|
|
22
22
|
"'" => "'",
|
|
23
23
|
'"' => """,
|
|
24
|
-
"&" => "&"
|
|
25
|
-
}
|
|
24
|
+
"&" => "&",
|
|
25
|
+
},
|
|
26
26
|
}.freeze
|
|
27
27
|
MODES = MAPPINGS.keys.freeze
|
|
28
28
|
|
data/lib/moxml/xml_utils.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require_relative "xml_utils/encoder"
|
|
4
4
|
|
|
5
5
|
# Ruby 3.3+ requires the URI module to be explicitly required
|
|
6
|
-
require "uri" unless defined?(
|
|
6
|
+
require "uri" unless defined?(URI)
|
|
7
7
|
|
|
8
8
|
module Moxml
|
|
9
9
|
module XmlUtils
|
|
@@ -47,17 +47,20 @@ module Moxml
|
|
|
47
47
|
# Validates that +name+ can be used as an XML element name.
#
# Accepted: a String whose first character is an ASCII letter or underscore
# and whose remaining characters are word characters, "-", "." or ":".
#
# @param name [Object] candidate element name
# @return [nil] when the name is valid
# @raise [ValidationError] when +name+ is not a String or does not match
def validate_element_name(name)
  # \A and \z anchor the whole string. The previous ^ and $ anchor per line,
  # so a value such as "ok\n<bad>" passed because its first line was valid.
  return if name.is_a?(String) && name.match?(/\A[a-zA-Z_][\w\-.:]*\z/)

  raise ValidationError, "Invalid XML element name: #{name}"
end
|
|
52
52
|
|
|
53
53
|
# Validates that +target+ can be used as a processing-instruction target.
#
# Accepted: a String whose first character is an ASCII letter or underscore
# and whose remaining characters are word characters, "-" or "." (no ":").
#
# @param target [Object] candidate PI target
# @return [nil] when the target is valid
# @raise [ValidationError] when +target+ is not a String or does not match
def validate_pi_target(target)
  # \A and \z anchor the whole string. The previous ^ and $ anchor per line,
  # so a multi-line value passed as long as one line looked valid.
  return if target.is_a?(String) && target.match?(/\A[a-zA-Z_][\w\-.]*\z/)

  raise ValidationError,
        "Invalid XML processing instruction target: #{target}"
end
|
|
58
59
|
|
|
59
60
|
# Validates a URI string.
#
# An empty string is accepted as-is. Any other value must match, from start
# to end, the pattern produced by ::URI::DEFAULT_PARSER.make_regexp.
#
# @param uri [String] candidate URI (must respond to `empty?` and `match?`)
# @return [nil] when the URI is accepted
# @raise [ValidationError] when the URI is non-empty and does not match
def validate_uri(uri)
  return if uri.empty?
  return if uri.match?(/\A#{::URI::DEFAULT_PARSER.make_regexp}\z/)

  raise ValidationError, "Invalid URI: #{uri}"
end
|