moxml 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/Rakefile +31 -0
- data/benchmarks/generate_report.rb +1 -1
- data/lib/moxml/adapter/base.rb +79 -8
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +42 -20
- data/lib/moxml/adapter/headed_ox.rb +30 -12
- data/lib/moxml/adapter/libxml.rb +181 -68
- data/lib/moxml/adapter/nokogiri.rb +33 -11
- data/lib/moxml/adapter/oga.rb +51 -96
- data/lib/moxml/adapter/ox.rb +79 -21
- data/lib/moxml/adapter/rexml.rb +64 -11
- data/lib/moxml/attribute.rb +7 -1
- data/lib/moxml/builder.rb +77 -24
- data/lib/moxml/config.rb +18 -1
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +5 -2
- data/lib/moxml/document_builder.rb +9 -8
- data/lib/moxml/element.rb +22 -13
- data/lib/moxml/entity_registry.rb +16 -2
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +21 -50
- data/lib/moxml/node_set.rb +1 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +44 -22
- data/lib/moxml/xpath/parser.rb +12 -7
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +2 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +3 -9
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/builder_spec.rb +249 -0
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- metadata +10 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bbd69145e9a360635af848bf0bdda2883e35760b2763021f6bf6f1d6dca9827e
|
|
4
|
+
data.tar.gz: aa492e21514fd80a01f98709eddf8c3aa323b584210d56534ad5e2c2b467df18
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1cdb7d6c934f1ea788a40d81d987c97d4c1fc21ad71d22eaac73abf45d093680667f3303b35934378b8cce0d99e3fc9db47c85632678247426527d7fb3491bed
|
|
7
|
+
data.tar.gz: 79c352eb8df9b86831d554e17538abd4da8a6dfce61b4e566bc236334601e43bff7c670894ea05c72abac40bb1b3b90375ef8caaf6b488a9c43bc33fc70d6785
|
data/.gitignore
CHANGED
|
@@ -28,6 +28,9 @@ libxml_*.txt
|
|
|
28
28
|
# Generated benchmark reports (machine-specific)
|
|
29
29
|
/benchmarks/PERFORMANCE_REPORT.md
|
|
30
30
|
|
|
31
|
+
# Local TODO tracking (kept locally, not committed)
|
|
32
|
+
TODO*
|
|
33
|
+
|
|
31
34
|
# IDE and editor files
|
|
32
35
|
.vscode/
|
|
33
36
|
.idea/
|
|
@@ -49,3 +52,6 @@ libxml_*.txt
|
|
|
49
52
|
/_site
|
|
50
53
|
/docs/_site
|
|
51
54
|
/docs/.jekyll-cache
|
|
55
|
+
|
|
56
|
+
# Utility scripts (local only)
|
|
57
|
+
/scripts/
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,97 +1,46 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-04-
|
|
3
|
+
# on 2026-04-23 07:48:23 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count:
|
|
10
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
12
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
13
|
-
Layout/ArgumentAlignment:
|
|
14
|
-
Exclude:
|
|
15
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
16
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
17
|
-
|
|
18
|
-
# Offense count: 14
|
|
9
|
+
# Offense count: 4
|
|
19
10
|
# This cop supports safe autocorrection (--autocorrect).
|
|
20
|
-
|
|
21
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
22
|
-
Layout/BlockAlignment:
|
|
11
|
+
Layout/EmptyLineAfterGuardClause:
|
|
23
12
|
Exclude:
|
|
24
|
-
- 'lib/moxml/adapter/
|
|
25
|
-
- '
|
|
26
|
-
- '
|
|
27
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
28
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
13
|
+
- 'lib/moxml/adapter/customized_rexml/formatter.rb'
|
|
14
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
15
|
+
- 'lib/moxml/entity_registry.rb'
|
|
29
16
|
|
|
30
|
-
# Offense count:
|
|
17
|
+
# Offense count: 1
|
|
31
18
|
# This cop supports safe autocorrection (--autocorrect).
|
|
32
|
-
|
|
19
|
+
# Configuration parameters: EmptyLineBetweenMethodDefs, EmptyLineBetweenClassDefs, EmptyLineBetweenModuleDefs, DefLikeMacros, AllowAdjacentOneLineDefs, NumberOfEmptyLines.
|
|
20
|
+
Layout/EmptyLineBetweenDefs:
|
|
33
21
|
Exclude:
|
|
34
22
|
- 'lib/moxml/adapter/ox.rb'
|
|
35
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
36
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
37
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
38
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
39
23
|
|
|
40
|
-
# Offense count:
|
|
41
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
42
|
-
Layout/ClosingParenthesisIndentation:
|
|
43
|
-
Exclude:
|
|
44
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
45
|
-
|
|
46
|
-
# Offense count: 3
|
|
47
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
48
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
49
|
-
# SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
|
|
50
|
-
Layout/FirstArgumentIndentation:
|
|
51
|
-
Exclude:
|
|
52
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
53
|
-
|
|
54
|
-
# Offense count: 13
|
|
24
|
+
# Offense count: 1
|
|
55
25
|
# This cop supports safe autocorrection (--autocorrect).
|
|
56
|
-
|
|
57
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
58
|
-
Layout/IndentationWidth:
|
|
26
|
+
Layout/EmptyLines:
|
|
59
27
|
Exclude:
|
|
60
28
|
- 'lib/moxml/adapter/ox.rb'
|
|
61
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
62
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
63
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
64
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
65
29
|
|
|
66
|
-
# Offense count:
|
|
30
|
+
# Offense count: 330
|
|
67
31
|
# This cop supports safe autocorrection (--autocorrect).
|
|
68
32
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
69
33
|
# URISchemes: http, https
|
|
70
34
|
Layout/LineLength:
|
|
71
35
|
Enabled: false
|
|
72
36
|
|
|
73
|
-
# Offense count:
|
|
74
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
75
|
-
Layout/MultilineBlockLayout:
|
|
76
|
-
Exclude:
|
|
77
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
78
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
79
|
-
|
|
80
|
-
# Offense count: 3
|
|
81
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
82
|
-
# Configuration parameters: EnforcedStyle.
|
|
83
|
-
# SupportedStyles: symmetrical, new_line, same_line
|
|
84
|
-
Layout/MultilineMethodCallBraceLayout:
|
|
85
|
-
Exclude:
|
|
86
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
87
|
-
|
|
88
|
-
# Offense count: 3
|
|
37
|
+
# Offense count: 1
|
|
89
38
|
# This cop supports safe autocorrection (--autocorrect).
|
|
90
|
-
# Configuration parameters:
|
|
91
|
-
|
|
39
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
40
|
+
# SupportedStyles: aligned, indented
|
|
41
|
+
Layout/MultilineOperationIndentation:
|
|
92
42
|
Exclude:
|
|
93
|
-
- '
|
|
94
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
43
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
95
44
|
|
|
96
45
|
# Offense count: 7
|
|
97
46
|
# Configuration parameters: AllowedMethods.
|
|
@@ -101,7 +50,7 @@ Lint/ConstantDefinitionInBlock:
|
|
|
101
50
|
- 'spec/moxml/declaration_preservation_spec.rb'
|
|
102
51
|
- 'spec/moxml/sax_spec.rb'
|
|
103
52
|
|
|
104
|
-
# Offense count:
|
|
53
|
+
# Offense count: 10
|
|
105
54
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
106
55
|
Lint/DuplicateBranch:
|
|
107
56
|
Exclude:
|
|
@@ -110,8 +59,9 @@ Lint/DuplicateBranch:
|
|
|
110
59
|
- 'lib/moxml/adapter/libxml.rb'
|
|
111
60
|
- 'lib/moxml/adapter/ox.rb'
|
|
112
61
|
- 'lib/moxml/document.rb'
|
|
62
|
+
- 'lib/moxml/entity_registry.rb'
|
|
113
63
|
|
|
114
|
-
# Offense count:
|
|
64
|
+
# Offense count: 5
|
|
115
65
|
Lint/DuplicateMethods:
|
|
116
66
|
Exclude:
|
|
117
67
|
- 'lib/moxml/config.rb'
|
|
@@ -138,6 +88,11 @@ Lint/EmptyWhen:
|
|
|
138
88
|
Exclude:
|
|
139
89
|
- 'lib/moxml/xpath/compiler.rb'
|
|
140
90
|
|
|
91
|
+
# Offense count: 3
|
|
92
|
+
Lint/HashCompareByIdentity:
|
|
93
|
+
Exclude:
|
|
94
|
+
- 'lib/moxml/native_attachment.rb'
|
|
95
|
+
|
|
141
96
|
# Offense count: 1
|
|
142
97
|
Lint/IneffectiveAccessModifier:
|
|
143
98
|
Exclude:
|
|
@@ -156,28 +111,28 @@ Lint/NoReturnInBeginEndBlocks:
|
|
|
156
111
|
Exclude:
|
|
157
112
|
- 'examples/api_client/api_client.rb'
|
|
158
113
|
|
|
159
|
-
# Offense count:
|
|
114
|
+
# Offense count: 104
|
|
160
115
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
161
116
|
Metrics/AbcSize:
|
|
162
117
|
Enabled: false
|
|
163
118
|
|
|
164
|
-
# Offense count:
|
|
119
|
+
# Offense count: 8
|
|
165
120
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
|
166
121
|
# AllowedMethods: refine
|
|
167
122
|
Metrics/BlockLength:
|
|
168
123
|
Max: 90
|
|
169
124
|
|
|
170
|
-
# Offense count:
|
|
125
|
+
# Offense count: 7
|
|
171
126
|
# Configuration parameters: CountBlocks, CountModifierForms.
|
|
172
127
|
Metrics/BlockNesting:
|
|
173
128
|
Max: 4
|
|
174
129
|
|
|
175
|
-
# Offense count:
|
|
130
|
+
# Offense count: 76
|
|
176
131
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
177
132
|
Metrics/CyclomaticComplexity:
|
|
178
133
|
Enabled: false
|
|
179
134
|
|
|
180
|
-
# Offense count:
|
|
135
|
+
# Offense count: 186
|
|
181
136
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
182
137
|
Metrics/MethodLength:
|
|
183
138
|
Max: 110
|
|
@@ -187,19 +142,11 @@ Metrics/MethodLength:
|
|
|
187
142
|
Metrics/ParameterLists:
|
|
188
143
|
Max: 7
|
|
189
144
|
|
|
190
|
-
# Offense count:
|
|
145
|
+
# Offense count: 52
|
|
191
146
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
192
147
|
Metrics/PerceivedComplexity:
|
|
193
148
|
Enabled: false
|
|
194
149
|
|
|
195
|
-
# Offense count: 2
|
|
196
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
197
|
-
# Configuration parameters: EnforcedStyleForLeadingUnderscores.
|
|
198
|
-
# SupportedStylesForLeadingUnderscores: disallowed, required, optional
|
|
199
|
-
Naming/MemoizedInstanceVariableName:
|
|
200
|
-
Exclude:
|
|
201
|
-
- 'lib/moxml/element.rb'
|
|
202
|
-
|
|
203
150
|
# Offense count: 16
|
|
204
151
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
205
152
|
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
@@ -233,12 +180,6 @@ Naming/VariableNumber:
|
|
|
233
180
|
- 'spec/moxml/allocation_guard_spec.rb'
|
|
234
181
|
- 'spec/support/allocation_helper.rb'
|
|
235
182
|
|
|
236
|
-
# Offense count: 1
|
|
237
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
238
|
-
Performance/TimesMap:
|
|
239
|
-
Exclude:
|
|
240
|
-
- 'spec/support/allocation_helper.rb'
|
|
241
|
-
|
|
242
183
|
# Offense count: 5
|
|
243
184
|
RSpec/BeforeAfterAll:
|
|
244
185
|
Exclude:
|
|
@@ -264,12 +205,12 @@ RSpec/ContextWording:
|
|
|
264
205
|
- 'spec/moxml/xpath/parser_spec.rb'
|
|
265
206
|
- 'spec/performance/benchmark_spec.rb'
|
|
266
207
|
|
|
267
|
-
# Offense count:
|
|
208
|
+
# Offense count: 24
|
|
268
209
|
# Configuration parameters: IgnoredMetadata.
|
|
269
210
|
RSpec/DescribeClass:
|
|
270
211
|
Enabled: false
|
|
271
212
|
|
|
272
|
-
# Offense count:
|
|
213
|
+
# Offense count: 295
|
|
273
214
|
# Configuration parameters: CountAsOne.
|
|
274
215
|
RSpec/ExampleLength:
|
|
275
216
|
Max: 64
|
|
@@ -305,7 +246,7 @@ RSpec/LeakyConstantDeclaration:
|
|
|
305
246
|
RSpec/MessageSpies:
|
|
306
247
|
EnforcedStyle: receive
|
|
307
248
|
|
|
308
|
-
# Offense count:
|
|
249
|
+
# Offense count: 390
|
|
309
250
|
RSpec/MultipleExpectations:
|
|
310
251
|
Max: 10
|
|
311
252
|
|
|
@@ -326,12 +267,6 @@ RSpec/NoExpectationExample:
|
|
|
326
267
|
Exclude:
|
|
327
268
|
- 'spec/performance/xpath_benchmark_spec.rb'
|
|
328
269
|
|
|
329
|
-
# Offense count: 6
|
|
330
|
-
RSpec/PendingWithoutReason:
|
|
331
|
-
Exclude:
|
|
332
|
-
- 'spec/moxml/xpath/functions/position_functions_spec.rb'
|
|
333
|
-
- 'spec/moxml/xpath/functions/special_functions_spec.rb'
|
|
334
|
-
|
|
335
270
|
# Offense count: 4
|
|
336
271
|
RSpec/RepeatedExample:
|
|
337
272
|
Exclude:
|
|
@@ -371,21 +306,6 @@ Security/Eval:
|
|
|
371
306
|
Exclude:
|
|
372
307
|
- 'spec/moxml/xpath/ruby/generator_spec.rb'
|
|
373
308
|
|
|
374
|
-
# Offense count: 11
|
|
375
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
376
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
377
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
378
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
379
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
380
|
-
# AllowedMethods: lambda, proc, it
|
|
381
|
-
Style/BlockDelimiters:
|
|
382
|
-
Exclude:
|
|
383
|
-
- 'lib/moxml/adapter/ox.rb'
|
|
384
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
385
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
386
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
387
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
388
|
-
|
|
389
309
|
# Offense count: 1
|
|
390
310
|
Style/DocumentDynamicEvalDefinition:
|
|
391
311
|
Exclude:
|
|
@@ -404,25 +324,17 @@ Style/HashLikeCase:
|
|
|
404
324
|
- 'lib/moxml/adapter/customized_rexml/formatter.rb'
|
|
405
325
|
- 'lib/moxml/adapter/ox.rb'
|
|
406
326
|
|
|
407
|
-
# Offense count: 1
|
|
408
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
409
|
-
Style/MapToHash:
|
|
410
|
-
Exclude:
|
|
411
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
412
|
-
|
|
413
327
|
# Offense count: 1
|
|
414
328
|
Style/MissingRespondToMissing:
|
|
415
329
|
Exclude:
|
|
416
330
|
- 'lib/moxml/xpath/ruby/node.rb'
|
|
417
331
|
|
|
418
332
|
# Offense count: 1
|
|
419
|
-
# This cop supports
|
|
420
|
-
# Configuration parameters:
|
|
421
|
-
|
|
422
|
-
Style/NumericPredicate:
|
|
333
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
334
|
+
# Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
|
|
335
|
+
Style/MultipleComparison:
|
|
423
336
|
Exclude:
|
|
424
|
-
- '
|
|
425
|
-
- 'lib/moxml/node_set.rb'
|
|
337
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
426
338
|
|
|
427
339
|
# Offense count: 5
|
|
428
340
|
# Configuration parameters: AllowedClasses.
|
|
@@ -440,16 +352,20 @@ Style/OptionalBooleanParameter:
|
|
|
440
352
|
- 'lib/moxml/adapter/libxml.rb'
|
|
441
353
|
- 'lib/moxml/xpath/compiler.rb'
|
|
442
354
|
|
|
443
|
-
# Offense count:
|
|
444
|
-
# This cop supports
|
|
445
|
-
Style/
|
|
355
|
+
# Offense count: 2
|
|
356
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
357
|
+
Style/RedundantAssignment:
|
|
446
358
|
Exclude:
|
|
447
359
|
- 'lib/moxml/adapter/ox.rb'
|
|
448
360
|
|
|
449
361
|
# Offense count: 1
|
|
450
362
|
# This cop supports safe autocorrection (--autocorrect).
|
|
451
|
-
|
|
452
|
-
# SupportedStyles: single_quotes, double_quotes
|
|
453
|
-
Style/StringLiterals:
|
|
363
|
+
Style/RedundantConstantBase:
|
|
454
364
|
Exclude:
|
|
455
|
-
- 'spec/moxml/
|
|
365
|
+
- 'spec/moxml/adapter/headed_ox_spec.rb'
|
|
366
|
+
|
|
367
|
+
# Offense count: 1
|
|
368
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
369
|
+
Style/SelectByKind:
|
|
370
|
+
Exclude:
|
|
371
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
data/README.adoc
CHANGED
|
@@ -887,6 +887,18 @@ The Ox adapter provides maximum parsing speed but has XPath limitations.
|
|
|
887
887
|
doc.xpath("//book").find { |book| book["id"] == "123" }
|
|
888
888
|
----
|
|
889
889
|
|
|
890
|
+
**Upstream Ox gem limitations:**
|
|
891
|
+
|
|
892
|
+
These limitations exist in the Ox gem itself and cannot be worked around in Moxml
|
|
893
|
+
without changes to the Ox C extension:
|
|
894
|
+
|
|
895
|
+
* *Namespace introspection* — Ox stores `xmlns` attributes but does not expose
|
|
896
|
+
namespace accessors on `Ox::Element`. Methods like `node.namespace`,
|
|
897
|
+
`node.namespaces`, and namespace inheritance are unavailable.
|
|
898
|
+
* *Parent node reparenting* — Ox has no method to change a node's parent after
|
|
899
|
+
creation, preventing `node.parent=` functionality. Nodes are immutable with
|
|
900
|
+
respect to their parent relationship.
|
|
901
|
+
|
|
890
902
|
For complete Ox adapter documentation including all limitations and workarounds,
|
|
891
903
|
see link:docs/_pages/adapters/ox.adoc[Ox Adapter Guide].
|
|
892
904
|
|
|
@@ -912,6 +924,12 @@ comprehensive pure Ruby XPath 1.0 engine.
|
|
|
912
924
|
* Prefer pure Ruby XPath for debugging
|
|
913
925
|
* Basic namespace queries are sufficient
|
|
914
926
|
|
|
927
|
+
**Inherited Ox limitations:**
|
|
928
|
+
|
|
929
|
+
HeadedOx inherits the upstream Ox gem limitations described above (namespace
|
|
930
|
+
introspection and parent node reparenting). Additionally, some sibling axes
|
|
931
|
+
are not fully supported due to Ox's tree structure.
|
|
932
|
+
|
|
915
933
|
[source,ruby]
|
|
916
934
|
----
|
|
917
935
|
# Use HeadedOx adapter
|
data/Rakefile
CHANGED
|
@@ -10,6 +10,37 @@ require "rubocop/rake_task"
|
|
|
10
10
|
RuboCop::RakeTask.new
|
|
11
11
|
|
|
12
12
|
namespace :spec do
|
|
13
|
+
desc "Validate XML fixtures are well-formed (requires xmllint)"
|
|
14
|
+
task :validate_fixtures do
|
|
15
|
+
fixtures = Dir.glob("spec/fixtures/**/*.xml")
|
|
16
|
+
if fixtures.empty?
|
|
17
|
+
abort "No XML fixtures found in spec/fixtures/"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
unless system("which xmllint > /dev/null 2>&1")
|
|
21
|
+
abort "xmllint not found. Install with: brew install libxml2 (macOS) or apt install libxml2-utils (Linux)"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Intentionally malformed fixtures (W3C test cases for error handling)
|
|
25
|
+
exemptions = %w[
|
|
26
|
+
spec/fixtures/w3c/namespaces/1.0/035.xml
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
errors = []
|
|
30
|
+
fixtures.each do |path|
|
|
31
|
+
next if exemptions.include?(path)
|
|
32
|
+
|
|
33
|
+
output = `xmllint --noout "#{path}" 2>&1`
|
|
34
|
+
errors << "#{path}: #{output.strip}" unless $?.success?
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
if errors.empty?
|
|
38
|
+
puts "#{fixtures.size} XML fixtures validated OK"
|
|
39
|
+
else
|
|
40
|
+
abort "Invalid fixtures:\n#{errors.join("\n")}"
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
13
44
|
desc "Run unit tests only"
|
|
14
45
|
RSpec::Core::RakeTask.new(:unit) do |t|
|
|
15
46
|
t.pattern = "spec/unit/**/*_spec.rb"
|
|
data/benchmarks/generate_report.rb
CHANGED
|
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
|
|
|
488
488
|
f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
|
|
489
489
|
f.puts ""
|
|
490
490
|
f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
|
|
491
|
-
f.puts "See docs/
|
|
491
|
+
f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
|
|
492
492
|
f.puts ""
|
|
493
493
|
end
|
|
494
494
|
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -8,9 +8,54 @@ module Moxml
|
|
|
8
8
|
class Base
|
|
9
9
|
# include XmlUtils
|
|
10
10
|
|
|
11
|
+
# Entity marker for adapters that resolve entities during parsing.
|
|
12
|
+
# U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character
|
|
13
|
+
# sentinel chosen because this exact sequence followed by a valid entity
|
|
14
|
+
# name pattern is vanishingly unlikely in real XML content.
|
|
15
|
+
# Non-standard entities like &copy; are converted to this marker before
|
|
16
|
+
# parsing, then restored during serialization.
|
|
17
|
+
# Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.
|
|
18
|
+
ENTITY_MARKER = "\u{FFFC}\u{FEFF}"
|
|
19
|
+
ENTITY_NAME_PATTERN = "[a-zA-Z_][\\w.:-]*"
|
|
20
|
+
ENTITY_NAME_RE = /&(#{ENTITY_NAME_PATTERN});/
|
|
21
|
+
ENTITY_MARKER_RE = /\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
|
|
22
|
+
SERIALIZED_ENTITY_MARKER_RE = /(#{ENTITY_NAME_PATTERN});/
|
|
23
|
+
STANDARD_ENTITIES = %w[amp lt gt quot apos].freeze
|
|
24
|
+
|
|
11
25
|
class << self
|
|
12
26
|
include XmlUtils
|
|
13
27
|
|
|
28
|
+
# Replace non-standard entity references with markers before parsing.
|
|
29
|
+
# Always returns a UTF-8 encoded string.
|
|
30
|
+
def preprocess_entities(xml)
|
|
31
|
+
return "" if xml.nil?
|
|
32
|
+
|
|
33
|
+
str = if xml.encoding == Encoding::BINARY
|
|
34
|
+
# Binary strings are assumed to be UTF-8. If the bytes are
|
|
35
|
+
# not valid UTF-8, fall back to encoding as UTF-8 with
|
|
36
|
+
# replacement to avoid raising on gsub.
|
|
37
|
+
dup = xml.dup.force_encoding("UTF-8")
|
|
38
|
+
dup.valid_encoding? ? dup : xml.dup.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
|
|
39
|
+
elsif xml.encoding == Encoding::UTF_8
|
|
40
|
+
xml
|
|
41
|
+
else
|
|
42
|
+
xml.encode("UTF-8")
|
|
43
|
+
end
|
|
44
|
+
str.gsub(ENTITY_NAME_RE) do |match|
|
|
45
|
+
STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Restore entity markers back to named entity references.
|
|
50
|
+
def restore_entities(text)
|
|
51
|
+
return text unless text.is_a?(String)
|
|
52
|
+
|
|
53
|
+
# Force UTF-8 encoding since markers are UTF-8 characters
|
|
54
|
+
str = text.encoding == Encoding::UTF_8 ? text : text.dup.force_encoding("UTF-8")
|
|
55
|
+
result = str.gsub(ENTITY_MARKER_RE, '&\1;')
|
|
56
|
+
result.gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
|
|
57
|
+
end
|
|
58
|
+
|
|
14
59
|
def set_root(_doc, _element)
|
|
15
60
|
raise Moxml::NotImplementedError.new(
|
|
16
61
|
"set_root not implemented",
|
|
@@ -149,6 +194,40 @@ namespace_validation_mode: :strict)
|
|
|
149
194
|
node
|
|
150
195
|
end
|
|
151
196
|
|
|
197
|
+
# Check if the native document has an XML declaration
|
|
198
|
+
# @param native_doc the native document object
|
|
199
|
+
# @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
|
|
200
|
+
# @return [Boolean]
|
|
201
|
+
def has_declaration?(_native_doc, wrapper)
|
|
202
|
+
wrapper.has_xml_declaration
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# Return the actual native node after an add_child operation.
|
|
206
|
+
# Override for adapters where node identity may change (e.g., LibXML doc.root=).
|
|
207
|
+
def actual_native(child_native, _parent_native)
|
|
208
|
+
child_native
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Returns all namespaces in scope for this element, including
|
|
212
|
+
# inherited from ancestors. Adapters with native support (Nokogiri)
|
|
213
|
+
# override this. Default walks the ancestor chain.
|
|
214
|
+
def in_scope_namespaces(element)
|
|
215
|
+
namespaces = {}
|
|
216
|
+
node = element
|
|
217
|
+
|
|
218
|
+
while node
|
|
219
|
+
break unless node_type(node) == :element
|
|
220
|
+
|
|
221
|
+
namespace_definitions(node).each do |ns|
|
|
222
|
+
prefix = namespace_prefix(ns)
|
|
223
|
+
namespaces[prefix] = ns unless namespaces.key?(prefix)
|
|
224
|
+
end
|
|
225
|
+
node = parent(node)
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
namespaces.values
|
|
229
|
+
end
|
|
230
|
+
|
|
152
231
|
protected
|
|
153
232
|
|
|
154
233
|
def create_native_element(_name, _owner_doc = nil)
|
|
@@ -222,14 +301,6 @@ namespace_validation_mode: :strict)
|
|
|
222
301
|
adapter: name,
|
|
223
302
|
)
|
|
224
303
|
end
|
|
225
|
-
|
|
226
|
-
def in_scope_namespaces(_element)
|
|
227
|
-
raise Moxml::NotImplementedError.new(
|
|
228
|
-
"in_scope_namespaces not implemented",
|
|
229
|
-
feature: "in_scope_namespaces",
|
|
230
|
-
adapter: name,
|
|
231
|
-
)
|
|
232
|
-
end
|
|
233
304
|
end
|
|
234
305
|
end
|
|
235
306
|
end
|
|
data/lib/moxml/adapter/customized_libxml/declaration.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Moxml
|
|
|
9
9
|
# are read-only after creation. This wrapper allows mutation by
|
|
10
10
|
# storing values internally and regenerating XML when needed.
|
|
11
11
|
class Declaration
|
|
12
|
-
attr_accessor :version, :encoding
|
|
12
|
+
attr_accessor :version, :encoding, :removed, :parent_doc
|
|
13
13
|
attr_reader :native
|
|
14
14
|
|
|
15
15
|
def initialize(native_doc, version = nil, encoding = nil,
|
|
data/lib/moxml/adapter/customized_rexml/formatter.rb
CHANGED
|
@@ -7,12 +7,13 @@ module Moxml
|
|
|
7
7
|
module CustomizedRexml
|
|
8
8
|
# Custom REXML formatter that fixes indentation and wrapping issues
|
|
9
9
|
class Formatter < ::REXML::Formatters::Pretty
|
|
10
|
-
def initialize(indentation: 2, self_close_empty: false)
|
|
10
|
+
def initialize(indentation: 2, self_close_empty: false, adapter: nil)
|
|
11
11
|
@indentation = " " * indentation
|
|
12
12
|
@level = 0
|
|
13
13
|
@compact = true
|
|
14
14
|
@width = -1 # Disable line wrapping
|
|
15
15
|
@self_close_empty = self_close_empty
|
|
16
|
+
@adapter = adapter
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def write(node, output)
|
|
@@ -31,7 +32,13 @@ module Moxml
|
|
|
31
32
|
output << "<#{node.expanded_name}"
|
|
32
33
|
write_attributes(node, output)
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
# Check for entity refs stored in adapter attachments
|
|
36
|
+
entity_refs = @adapter&.attachments&.get(node, :entity_refs)
|
|
37
|
+
child_sequence = @adapter&.attachments&.get(node, :child_sequence)
|
|
38
|
+
|
|
39
|
+
has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
40
|
+
|
|
41
|
+
if has_no_children && @self_close_empty
|
|
35
42
|
output << "/>"
|
|
36
43
|
return
|
|
37
44
|
end
|
|
@@ -44,26 +51,41 @@ module Moxml
|
|
|
44
51
|
mixed = has_text && has_elements
|
|
45
52
|
|
|
46
53
|
# Handle children based on content type
|
|
47
|
-
|
|
54
|
+
all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
55
|
+
unless all_children_empty
|
|
48
56
|
@level += @indentation.length unless mixed
|
|
49
57
|
|
|
50
|
-
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
58
|
+
if entity_refs && !entity_refs.empty? && child_sequence
|
|
59
|
+
# Interleave native children with entity refs using tracked sequence
|
|
60
|
+
eref_idx = 0
|
|
61
|
+
native_idx = 0
|
|
62
|
+
child_sequence.each do |type|
|
|
63
|
+
case type
|
|
64
|
+
when :native
|
|
65
|
+
if native_idx < node.children.size
|
|
66
|
+
child = node.children[native_idx]
|
|
67
|
+
native_idx += 1
|
|
68
|
+
next if child.is_a?(::REXML::Text) &&
|
|
69
|
+
child.to_s.strip.empty? &&
|
|
70
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
71
|
+
write(child, output)
|
|
72
|
+
end
|
|
73
|
+
when :eref
|
|
74
|
+
if eref_idx < entity_refs.size
|
|
75
|
+
write(entity_refs[eref_idx], output)
|
|
76
|
+
eref_idx += 1
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
else
|
|
81
|
+
node.children.each_with_index do |child, _index|
|
|
82
|
+
# Skip insignificant whitespace
|
|
83
|
+
next if child.is_a?(::REXML::Text) &&
|
|
84
|
+
child.to_s.strip.empty? &&
|
|
85
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
86
|
+
|
|
87
|
+
write(child, output)
|
|
88
|
+
end
|
|
67
89
|
end
|
|
68
90
|
|
|
69
91
|
# Reset indentation for closing tag in non-mixed content
|