moxml 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/.rubocop_todo.yml +49 -133
  4. data/README.adoc +18 -0
  5. data/Rakefile +31 -0
  6. data/benchmarks/generate_report.rb +1 -1
  7. data/lib/moxml/adapter/base.rb +79 -8
  8. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  9. data/lib/moxml/adapter/customized_rexml/formatter.rb +42 -20
  10. data/lib/moxml/adapter/headed_ox.rb +30 -12
  11. data/lib/moxml/adapter/libxml.rb +181 -68
  12. data/lib/moxml/adapter/nokogiri.rb +33 -11
  13. data/lib/moxml/adapter/oga.rb +51 -96
  14. data/lib/moxml/adapter/ox.rb +79 -21
  15. data/lib/moxml/adapter/rexml.rb +64 -11
  16. data/lib/moxml/attribute.rb +7 -1
  17. data/lib/moxml/builder.rb +77 -24
  18. data/lib/moxml/config.rb +18 -1
  19. data/lib/moxml/declaration.rb +4 -2
  20. data/lib/moxml/document.rb +5 -2
  21. data/lib/moxml/document_builder.rb +9 -8
  22. data/lib/moxml/element.rb +22 -13
  23. data/lib/moxml/entity_registry.rb +16 -2
  24. data/lib/moxml/native_attachment.rb +65 -0
  25. data/lib/moxml/node.rb +21 -50
  26. data/lib/moxml/node_set.rb +1 -1
  27. data/lib/moxml/text.rb +6 -0
  28. data/lib/moxml/version.rb +1 -1
  29. data/lib/moxml/xpath/compiler.rb +44 -22
  30. data/lib/moxml/xpath/parser.rb +12 -7
  31. data/lib/moxml.rb +1 -0
  32. data/scripts/format_xml.rb +16 -0
  33. data/scripts/pretty_format_xml.rb +14 -0
  34. data/spec/consistency/round_trip_spec.rb +3 -30
  35. data/spec/integration/all_adapters_spec.rb +2 -0
  36. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  37. data/spec/integration/shared_examples/edge_cases.rb +3 -9
  38. data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
  39. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  40. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
  41. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  42. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
  43. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
  44. data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
  45. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  46. data/spec/moxml/builder_spec.rb +249 -0
  47. data/spec/moxml/entity_preservation_spec.rb +130 -0
  48. data/spec/moxml/entity_reference_spec.rb +114 -0
  49. data/spec/moxml/entity_registry_spec.rb +68 -0
  50. data/spec/moxml/xpath/axes_spec.rb +0 -1
  51. data/spec/moxml/xpath/compiler_spec.rb +0 -2
  52. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  53. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  54. data/spec/performance/memory_usage_spec.rb +0 -4
  55. metadata +10 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41df5c029544cf0136de3a87a60a1926df913b973b2653f7bb56c2e3725176b8
4
- data.tar.gz: 59ef11350181ac4b5ddec209128f26cc17e629cb778776b6a198df6853a5ce22
3
+ metadata.gz: bbd69145e9a360635af848bf0bdda2883e35760b2763021f6bf6f1d6dca9827e
4
+ data.tar.gz: aa492e21514fd80a01f98709eddf8c3aa323b584210d56534ad5e2c2b467df18
5
5
  SHA512:
6
- metadata.gz: c1e5e7f0a5a036bc900bd2d13522ff381cdb01fdc02d501aac6b328816bf4c2a953a0f47721665b1a209b9cdf89f29081a6410b286d6f4bdab7389b411e5475e
7
- data.tar.gz: 7d05e346a9b0e32da8de5e26b29500dee443fed6c356ba12a3dafc4247b9fb2be454a02d41105e370d91c363f6bfd8445fa79c664003fa1e8f7eca8adb8da31e
6
+ metadata.gz: 1cdb7d6c934f1ea788a40d81d987c97d4c1fc21ad71d22eaac73abf45d093680667f3303b35934378b8cce0d99e3fc9db47c85632678247426527d7fb3491bed
7
+ data.tar.gz: 79c352eb8df9b86831d554e17538abd4da8a6dfce61b4e566bc236334601e43bff7c670894ea05c72abac40bb1b3b90375ef8caaf6b488a9c43bc33fc70d6785
data/.gitignore CHANGED
@@ -28,6 +28,9 @@ libxml_*.txt
28
28
  # Generated benchmark reports (machine-specific)
29
29
  /benchmarks/PERFORMANCE_REPORT.md
30
30
 
31
+ # Local TODO tracking (kept locally, not committed)
32
+ TODO*
33
+
31
34
  # IDE and editor files
32
35
  .vscode/
33
36
  .idea/
@@ -49,3 +52,6 @@ libxml_*.txt
49
52
  /_site
50
53
  /docs/_site
51
54
  /docs/.jekyll-cache
55
+
56
+ # Utility scripts (local only)
57
+ /scripts/
data/.rubocop_todo.yml CHANGED
@@ -1,97 +1,46 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-04-22 01:41:34 UTC using RuboCop version 1.86.0.
3
+ # on 2026-04-23 07:48:23 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 18
10
- # This cop supports safe autocorrection (--autocorrect).
11
- # Configuration parameters: EnforcedStyle, IndentationWidth.
12
- # SupportedStyles: with_first_argument, with_fixed_indentation
13
- Layout/ArgumentAlignment:
14
- Exclude:
15
- - 'spec/moxml/allocation_benchmark_spec.rb'
16
- - 'spec/moxml/allocation_guard_spec.rb'
17
-
18
- # Offense count: 14
9
+ # Offense count: 4
19
10
  # This cop supports safe autocorrection (--autocorrect).
20
- # Configuration parameters: EnforcedStyleAlignWith.
21
- # SupportedStylesAlignWith: either, start_of_block, start_of_line
22
- Layout/BlockAlignment:
11
+ Layout/EmptyLineAfterGuardClause:
23
12
  Exclude:
24
- - 'lib/moxml/adapter/ox.rb'
25
- - 'spec/moxml/allocation_benchmark_spec.rb'
26
- - 'spec/moxml/allocation_guard_spec.rb'
27
- - 'spec/moxml/lazy_parse_spec.rb'
28
- - 'spec/moxml/node_cache_spec.rb'
13
+ - 'lib/moxml/adapter/customized_rexml/formatter.rb'
14
+ - 'lib/moxml/adapter/libxml.rb'
15
+ - 'lib/moxml/entity_registry.rb'
29
16
 
30
- # Offense count: 7
17
+ # Offense count: 1
31
18
  # This cop supports safe autocorrection (--autocorrect).
32
- Layout/BlockEndNewline:
19
+ # Configuration parameters: EmptyLineBetweenMethodDefs, EmptyLineBetweenClassDefs, EmptyLineBetweenModuleDefs, DefLikeMacros, AllowAdjacentOneLineDefs, NumberOfEmptyLines.
20
+ Layout/EmptyLineBetweenDefs:
33
21
  Exclude:
34
22
  - 'lib/moxml/adapter/ox.rb'
35
- - 'spec/moxml/allocation_benchmark_spec.rb'
36
- - 'spec/moxml/allocation_guard_spec.rb'
37
- - 'spec/moxml/lazy_parse_spec.rb'
38
- - 'spec/moxml/node_cache_spec.rb'
39
23
 
40
- # Offense count: 3
41
- # This cop supports safe autocorrection (--autocorrect).
42
- Layout/ClosingParenthesisIndentation:
43
- Exclude:
44
- - 'spec/moxml/allocation_guard_spec.rb'
45
-
46
- # Offense count: 3
47
- # This cop supports safe autocorrection (--autocorrect).
48
- # Configuration parameters: EnforcedStyle, IndentationWidth.
49
- # SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
50
- Layout/FirstArgumentIndentation:
51
- Exclude:
52
- - 'spec/moxml/allocation_guard_spec.rb'
53
-
54
- # Offense count: 13
24
+ # Offense count: 1
55
25
  # This cop supports safe autocorrection (--autocorrect).
56
- # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
57
- # SupportedStylesAlignWith: start_of_line, relative_to_receiver
58
- Layout/IndentationWidth:
26
+ Layout/EmptyLines:
59
27
  Exclude:
60
28
  - 'lib/moxml/adapter/ox.rb'
61
- - 'spec/moxml/allocation_benchmark_spec.rb'
62
- - 'spec/moxml/allocation_guard_spec.rb'
63
- - 'spec/moxml/lazy_parse_spec.rb'
64
- - 'spec/moxml/node_cache_spec.rb'
65
29
 
66
- # Offense count: 307
30
+ # Offense count: 330
67
31
  # This cop supports safe autocorrection (--autocorrect).
68
32
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
69
33
  # URISchemes: http, https
70
34
  Layout/LineLength:
71
35
  Enabled: false
72
36
 
73
- # Offense count: 3
74
- # This cop supports safe autocorrection (--autocorrect).
75
- Layout/MultilineBlockLayout:
76
- Exclude:
77
- - 'spec/moxml/allocation_benchmark_spec.rb'
78
- - 'spec/moxml/allocation_guard_spec.rb'
79
-
80
- # Offense count: 3
81
- # This cop supports safe autocorrection (--autocorrect).
82
- # Configuration parameters: EnforcedStyle.
83
- # SupportedStyles: symmetrical, new_line, same_line
84
- Layout/MultilineMethodCallBraceLayout:
85
- Exclude:
86
- - 'spec/moxml/allocation_guard_spec.rb'
87
-
88
- # Offense count: 3
37
+ # Offense count: 1
89
38
  # This cop supports safe autocorrection (--autocorrect).
90
- # Configuration parameters: AllowInHeredoc.
91
- Layout/TrailingWhitespace:
39
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
40
+ # SupportedStyles: aligned, indented
41
+ Layout/MultilineOperationIndentation:
92
42
  Exclude:
93
- - 'spec/moxml/allocation_benchmark_spec.rb'
94
- - 'spec/moxml/allocation_guard_spec.rb'
43
+ - 'lib/moxml/adapter/ox.rb'
95
44
 
96
45
  # Offense count: 7
97
46
  # Configuration parameters: AllowedMethods.
@@ -101,7 +50,7 @@ Lint/ConstantDefinitionInBlock:
101
50
  - 'spec/moxml/declaration_preservation_spec.rb'
102
51
  - 'spec/moxml/sax_spec.rb'
103
52
 
104
- # Offense count: 8
53
+ # Offense count: 10
105
54
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
106
55
  Lint/DuplicateBranch:
107
56
  Exclude:
@@ -110,8 +59,9 @@ Lint/DuplicateBranch:
110
59
  - 'lib/moxml/adapter/libxml.rb'
111
60
  - 'lib/moxml/adapter/ox.rb'
112
61
  - 'lib/moxml/document.rb'
62
+ - 'lib/moxml/entity_registry.rb'
113
63
 
114
- # Offense count: 4
64
+ # Offense count: 5
115
65
  Lint/DuplicateMethods:
116
66
  Exclude:
117
67
  - 'lib/moxml/config.rb'
@@ -138,6 +88,11 @@ Lint/EmptyWhen:
138
88
  Exclude:
139
89
  - 'lib/moxml/xpath/compiler.rb'
140
90
 
91
+ # Offense count: 3
92
+ Lint/HashCompareByIdentity:
93
+ Exclude:
94
+ - 'lib/moxml/native_attachment.rb'
95
+
141
96
  # Offense count: 1
142
97
  Lint/IneffectiveAccessModifier:
143
98
  Exclude:
@@ -156,28 +111,28 @@ Lint/NoReturnInBeginEndBlocks:
156
111
  Exclude:
157
112
  - 'examples/api_client/api_client.rb'
158
113
 
159
- # Offense count: 100
114
+ # Offense count: 104
160
115
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
161
116
  Metrics/AbcSize:
162
117
  Enabled: false
163
118
 
164
- # Offense count: 7
119
+ # Offense count: 8
165
120
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
166
121
  # AllowedMethods: refine
167
122
  Metrics/BlockLength:
168
123
  Max: 90
169
124
 
170
- # Offense count: 5
125
+ # Offense count: 7
171
126
  # Configuration parameters: CountBlocks, CountModifierForms.
172
127
  Metrics/BlockNesting:
173
128
  Max: 4
174
129
 
175
- # Offense count: 70
130
+ # Offense count: 76
176
131
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
177
132
  Metrics/CyclomaticComplexity:
178
133
  Enabled: false
179
134
 
180
- # Offense count: 182
135
+ # Offense count: 186
181
136
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
182
137
  Metrics/MethodLength:
183
138
  Max: 110
@@ -187,19 +142,11 @@ Metrics/MethodLength:
187
142
  Metrics/ParameterLists:
188
143
  Max: 7
189
144
 
190
- # Offense count: 47
145
+ # Offense count: 52
191
146
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
192
147
  Metrics/PerceivedComplexity:
193
148
  Enabled: false
194
149
 
195
- # Offense count: 2
196
- # This cop supports unsafe autocorrection (--autocorrect-all).
197
- # Configuration parameters: EnforcedStyleForLeadingUnderscores.
198
- # SupportedStylesForLeadingUnderscores: disallowed, required, optional
199
- Naming/MemoizedInstanceVariableName:
200
- Exclude:
201
- - 'lib/moxml/element.rb'
202
-
203
150
  # Offense count: 16
204
151
  # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
205
152
  # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
@@ -233,12 +180,6 @@ Naming/VariableNumber:
233
180
  - 'spec/moxml/allocation_guard_spec.rb'
234
181
  - 'spec/support/allocation_helper.rb'
235
182
 
236
- # Offense count: 1
237
- # This cop supports unsafe autocorrection (--autocorrect-all).
238
- Performance/TimesMap:
239
- Exclude:
240
- - 'spec/support/allocation_helper.rb'
241
-
242
183
  # Offense count: 5
243
184
  RSpec/BeforeAfterAll:
244
185
  Exclude:
@@ -264,12 +205,12 @@ RSpec/ContextWording:
264
205
  - 'spec/moxml/xpath/parser_spec.rb'
265
206
  - 'spec/performance/benchmark_spec.rb'
266
207
 
267
- # Offense count: 23
208
+ # Offense count: 24
268
209
  # Configuration parameters: IgnoredMetadata.
269
210
  RSpec/DescribeClass:
270
211
  Enabled: false
271
212
 
272
- # Offense count: 271
213
+ # Offense count: 295
273
214
  # Configuration parameters: CountAsOne.
274
215
  RSpec/ExampleLength:
275
216
  Max: 64
@@ -305,7 +246,7 @@ RSpec/LeakyConstantDeclaration:
305
246
  RSpec/MessageSpies:
306
247
  EnforcedStyle: receive
307
248
 
308
- # Offense count: 356
249
+ # Offense count: 390
309
250
  RSpec/MultipleExpectations:
310
251
  Max: 10
311
252
 
@@ -326,12 +267,6 @@ RSpec/NoExpectationExample:
326
267
  Exclude:
327
268
  - 'spec/performance/xpath_benchmark_spec.rb'
328
269
 
329
- # Offense count: 6
330
- RSpec/PendingWithoutReason:
331
- Exclude:
332
- - 'spec/moxml/xpath/functions/position_functions_spec.rb'
333
- - 'spec/moxml/xpath/functions/special_functions_spec.rb'
334
-
335
270
  # Offense count: 4
336
271
  RSpec/RepeatedExample:
337
272
  Exclude:
@@ -371,21 +306,6 @@ Security/Eval:
371
306
  Exclude:
372
307
  - 'spec/moxml/xpath/ruby/generator_spec.rb'
373
308
 
374
- # Offense count: 11
375
- # This cop supports safe autocorrection (--autocorrect).
376
- # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
377
- # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
378
- # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
379
- # FunctionalMethods: let, let!, subject, watch
380
- # AllowedMethods: lambda, proc, it
381
- Style/BlockDelimiters:
382
- Exclude:
383
- - 'lib/moxml/adapter/ox.rb'
384
- - 'spec/moxml/allocation_benchmark_spec.rb'
385
- - 'spec/moxml/allocation_guard_spec.rb'
386
- - 'spec/moxml/lazy_parse_spec.rb'
387
- - 'spec/moxml/node_cache_spec.rb'
388
-
389
309
  # Offense count: 1
390
310
  Style/DocumentDynamicEvalDefinition:
391
311
  Exclude:
@@ -404,25 +324,17 @@ Style/HashLikeCase:
404
324
  - 'lib/moxml/adapter/customized_rexml/formatter.rb'
405
325
  - 'lib/moxml/adapter/ox.rb'
406
326
 
407
- # Offense count: 1
408
- # This cop supports unsafe autocorrection (--autocorrect-all).
409
- Style/MapToHash:
410
- Exclude:
411
- - 'spec/moxml/node_cache_spec.rb'
412
-
413
327
  # Offense count: 1
414
328
  Style/MissingRespondToMissing:
415
329
  Exclude:
416
330
  - 'lib/moxml/xpath/ruby/node.rb'
417
331
 
418
332
  # Offense count: 1
419
- # This cop supports unsafe autocorrection (--autocorrect-all).
420
- # Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
421
- # SupportedStyles: predicate, comparison
422
- Style/NumericPredicate:
333
+ # This cop supports safe autocorrection (--autocorrect).
334
+ # Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
335
+ Style/MultipleComparison:
423
336
  Exclude:
424
- - 'spec/**/*'
425
- - 'lib/moxml/node_set.rb'
337
+ - 'lib/moxml/xpath/compiler.rb'
426
338
 
427
339
  # Offense count: 5
428
340
  # Configuration parameters: AllowedClasses.
@@ -440,16 +352,20 @@ Style/OptionalBooleanParameter:
440
352
  - 'lib/moxml/adapter/libxml.rb'
441
353
  - 'lib/moxml/xpath/compiler.rb'
442
354
 
443
- # Offense count: 1
444
- # This cop supports unsafe autocorrection (--autocorrect-all).
445
- Style/SelectByKind:
355
+ # Offense count: 2
356
+ # This cop supports safe autocorrection (--autocorrect).
357
+ Style/RedundantAssignment:
446
358
  Exclude:
447
359
  - 'lib/moxml/adapter/ox.rb'
448
360
 
449
361
  # Offense count: 1
450
362
  # This cop supports safe autocorrection (--autocorrect).
451
- # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
452
- # SupportedStyles: single_quotes, double_quotes
453
- Style/StringLiterals:
363
+ Style/RedundantConstantBase:
454
364
  Exclude:
455
- - 'spec/moxml/lazy_parse_spec.rb'
365
+ - 'spec/moxml/adapter/headed_ox_spec.rb'
366
+
367
+ # Offense count: 1
368
+ # This cop supports unsafe autocorrection (--autocorrect-all).
369
+ Style/SelectByKind:
370
+ Exclude:
371
+ - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
data/README.adoc CHANGED
@@ -887,6 +887,18 @@ The Ox adapter provides maximum parsing speed but has XPath limitations.
887
887
  doc.xpath("//book").find { |book| book["id"] == "123" }
888
888
  ----
889
889
 
890
+ **Upstream Ox gem limitations:**
891
+
892
+ These limitations exist in the Ox gem itself and cannot be worked around in Moxml
893
+ without changes to the Ox C extension:
894
+
895
+ * *Namespace introspection* — Ox stores `xmlns` attributes but does not expose
896
+ namespace accessors on `Ox::Element`. Methods like `node.namespace`,
897
+ `node.namespaces`, and namespace inheritance are unavailable.
898
+ * *Parent node reparenting* — Ox has no method to change a node's parent after
899
+ creation, preventing `node.parent=` functionality. Nodes are immutable with
900
+ respect to their parent relationship.
901
+
890
902
  For complete Ox adapter documentation including all limitations and workarounds,
891
903
  see link:docs/_pages/adapters/ox.adoc[Ox Adapter Guide].
892
904
 
@@ -912,6 +924,12 @@ comprehensive pure Ruby XPath 1.0 engine.
912
924
  * Prefer pure Ruby XPath for debugging
913
925
  * Basic namespace queries are sufficient
914
926
 
927
+ **Inherited Ox limitations:**
928
+
929
+ HeadedOx inherits the upstream Ox gem limitations described above (namespace
930
+ introspection and parent node reparenting). Additionally, some sibling axes
931
+ are not fully supported due to Ox's tree structure.
932
+
915
933
  [source,ruby]
916
934
  ----
917
935
  # Use HeadedOx adapter
data/Rakefile CHANGED
@@ -10,6 +10,37 @@ require "rubocop/rake_task"
10
10
  RuboCop::RakeTask.new
11
11
 
12
12
  namespace :spec do
13
+ desc "Validate XML fixtures are well-formed (requires xmllint)"
14
+ task :validate_fixtures do
15
+ fixtures = Dir.glob("spec/fixtures/**/*.xml")
16
+ if fixtures.empty?
17
+ abort "No XML fixtures found in spec/fixtures/"
18
+ end
19
+
20
+ unless system("which xmllint > /dev/null 2>&1")
21
+ abort "xmllint not found. Install with: brew install libxml2 (macOS) or apt install libxml2-utils (Linux)"
22
+ end
23
+
24
+ # Intentionally malformed fixtures (W3C test cases for error handling)
25
+ exemptions = %w[
26
+ spec/fixtures/w3c/namespaces/1.0/035.xml
27
+ ]
28
+
29
+ errors = []
30
+ fixtures.each do |path|
31
+ next if exemptions.include?(path)
32
+
33
+ output = `xmllint --noout "#{path}" 2>&1`
34
+ errors << "#{path}: #{output.strip}" unless $?.success?
35
+ end
36
+
37
+ if errors.empty?
38
+ puts "#{fixtures.size} XML fixtures validated OK"
39
+ else
40
+ abort "Invalid fixtures:\n#{errors.join("\n")}"
41
+ end
42
+ end
43
+
13
44
  desc "Run unit tests only"
14
45
  RSpec::Core::RakeTask.new(:unit) do |t|
15
46
  t.pattern = "spec/unit/**/*_spec.rb"
data/benchmarks/generate_report.rb CHANGED
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
488
488
  f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
489
489
  f.puts ""
490
490
  f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
491
- f.puts "See docs/HEADED_OX_LIMITATIONS.md for complete details."
491
+ f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
492
492
  f.puts ""
493
493
  end
494
494
 
data/lib/moxml/adapter/base.rb CHANGED
@@ -8,9 +8,54 @@ module Moxml
8
8
  class Base
9
9
  # include XmlUtils
10
10
 
11
+ # Entity marker for adapters that resolve entities during parsing.
12
+ # U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character
13
+ # sentinel chosen because this exact sequence followed by a valid entity
14
+ # name pattern is vanishingly unlikely in real XML content.
15
+ # Non-standard entities like &copy; are converted to this marker before
16
+ # parsing, then restored during serialization.
17
+ # Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.
18
+ ENTITY_MARKER = "\u{FFFC}\u{FEFF}"
19
+ ENTITY_NAME_PATTERN = "[a-zA-Z_][\\w.:-]*"
20
+ ENTITY_NAME_RE = /&(#{ENTITY_NAME_PATTERN});/
21
+ ENTITY_MARKER_RE = /\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
22
+ SERIALIZED_ENTITY_MARKER_RE = /&#xFFFC;&#xFEFF;(#{ENTITY_NAME_PATTERN});/
23
+ STANDARD_ENTITIES = %w[amp lt gt quot apos].freeze
24
+
11
25
  class << self
12
26
  include XmlUtils
13
27
 
28
+ # Replace non-standard entity references with markers before parsing.
29
+ # Always returns a UTF-8 encoded string.
30
+ def preprocess_entities(xml)
31
+ return "" if xml.nil?
32
+
33
+ str = if xml.encoding == Encoding::BINARY
34
+ # Binary strings are assumed to be UTF-8. If the bytes are
35
+ # not valid UTF-8, fall back to encoding as UTF-8 with
36
+ # replacement to avoid raising on gsub.
37
+ dup = xml.dup.force_encoding("UTF-8")
38
+ dup.valid_encoding? ? dup : xml.dup.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
39
+ elsif xml.encoding == Encoding::UTF_8
40
+ xml
41
+ else
42
+ xml.encode("UTF-8")
43
+ end
44
+ str.gsub(ENTITY_NAME_RE) do |match|
45
+ STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
46
+ end
47
+ end
48
+
49
+ # Restore entity markers back to named entity references.
50
+ def restore_entities(text)
51
+ return text unless text.is_a?(String)
52
+
53
+ # Force UTF-8 encoding since markers are UTF-8 characters
54
+ str = text.encoding == Encoding::UTF_8 ? text : text.dup.force_encoding("UTF-8")
55
+ result = str.gsub(ENTITY_MARKER_RE, '&\1;')
56
+ result.gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
57
+ end
58
+
14
59
  def set_root(_doc, _element)
15
60
  raise Moxml::NotImplementedError.new(
16
61
  "set_root not implemented",
@@ -149,6 +194,40 @@ namespace_validation_mode: :strict)
149
194
  node
150
195
  end
151
196
 
197
+ # Check if the native document has an XML declaration
198
+ # @param native_doc the native document object
199
+ # @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
200
+ # @return [Boolean]
201
+ def has_declaration?(_native_doc, wrapper)
202
+ wrapper.has_xml_declaration
203
+ end
204
+
205
+ # Return the actual native node after an add_child operation.
206
+ # Override for adapters where node identity may change (e.g., LibXML doc.root=).
207
+ def actual_native(child_native, _parent_native)
208
+ child_native
209
+ end
210
+
211
+ # Returns all namespaces in scope for this element, including
212
+ # inherited from ancestors. Adapters with native support (Nokogiri)
213
+ # override this. Default walks the ancestor chain.
214
+ def in_scope_namespaces(element)
215
+ namespaces = {}
216
+ node = element
217
+
218
+ while node
219
+ break unless node_type(node) == :element
220
+
221
+ namespace_definitions(node).each do |ns|
222
+ prefix = namespace_prefix(ns)
223
+ namespaces[prefix] = ns unless namespaces.key?(prefix)
224
+ end
225
+ node = parent(node)
226
+ end
227
+
228
+ namespaces.values
229
+ end
230
+
152
231
  protected
153
232
 
154
233
  def create_native_element(_name, _owner_doc = nil)
@@ -222,14 +301,6 @@ namespace_validation_mode: :strict)
222
301
  adapter: name,
223
302
  )
224
303
  end
225
-
226
- def in_scope_namespaces(_element)
227
- raise Moxml::NotImplementedError.new(
228
- "in_scope_namespaces not implemented",
229
- feature: "in_scope_namespaces",
230
- adapter: name,
231
- )
232
- end
233
304
  end
234
305
  end
235
306
  end
data/lib/moxml/adapter/customized_libxml/declaration.rb CHANGED
@@ -9,7 +9,7 @@ module Moxml
9
9
  # are read-only after creation. This wrapper allows mutation by
10
10
  # storing values internally and regenerating XML when needed.
11
11
  class Declaration
12
- attr_accessor :version, :encoding
12
+ attr_accessor :version, :encoding, :removed, :parent_doc
13
13
  attr_reader :native
14
14
 
15
15
  def initialize(native_doc, version = nil, encoding = nil,
data/lib/moxml/adapter/customized_rexml/formatter.rb CHANGED
@@ -7,12 +7,13 @@ module Moxml
7
7
  module CustomizedRexml
8
8
  # Custom REXML formatter that fixes indentation and wrapping issues
9
9
  class Formatter < ::REXML::Formatters::Pretty
10
- def initialize(indentation: 2, self_close_empty: false)
10
+ def initialize(indentation: 2, self_close_empty: false, adapter: nil)
11
11
  @indentation = " " * indentation
12
12
  @level = 0
13
13
  @compact = true
14
14
  @width = -1 # Disable line wrapping
15
15
  @self_close_empty = self_close_empty
16
+ @adapter = adapter
16
17
  end
17
18
 
18
19
  def write(node, output)
@@ -31,7 +32,13 @@ module Moxml
31
32
  output << "<#{node.expanded_name}"
32
33
  write_attributes(node, output)
33
34
 
34
- if node.children.empty? && @self_close_empty
35
+ # Check for entity refs stored in adapter attachments
36
+ entity_refs = @adapter&.attachments&.get(node, :entity_refs)
37
+ child_sequence = @adapter&.attachments&.get(node, :child_sequence)
38
+
39
+ has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
40
+
41
+ if has_no_children && @self_close_empty
35
42
  output << "/>"
36
43
  return
37
44
  end
@@ -44,26 +51,41 @@ module Moxml
44
51
  mixed = has_text && has_elements
45
52
 
46
53
  # Handle children based on content type
47
- unless node.children.empty?
54
+ all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
55
+ unless all_children_empty
48
56
  @level += @indentation.length unless mixed
49
57
 
50
- node.children.each_with_index do |child, _index|
51
- # Skip insignificant whitespace
52
- next if child.is_a?(::REXML::Text) &&
53
- child.to_s.strip.empty? &&
54
- !(child.next_sibling.nil? && child.previous_sibling.nil?)
55
-
56
- # Indent non-text nodes in non-mixed content
57
- # if !mixed && !child.is_a?(::REXML::Text)
58
- # output << ' ' * @level
59
- # end
60
-
61
- write(child, output)
62
-
63
- # Add newlines between elements in non-mixed content
64
- # if !mixed && !child.is_a?(::REXML::Text) && index < node.children.size - 1
65
- # output << "\n"
66
- # end
58
+ if entity_refs && !entity_refs.empty? && child_sequence
59
+ # Interleave native children with entity refs using tracked sequence
60
+ eref_idx = 0
61
+ native_idx = 0
62
+ child_sequence.each do |type|
63
+ case type
64
+ when :native
65
+ if native_idx < node.children.size
66
+ child = node.children[native_idx]
67
+ native_idx += 1
68
+ next if child.is_a?(::REXML::Text) &&
69
+ child.to_s.strip.empty? &&
70
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
+ write(child, output)
72
+ end
73
+ when :eref
74
+ if eref_idx < entity_refs.size
75
+ write(entity_refs[eref_idx], output)
76
+ eref_idx += 1
77
+ end
78
+ end
79
+ end
80
+ else
81
+ node.children.each_with_index do |child, _index|
82
+ # Skip insignificant whitespace
83
+ next if child.is_a?(::REXML::Text) &&
84
+ child.to_s.strip.empty? &&
85
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
86
+
87
+ write(child, output)
88
+ end
67
89
  end
68
90
 
69
91
  # Reset indentation for closing tag in non-mixed content