moxml 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +34 -1
  6. data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
  7. data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
  8. data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
  9. data/TODO.remaining/4-lenient-entities-mode.md +106 -0
  10. data/TODO.remaining/5-fixture-integrity.md +65 -0
  11. data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
  12. data/TODO.remaining/7-headed-ox-limitations.md +95 -0
  13. data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
  14. data/TODO.remaining/9-cleanup-hygiene.md +42 -0
  15. data/TODO.remaining/README.md +54 -0
  16. data/benchmarks/generate_report.rb +1 -1
  17. data/docs/_pages/configuration.adoc +22 -19
  18. data/docs/_tutorials/namespace-handling.adoc +5 -5
  19. data/lib/moxml/adapter/base.rb +22 -3
  20. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  21. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  22. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  23. data/lib/moxml/adapter/customized_oga.rb +10 -0
  24. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  25. data/lib/moxml/adapter/customized_ox.rb +12 -0
  26. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  27. data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
  28. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  29. data/lib/moxml/adapter/headed_ox.rb +37 -14
  30. data/lib/moxml/adapter/libxml.rb +233 -119
  31. data/lib/moxml/adapter/nokogiri.rb +22 -11
  32. data/lib/moxml/adapter/oga.rb +64 -25
  33. data/lib/moxml/adapter/ox.rb +198 -42
  34. data/lib/moxml/adapter/rexml.rb +64 -13
  35. data/lib/moxml/attribute.rb +3 -0
  36. data/lib/moxml/builder.rb +78 -24
  37. data/lib/moxml/config.rb +24 -7
  38. data/lib/moxml/declaration.rb +4 -2
  39. data/lib/moxml/document.rb +8 -1
  40. data/lib/moxml/document_builder.rb +44 -37
  41. data/lib/moxml/element.rb +18 -5
  42. data/lib/moxml/entity_registry.rb +51 -1
  43. data/lib/moxml/native_attachment.rb +65 -0
  44. data/lib/moxml/node.rb +39 -50
  45. data/lib/moxml/node_set.rb +43 -15
  46. data/lib/moxml/version.rb +1 -1
  47. data/lib/moxml/xml_utils.rb +1 -1
  48. data/lib/moxml/xpath/compiler.rb +4 -1
  49. data/lib/moxml.rb +1 -0
  50. data/scripts/format_xml.rb +16 -0
  51. data/scripts/pretty_format_xml.rb +14 -0
  52. data/spec/consistency/round_trip_spec.rb +3 -30
  53. data/spec/integration/all_adapters_spec.rb +1 -0
  54. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  55. data/spec/integration/shared_examples/edge_cases.rb +7 -4
  56. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  57. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
  58. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  59. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
  60. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  61. data/spec/moxml/adapter/oga_spec.rb +46 -0
  62. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  63. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  64. data/spec/moxml/allocation_guard_spec.rb +282 -0
  65. data/spec/moxml/builder_spec.rb +256 -0
  66. data/spec/moxml/config_spec.rb +11 -11
  67. data/spec/moxml/doctype_spec.rb +41 -0
  68. data/spec/moxml/lazy_parse_spec.rb +115 -0
  69. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  70. data/spec/moxml/node_cache_spec.rb +110 -0
  71. data/spec/moxml/node_set_cache_spec.rb +90 -0
  72. data/spec/moxml/xml_utils_spec.rb +32 -0
  73. data/spec/moxml/xpath/axes_spec.rb +1 -1
  74. data/spec/moxml/xpath/compiler_spec.rb +2 -2
  75. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  76. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  77. data/spec/performance/memory_usage_spec.rb +0 -4
  78. data/spec/support/allocation_helper.rb +165 -0
  79. data/spec/support/w3c_namespace_helpers.rb +2 -1
  80. metadata +29 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46c9d241e38351b1547ab8be8297ff2902886f022ccdfb3fc2f196d2594bf2b
4
- data.tar.gz: b1d34fb4306d96d940afea932d5d9f6c6bcffc74c6715e00a6f622302be2f878
3
+ metadata.gz: 378f1400934e3a65fb230779fc4b1783aab059efb449912a6dc2d97c8d82903e
4
+ data.tar.gz: 7cd2739dd2dc41c2edb69c129cc4ec175a7a6b8e455d4d63cfd56bd2a93e808f
5
5
  SHA512:
6
- metadata.gz: 9178c13199c195aa0b3ed628d5837fa6c1fdaeaa3e3cc462a8520a89ad6bb98963ff0a9c2b668038ff27a0653e2aa2fba98ce1c8bc10eccc74abab0daa26e455
7
- data.tar.gz: 5b1b06dea56ac0b28dd2b8ca072fa554039936c336e6b262430d40f45ab2972b1e6bf38f3c13cf192c9fc2a9cc9fe827abc99aa662d30ebdaae97d361d981475
6
+ metadata.gz: 311f4905dcf14fb3ec209491d9a5eae9b8fe460152f29c7f7b428db37b1c2adac09e538ce9c0a8a4eeff2b0af83a2e8b4a787adca59cb04d1c7f1b14b7fbf37d
7
+ data.tar.gz: 36cc3ce0e2328547137f1716d7b7ef3de4e07cbca160b08d8fbe74ef126edd6e61fe4dc0ed1d8767ed19f573792fc8fdc52c41e332802698218584db559576e0
data/.rubocop_todo.yml CHANGED
@@ -1,96 +1,97 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-04-05 03:01:25 UTC using RuboCop version 1.86.0.
3
+ # on 2026-04-22 01:41:34 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 5
9
+ # Offense count: 18
10
10
  # This cop supports safe autocorrection (--autocorrect).
11
11
  # Configuration parameters: EnforcedStyle, IndentationWidth.
12
12
  # SupportedStyles: with_first_argument, with_fixed_indentation
13
13
  Layout/ArgumentAlignment:
14
14
  Exclude:
15
- - 'lib/moxml/config.rb'
16
- - 'lib/moxml/entity_registry.rb'
17
- - 'spec/moxml/entity_registry_spec.rb'
15
+ - 'spec/moxml/allocation_benchmark_spec.rb'
16
+ - 'spec/moxml/allocation_guard_spec.rb'
18
17
 
19
- # Offense count: 1
18
+ # Offense count: 14
20
19
  # This cop supports safe autocorrection (--autocorrect).
21
20
  # Configuration parameters: EnforcedStyleAlignWith.
22
21
  # SupportedStylesAlignWith: either, start_of_block, start_of_line
23
22
  Layout/BlockAlignment:
24
23
  Exclude:
25
- - 'spec/moxml/config_spec.rb'
24
+ - 'lib/moxml/adapter/ox.rb'
25
+ - 'spec/moxml/allocation_benchmark_spec.rb'
26
+ - 'spec/moxml/allocation_guard_spec.rb'
27
+ - 'spec/moxml/lazy_parse_spec.rb'
28
+ - 'spec/moxml/node_cache_spec.rb'
26
29
 
27
- # Offense count: 1
30
+ # Offense count: 7
28
31
  # This cop supports safe autocorrection (--autocorrect).
29
32
  Layout/BlockEndNewline:
30
33
  Exclude:
31
- - 'spec/moxml/config_spec.rb'
34
+ - 'lib/moxml/adapter/ox.rb'
35
+ - 'spec/moxml/allocation_benchmark_spec.rb'
36
+ - 'spec/moxml/allocation_guard_spec.rb'
37
+ - 'spec/moxml/lazy_parse_spec.rb'
38
+ - 'spec/moxml/node_cache_spec.rb'
32
39
 
33
- # Offense count: 1
40
+ # Offense count: 3
34
41
  # This cop supports safe autocorrection (--autocorrect).
35
42
  Layout/ClosingParenthesisIndentation:
36
43
  Exclude:
37
- - 'spec/moxml/entity_registry_spec.rb'
44
+ - 'spec/moxml/allocation_guard_spec.rb'
38
45
 
39
- # Offense count: 1
40
- # This cop supports safe autocorrection (--autocorrect).
41
- Layout/EmptyLineAfterGuardClause:
42
- Exclude:
43
- - 'lib/moxml/config.rb'
44
-
45
- # Offense count: 1
46
+ # Offense count: 3
46
47
  # This cop supports safe autocorrection (--autocorrect).
47
48
  # Configuration parameters: EnforcedStyle, IndentationWidth.
48
49
  # SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
49
50
  Layout/FirstArgumentIndentation:
50
51
  Exclude:
51
- - 'spec/moxml/entity_registry_spec.rb'
52
-
53
- # Offense count: 2
54
- # This cop supports safe autocorrection (--autocorrect).
55
- # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
56
- # SupportedHashRocketStyles: key, separator, table
57
- # SupportedColonStyles: key, separator, table
58
- # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
59
- Layout/HashAlignment:
60
- Exclude:
61
- - 'spec/moxml/entity_registry_spec.rb'
52
+ - 'spec/moxml/allocation_guard_spec.rb'
62
53
 
63
- # Offense count: 2
54
+ # Offense count: 13
64
55
  # This cop supports safe autocorrection (--autocorrect).
65
56
  # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
66
57
  # SupportedStylesAlignWith: start_of_line, relative_to_receiver
67
58
  Layout/IndentationWidth:
68
59
  Exclude:
69
- - 'spec/moxml/config_spec.rb'
60
+ - 'lib/moxml/adapter/ox.rb'
61
+ - 'spec/moxml/allocation_benchmark_spec.rb'
62
+ - 'spec/moxml/allocation_guard_spec.rb'
63
+ - 'spec/moxml/lazy_parse_spec.rb'
64
+ - 'spec/moxml/node_cache_spec.rb'
70
65
 
71
- # Offense count: 238
66
+ # Offense count: 307
72
67
  # This cop supports safe autocorrection (--autocorrect).
73
68
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
74
69
  # URISchemes: http, https
75
70
  Layout/LineLength:
76
71
  Enabled: false
77
72
 
78
- # Offense count: 1
73
+ # Offense count: 3
74
+ # This cop supports safe autocorrection (--autocorrect).
75
+ Layout/MultilineBlockLayout:
76
+ Exclude:
77
+ - 'spec/moxml/allocation_benchmark_spec.rb'
78
+ - 'spec/moxml/allocation_guard_spec.rb'
79
+
80
+ # Offense count: 3
79
81
  # This cop supports safe autocorrection (--autocorrect).
80
82
  # Configuration parameters: EnforcedStyle.
81
83
  # SupportedStyles: symmetrical, new_line, same_line
82
84
  Layout/MultilineMethodCallBraceLayout:
83
85
  Exclude:
84
- - 'spec/moxml/entity_registry_spec.rb'
86
+ - 'spec/moxml/allocation_guard_spec.rb'
85
87
 
86
- # Offense count: 4
88
+ # Offense count: 3
87
89
  # This cop supports safe autocorrection (--autocorrect).
88
90
  # Configuration parameters: AllowInHeredoc.
89
91
  Layout/TrailingWhitespace:
90
92
  Exclude:
91
- - 'lib/moxml/config.rb'
92
- - 'lib/moxml/entity_registry.rb'
93
- - 'spec/moxml/entity_registry_spec.rb'
93
+ - 'spec/moxml/allocation_benchmark_spec.rb'
94
+ - 'spec/moxml/allocation_guard_spec.rb'
94
95
 
95
96
  # Offense count: 7
96
97
  # Configuration parameters: AllowedMethods.
@@ -100,26 +101,28 @@ Lint/ConstantDefinitionInBlock:
100
101
  - 'spec/moxml/declaration_preservation_spec.rb'
101
102
  - 'spec/moxml/sax_spec.rb'
102
103
 
103
- # Offense count: 6
104
+ # Offense count: 8
104
105
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
105
106
  Lint/DuplicateBranch:
106
107
  Exclude:
107
108
  - 'benchmarks/generate_report.rb'
108
109
  - 'lib/moxml/adapter/customized_libxml/declaration.rb'
109
110
  - 'lib/moxml/adapter/libxml.rb'
111
+ - 'lib/moxml/adapter/ox.rb'
110
112
  - 'lib/moxml/document.rb'
111
113
 
112
- # Offense count: 3
114
+ # Offense count: 4
113
115
  Lint/DuplicateMethods:
114
116
  Exclude:
115
117
  - 'lib/moxml/config.rb'
116
118
  - 'lib/moxml/element.rb'
117
119
  - 'lib/moxml/node.rb'
118
120
 
119
- # Offense count: 2
121
+ # Offense count: 4
120
122
  # Configuration parameters: AllowComments, AllowEmptyLambdas.
121
123
  Lint/EmptyBlock:
122
124
  Exclude:
125
+ - 'spec/moxml/allocation_benchmark_spec.rb'
123
126
  - 'spec/moxml/xpath/axes_spec.rb'
124
127
 
125
128
  # Offense count: 1
@@ -153,7 +156,7 @@ Lint/NoReturnInBeginEndBlocks:
153
156
  Exclude:
154
157
  - 'examples/api_client/api_client.rb'
155
158
 
156
- # Offense count: 97
159
+ # Offense count: 100
157
160
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
158
161
  Metrics/AbcSize:
159
162
  Enabled: false
@@ -169,12 +172,12 @@ Metrics/BlockLength:
169
172
  Metrics/BlockNesting:
170
173
  Max: 4
171
174
 
172
- # Offense count: 65
175
+ # Offense count: 70
173
176
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
174
177
  Metrics/CyclomaticComplexity:
175
178
  Enabled: false
176
179
 
177
- # Offense count: 170
180
+ # Offense count: 182
178
181
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
179
182
  Metrics/MethodLength:
180
183
  Max: 110
@@ -184,11 +187,19 @@ Metrics/MethodLength:
184
187
  Metrics/ParameterLists:
185
188
  Max: 7
186
189
 
187
- # Offense count: 44
190
+ # Offense count: 47
188
191
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
189
192
  Metrics/PerceivedComplexity:
190
193
  Enabled: false
191
194
 
195
+ # Offense count: 2
196
+ # This cop supports unsafe autocorrection (--autocorrect-all).
197
+ # Configuration parameters: EnforcedStyleForLeadingUnderscores.
198
+ # SupportedStylesForLeadingUnderscores: disallowed, required, optional
199
+ Naming/MemoizedInstanceVariableName:
200
+ Exclude:
201
+ - 'lib/moxml/element.rb'
202
+
192
203
  # Offense count: 16
193
204
  # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
194
205
  # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
@@ -213,6 +224,33 @@ Naming/PredicateMethod:
213
224
  - 'lib/moxml/config.rb'
214
225
  - 'lib/moxml/xpath/ruby/node.rb'
215
226
 
227
+ # Offense count: 10
228
+ # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
229
+ # SupportedStyles: snake_case, normalcase, non_integer
230
+ # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
231
+ Naming/VariableNumber:
232
+ Exclude:
233
+ - 'spec/moxml/allocation_guard_spec.rb'
234
+ - 'spec/support/allocation_helper.rb'
235
+
236
+ # Offense count: 1
237
+ # This cop supports unsafe autocorrection (--autocorrect-all).
238
+ Performance/TimesMap:
239
+ Exclude:
240
+ - 'spec/support/allocation_helper.rb'
241
+
242
+ # Offense count: 5
243
+ RSpec/BeforeAfterAll:
244
+ Exclude:
245
+ - '**/spec/spec_helper.rb'
246
+ - '**/spec/rails_helper.rb'
247
+ - '**/spec/support/**/*.rb'
248
+ - 'spec/moxml/allocation_benchmark_spec.rb'
249
+ - 'spec/moxml/allocation_guard_spec.rb'
250
+ - 'spec/moxml/lazy_parse_spec.rb'
251
+ - 'spec/moxml/node_cache_spec.rb'
252
+ - 'spec/moxml/node_set_cache_spec.rb'
253
+
216
254
  # Offense count: 46
217
255
  # Configuration parameters: Prefixes, AllowedPatterns.
218
256
  # Prefixes: when, with, without
@@ -226,12 +264,12 @@ RSpec/ContextWording:
226
264
  - 'spec/moxml/xpath/parser_spec.rb'
227
265
  - 'spec/performance/benchmark_spec.rb'
228
266
 
229
- # Offense count: 16
267
+ # Offense count: 23
230
268
  # Configuration parameters: IgnoredMetadata.
231
269
  RSpec/DescribeClass:
232
270
  Enabled: false
233
271
 
234
- # Offense count: 233
272
+ # Offense count: 271
235
273
  # Configuration parameters: CountAsOne.
236
274
  RSpec/ExampleLength:
237
275
  Max: 64
@@ -267,11 +305,11 @@ RSpec/LeakyConstantDeclaration:
267
305
  RSpec/MessageSpies:
268
306
  EnforcedStyle: receive
269
307
 
270
- # Offense count: 327
308
+ # Offense count: 356
271
309
  RSpec/MultipleExpectations:
272
310
  Max: 10
273
311
 
274
- # Offense count: 2
312
+ # Offense count: 4
275
313
  # Configuration parameters: AllowSubject.
276
314
  RSpec/MultipleMemoizedHelpers:
277
315
  Max: 7
@@ -333,7 +371,7 @@ Security/Eval:
333
371
  Exclude:
334
372
  - 'spec/moxml/xpath/ruby/generator_spec.rb'
335
373
 
336
- # Offense count: 3
374
+ # Offense count: 11
337
375
  # This cop supports safe autocorrection (--autocorrect).
338
376
  # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
339
377
  # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
@@ -342,8 +380,11 @@ Security/Eval:
342
380
  # AllowedMethods: lambda, proc, it
343
381
  Style/BlockDelimiters:
344
382
  Exclude:
345
- - 'spec/moxml/config_spec.rb'
346
- - 'spec/moxml/entity_registry_spec.rb'
383
+ - 'lib/moxml/adapter/ox.rb'
384
+ - 'spec/moxml/allocation_benchmark_spec.rb'
385
+ - 'spec/moxml/allocation_guard_spec.rb'
386
+ - 'spec/moxml/lazy_parse_spec.rb'
387
+ - 'spec/moxml/node_cache_spec.rb'
347
388
 
348
389
  # Offense count: 1
349
390
  Style/DocumentDynamicEvalDefinition:
@@ -356,28 +397,32 @@ Style/EvalWithLocation:
356
397
  Exclude:
357
398
  - 'spec/moxml/xpath/ruby/generator_spec.rb'
358
399
 
359
- # Offense count: 2
400
+ # Offense count: 4
360
401
  # Configuration parameters: MinBranchesCount.
361
402
  Style/HashLikeCase:
362
403
  Exclude:
363
404
  - 'lib/moxml/adapter/customized_rexml/formatter.rb'
405
+ - 'lib/moxml/adapter/ox.rb'
364
406
 
365
407
  # Offense count: 1
366
- Style/MissingRespondToMissing:
408
+ # This cop supports unsafe autocorrection (--autocorrect-all).
409
+ Style/MapToHash:
367
410
  Exclude:
368
- - 'lib/moxml/xpath/ruby/node.rb'
411
+ - 'spec/moxml/node_cache_spec.rb'
369
412
 
370
- # Offense count: 2
371
- # This cop supports safe autocorrection (--autocorrect).
372
- Style/MultilineIfModifier:
413
+ # Offense count: 1
414
+ Style/MissingRespondToMissing:
373
415
  Exclude:
374
- - 'lib/moxml/entity_registry.rb'
416
+ - 'lib/moxml/xpath/ruby/node.rb'
375
417
 
376
418
  # Offense count: 1
377
- # This cop supports safe autocorrection (--autocorrect).
378
- Style/NilLambda:
419
+ # This cop supports unsafe autocorrection (--autocorrect-all).
420
+ # Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
421
+ # SupportedStyles: predicate, comparison
422
+ Style/NumericPredicate:
379
423
  Exclude:
380
- - 'spec/moxml/entity_registry_spec.rb'
424
+ - 'spec/**/*'
425
+ - 'lib/moxml/node_set.rb'
381
426
 
382
427
  # Offense count: 5
383
428
  # Configuration parameters: AllowedClasses.
@@ -395,10 +440,16 @@ Style/OptionalBooleanParameter:
395
440
  - 'lib/moxml/adapter/libxml.rb'
396
441
  - 'lib/moxml/xpath/compiler.rb'
397
442
 
443
+ # Offense count: 1
444
+ # This cop supports unsafe autocorrection (--autocorrect-all).
445
+ Style/SelectByKind:
446
+ Exclude:
447
+ - 'lib/moxml/adapter/ox.rb'
448
+
398
449
  # Offense count: 1
399
450
  # This cop supports safe autocorrection (--autocorrect).
400
- # Configuration parameters: EnforcedStyleForMultiline.
401
- # SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
402
- Style/TrailingCommaInArguments:
451
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
452
+ # SupportedStyles: single_quotes, double_quotes
453
+ Style/StringLiterals:
403
454
  Exclude:
404
- - 'lib/moxml/context.rb'
455
+ - 'spec/moxml/lazy_parse_spec.rb'
data/Gemfile CHANGED
@@ -22,6 +22,7 @@ gem "rubocop-performance"
22
22
  gem "rubocop-rake"
23
23
  gem "rubocop-rspec"
24
24
  gem "simplecov", require: false
25
+ gem "stackprof"
25
26
  gem "tempfile"
26
27
 
27
28
  # Needed by get_process_mem on Windows
data/README.adoc CHANGED
@@ -815,25 +815,27 @@ context = Moxml.new do |config|
815
815
  end
816
816
  ----
817
817
 
818
- === Namespace URI validation
818
+ === Namespace validation
819
819
 
820
820
  Moxml validates namespace URIs against
821
- https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] by default, as required by the
822
- https://www.w3.org/TR/xml-names/[W3C Namespaces in XML] specification.
821
+ https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] and namespace prefixes against
822
+ the https://www.w3.org/TR/xml-names/[W3C Namespaces in XML] NCName production
823
+ rules by default.
823
824
 
824
- For documents that use non-standard namespace identifiers, a lenient mode is
825
- available:
825
+ For documents that use non-standard namespace identifiers or prefixes, a lenient
826
+ mode is available:
826
827
 
827
828
  [source,ruby]
828
829
  ----
829
- # Strict mode (default) — rejects invalid URIs per RFC 3986
830
+ # Strict mode (default) — validates URI per RFC 3986 and prefix per NCName rules
830
831
  context = Moxml.new do |config|
831
- config.namespace_uri_mode = :strict
832
+ config.namespace_validation_mode = :strict
832
833
  end
833
834
 
834
- # Lenient mode — accepts any string as a namespace URI
835
+ # Lenient mode — accepts any URI string and defers prefix validation to the
836
+ # underlying XML parser
835
837
  context = Moxml.new do |config|
836
- config.namespace_uri_mode = :lenient
838
+ config.namespace_validation_mode = :lenient
837
839
  end
838
840
  ----
839
841
 
data/Rakefile CHANGED
@@ -10,6 +10,37 @@ require "rubocop/rake_task"
10
10
  RuboCop::RakeTask.new
11
11
 
12
12
  namespace :spec do
13
+ desc "Validate XML fixtures are well-formed (requires xmllint)"
14
+ task :validate_fixtures do
15
+ fixtures = Dir.glob("spec/fixtures/**/*.xml")
16
+ if fixtures.empty?
17
+ abort "No XML fixtures found in spec/fixtures/"
18
+ end
19
+
20
+ unless system("which xmllint > /dev/null 2>&1")
21
+ abort "xmllint not found. Install with: brew install libxml2 (macOS) or apt install libxml2-utils (Linux)"
22
+ end
23
+
24
+ # Intentionally malformed fixtures (W3C test cases for error handling)
25
+ exemptions = %w[
26
+ spec/fixtures/w3c/namespaces/1.0/035.xml
27
+ ]
28
+
29
+ errors = []
30
+ fixtures.each do |path|
31
+ next if exemptions.include?(path)
32
+
33
+ output = `xmllint --noout "#{path}" 2>&1`
34
+ errors << "#{path}: #{output.strip}" unless $?.success?
35
+ end
36
+
37
+ if errors.empty?
38
+ puts "#{fixtures.size} XML fixtures validated OK"
39
+ else
40
+ abort "Invalid fixtures:\n#{errors.join("\n")}"
41
+ end
42
+ end
43
+
13
44
  desc "Run unit tests only"
14
45
  RSpec::Core::RakeTask.new(:unit) do |t|
15
46
  t.pattern = "spec/unit/**/*_spec.rb"
@@ -36,7 +67,9 @@ namespace :spec do
36
67
  categories = ENV.fetch("CATEGORIES", "").split(",").map(&:strip)
37
68
  abort "Usage: CATEGORIES=metanorma,rfcxml rake spec:consistency:by_category" if categories.empty?
38
69
 
39
- include_filters = categories.map { |c| "--tag fixture_category:#{c}" }.join(" ")
70
+ include_filters = categories.map do |c|
71
+ "--tag fixture_category:#{c}"
72
+ end.join(" ")
40
73
  sh "bundle exec rspec spec/consistency/ --tag round_trip #{include_filters}"
41
74
  end
42
75
  end
data/TODO.remaining/1-entity-reference-adapter-support.md ADDED
@@ -0,0 +1,157 @@
1
+ # TODO 1: EntityReference Adapter Support for Ox, Oga, REXML, LibXML, HeadedOx
2
+
3
+ ## Problem
4
+
5
+ Only the Nokogiri adapter implements `create_native_entity_reference` and maps
6
+ its native type to `:entity_reference` in `node_type`. The other 5 adapters
7
+ will raise `NotImplementedError` if `restore_entities` is enabled or if any
8
+ code path calls `create_entity_reference`. This makes the entire
9
+ EntityReference feature **non-functional** outside Nokogiri.
10
+
11
+ ## Current State (verified)
12
+
13
+ | Adapter | `create_native_entity_reference` | `node_type` mapping | Serialization | Status |
14
+ |-----------|----------------------------------|---------------------|---------------|--------|
15
+ | Nokogiri | Done (`Nokogiri::XML::EntityReference.new`) | Done | Native | Working |
16
+ | Ox | Missing | Missing | Uses `Ox.dump` (C-level, won't handle custom types) | Broken |
17
+ | HeadedOx | Missing (inherits Ox) | Missing | Same as Ox | Broken |
18
+ | Oga | Missing | Missing | Uses `CustomizedOga::XmlGenerator` | Broken |
19
+ | REXML | Missing | Missing | Uses REXML's `write` | Broken |
20
+ | LibXML | Missing | Missing | Uses custom serializer with wrapper detection | Broken |
21
+
22
+ ## Architecture
23
+
24
+ EntityReference follows the same pattern as other non-native node types in Moxml:
25
+ a **wrapper class** that represents what the underlying library cannot express natively.
26
+
27
+ Each adapter needs three things:
28
+ 1. **Wrapper class** (`CustomizedXxx::EntityReference`) — holds the entity name
29
+ 2. **`node_type` mapping** — so `Node.wrap` can create the correct Moxml type
30
+ 3. **Serialization** — so `to_xml` outputs `&name;`
31
+
32
+ The existing pattern: `CustomizedOx::Text` extends `::Ox::Node`,
33
+ `CustomizedOx::Attribute` extends `::Ox::Node`. EntityReference should follow suit.
34
+
35
+ ### Serialization Challenge for Ox
36
+
37
+ Ox's `serialize` calls `::Ox.dump(node)` which is C-level — it only handles
38
+ Ox native types. For EntityReference wrappers to survive serialization, we need
39
+ one of:
40
+
41
+ - **Option A**: Custom serialization in the adapter that walks the tree manually,
42
+ detecting EntityReference wrappers and emitting `&name;` directly.
43
+ - **Option B**: Convert EntityReferences to their text equivalent before calling
44
+ `Ox.dump`, restoring them in a post-processing step. This is fragile.
45
+ - **Option C**: Override `serialize` for Element nodes to handle children
46
+ individually, using `Ox.dump` for native children but handling wrappers
47
+ directly.
48
+
49
+ **Recommended: Option A** — it's how `CustomizedOga::XmlGenerator` already works
50
+ for Oga. A similar tree-walking serializer for Ox gives full control.
51
+
52
+ For LibXML, the existing serializer already checks `node.respond_to?(:to_xml)`
53
+ for wrapper classes, so adding an EntityReference wrapper with `to_xml` returning
54
+ `"&#{name};"` should integrate cleanly.
55
+
56
+ ## Implementation Steps
57
+
58
+ ### Ox Adapter
59
+
60
+ 1. Create `lib/moxml/adapter/customized_ox/entity_reference.rb`:
61
+ ```ruby
62
+ module Moxml::Adapter::CustomizedOx
63
+ class EntityReference < ::Ox::Node
64
+ attr_reader :name
65
+
66
+ def initialize(name)
67
+ @name = name
68
+ super() # Ox::Node requires no args or a value
69
+ end
70
+
71
+ def to_xml
72
+ "&#{@name};"
73
+ end
74
+ alias to_s to_xml
75
+ end
76
+ end
77
+ ```
78
+
79
+ 2. Add to `lib/moxml/adapter/ox.rb`:
80
+ - `create_native_entity_reference(name)` → `CustomizedOx::EntityReference.new(name)`
81
+ - `node_type`: add `when CustomizedOx::EntityReference then :entity_reference`
82
+ - `patch_node`: handle EntityReference wrapper in child list
83
+ - `entity_reference_name(node)`: return `node.name`
84
+ - Serialization: handle EntityReference children when walking the tree
85
+
86
+ 3. Add to `lib/moxml/adapter/ox.rb` `unpatch_node`: return wrapper as-is
87
+ (it extends Ox::Node so it can stay in the tree)
88
+
89
+ ### HeadedOx Adapter
90
+
91
+ HeadedOx inherits from Ox, so it gets Ox's EntityReference support
92
+ automatically once Ox is done. Verify that the XPath engine doesn't
93
+ break when encountering EntityReference nodes in the tree.
94
+
95
+ ### Oga Adapter
96
+
97
+ 1. Create `lib/moxml/adapter/customized_oga/entity_reference.rb`:
98
+ ```ruby
99
+ module Moxml::Adapter::CustomizedOga
100
+ class EntityReference
101
+ attr_reader :name
102
+
103
+ def initialize(name)
104
+ @name = name
105
+ end
106
+
107
+ def to_xml
108
+ "&#{@name};"
109
+ end
110
+ end
111
+ end
112
+ ```
113
+
114
+ 2. Add to `lib/moxml/adapter/oga.rb`:
115
+ - `create_native_entity_reference(name)` → `CustomizedOga::EntityReference.new(name)`
116
+ - `node_type`: add `when CustomizedOga::EntityReference then :entity_reference`
117
+ - Update `CustomizedOga::XmlGenerator` to handle EntityReference children
118
+ - `entity_reference_name(node)`: return `node.name`
119
+
120
+ ### REXML Adapter
121
+
122
+ 1. Investigate: REXML has `REXML::Entity` and `REXML::EntityRef` classes.
123
+ Check if they can be used as native entity reference nodes, or if a
124
+ wrapper is needed.
125
+
126
+ 2. Add to `lib/moxml/adapter/rexml.rb`:
127
+ - `create_native_entity_reference(name)` — native or wrapper
128
+ - `node_type`: add mapping
129
+ - `entity_reference_name(node)`
130
+
131
+ ### LibXML Adapter
132
+
133
+ 1. Investigate: LibXML Ruby has `LibXML::XML::Node::ENTITY_REF_NODE` constant
134
+ (value 5). Check if native entity reference nodes can be created.
135
+
136
+ 2. Create `lib/moxml/adapter/customized_libxml/entity_reference.rb` if needed.
137
+
138
+ 3. Add to `lib/moxml/adapter/libxml.rb`:
139
+ - `create_native_entity_reference(name)`
140
+ - `node_type`: add `ENTITY_REF_NODE` mapping or wrapper mapping
141
+ - `entity_reference_name(node)`
142
+ - The existing serializer already handles wrappers with `to_xml` —
143
+ verify EntityReference works in this path.
144
+
145
+ ## Files to Create/Modify
146
+
147
+ ### New Files
148
+ - `lib/moxml/adapter/customized_ox/entity_reference.rb`
149
+ - `lib/moxml/adapter/customized_oga/entity_reference.rb`
150
+ - Possibly: `lib/moxml/adapter/customized_libxml/entity_reference.rb`
151
+
152
+ ### Modified Files
153
+ - `lib/moxml/adapter/ox.rb` — create_native_entity_reference, node_type, serialization
154
+ - `lib/moxml/adapter/oga.rb` — create_native_entity_reference, node_type, XmlGenerator
155
+ - `lib/moxml/adapter/rexml.rb` — create_native_entity_reference, node_type
156
+ - `lib/moxml/adapter/libxml.rb` — create_native_entity_reference, node_type
157
+ - `lib/moxml/adapter/headed_ox.rb` — verify inheritance works (likely no changes)