moxml 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -6
- data/lib/moxml/adapter/nokogiri.rb +13 -7
- data/lib/moxml/adapter/oga.rb +35 -90
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +26 -9
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/config.rb +17 -2
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +85 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +124 -0
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
- data/spec/integration/shared_examples/integration_workflows.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/moxml_spec.rb +39 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c1daf227e9effc582c66e780516135481aa48e467226a4267974633a4673f786
|
|
4
|
+
data.tar.gz: 5b230e79a208eb4b1c5e32175df364e5b42f9e766c4c8189eb18fcad09bb79bf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e39941f6f51567655c246f1e8d6225c6ab572c68958fd9ca4a74ca191af4493e1bf74ff0feed84792cc87e5681cce1d8ae291533e992d07b70fdfb1063d96a67
|
|
7
|
+
data.tar.gz: 41ca4a3954bf2e713703124758309e5d1e056d44826cdacf3b4693c5bf37c463579462a6c0ecb3aa65e4c3cafaeef1e2ad0d8a5af9ebe2e1b9f4a695e7678ff4
|
data/.gitignore
CHANGED
|
@@ -28,6 +28,9 @@ libxml_*.txt
|
|
|
28
28
|
# Generated benchmark reports (machine-specific)
|
|
29
29
|
/benchmarks/PERFORMANCE_REPORT.md
|
|
30
30
|
|
|
31
|
+
# Local TODO tracking (kept locally, not committed)
|
|
32
|
+
TODO*
|
|
33
|
+
|
|
31
34
|
# IDE and editor files
|
|
32
35
|
.vscode/
|
|
33
36
|
.idea/
|
|
@@ -49,3 +52,6 @@ libxml_*.txt
|
|
|
49
52
|
/_site
|
|
50
53
|
/docs/_site
|
|
51
54
|
/docs/.jekyll-cache
|
|
55
|
+
|
|
56
|
+
# Utility scripts (local only)
|
|
57
|
+
/scripts/
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,97 +1,46 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-04-
|
|
3
|
+
# on 2026-04-23 07:48:23 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count:
|
|
10
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
12
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
13
|
-
Layout/ArgumentAlignment:
|
|
14
|
-
Exclude:
|
|
15
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
16
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
17
|
-
|
|
18
|
-
# Offense count: 14
|
|
9
|
+
# Offense count: 4
|
|
19
10
|
# This cop supports safe autocorrection (--autocorrect).
|
|
20
|
-
|
|
21
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
22
|
-
Layout/BlockAlignment:
|
|
11
|
+
Layout/EmptyLineAfterGuardClause:
|
|
23
12
|
Exclude:
|
|
24
|
-
- 'lib/moxml/adapter/
|
|
25
|
-
- '
|
|
26
|
-
- '
|
|
27
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
28
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
13
|
+
- 'lib/moxml/adapter/customized_rexml/formatter.rb'
|
|
14
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
15
|
+
- 'lib/moxml/entity_registry.rb'
|
|
29
16
|
|
|
30
|
-
# Offense count:
|
|
17
|
+
# Offense count: 1
|
|
31
18
|
# This cop supports safe autocorrection (--autocorrect).
|
|
32
|
-
|
|
19
|
+
# Configuration parameters: EmptyLineBetweenMethodDefs, EmptyLineBetweenClassDefs, EmptyLineBetweenModuleDefs, DefLikeMacros, AllowAdjacentOneLineDefs, NumberOfEmptyLines.
|
|
20
|
+
Layout/EmptyLineBetweenDefs:
|
|
33
21
|
Exclude:
|
|
34
22
|
- 'lib/moxml/adapter/ox.rb'
|
|
35
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
36
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
37
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
38
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
39
23
|
|
|
40
|
-
# Offense count:
|
|
41
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
42
|
-
Layout/ClosingParenthesisIndentation:
|
|
43
|
-
Exclude:
|
|
44
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
45
|
-
|
|
46
|
-
# Offense count: 3
|
|
47
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
48
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
49
|
-
# SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
|
|
50
|
-
Layout/FirstArgumentIndentation:
|
|
51
|
-
Exclude:
|
|
52
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
53
|
-
|
|
54
|
-
# Offense count: 13
|
|
24
|
+
# Offense count: 1
|
|
55
25
|
# This cop supports safe autocorrection (--autocorrect).
|
|
56
|
-
|
|
57
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
58
|
-
Layout/IndentationWidth:
|
|
26
|
+
Layout/EmptyLines:
|
|
59
27
|
Exclude:
|
|
60
28
|
- 'lib/moxml/adapter/ox.rb'
|
|
61
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
62
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
63
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
64
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
65
29
|
|
|
66
|
-
# Offense count:
|
|
30
|
+
# Offense count: 330
|
|
67
31
|
# This cop supports safe autocorrection (--autocorrect).
|
|
68
32
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
69
33
|
# URISchemes: http, https
|
|
70
34
|
Layout/LineLength:
|
|
71
35
|
Enabled: false
|
|
72
36
|
|
|
73
|
-
# Offense count:
|
|
74
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
75
|
-
Layout/MultilineBlockLayout:
|
|
76
|
-
Exclude:
|
|
77
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
78
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
79
|
-
|
|
80
|
-
# Offense count: 3
|
|
81
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
82
|
-
# Configuration parameters: EnforcedStyle.
|
|
83
|
-
# SupportedStyles: symmetrical, new_line, same_line
|
|
84
|
-
Layout/MultilineMethodCallBraceLayout:
|
|
85
|
-
Exclude:
|
|
86
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
87
|
-
|
|
88
|
-
# Offense count: 3
|
|
37
|
+
# Offense count: 1
|
|
89
38
|
# This cop supports safe autocorrection (--autocorrect).
|
|
90
|
-
# Configuration parameters:
|
|
91
|
-
|
|
39
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
40
|
+
# SupportedStyles: aligned, indented
|
|
41
|
+
Layout/MultilineOperationIndentation:
|
|
92
42
|
Exclude:
|
|
93
|
-
- '
|
|
94
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
43
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
95
44
|
|
|
96
45
|
# Offense count: 7
|
|
97
46
|
# Configuration parameters: AllowedMethods.
|
|
@@ -101,7 +50,7 @@ Lint/ConstantDefinitionInBlock:
|
|
|
101
50
|
- 'spec/moxml/declaration_preservation_spec.rb'
|
|
102
51
|
- 'spec/moxml/sax_spec.rb'
|
|
103
52
|
|
|
104
|
-
# Offense count:
|
|
53
|
+
# Offense count: 10
|
|
105
54
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
106
55
|
Lint/DuplicateBranch:
|
|
107
56
|
Exclude:
|
|
@@ -110,8 +59,9 @@ Lint/DuplicateBranch:
|
|
|
110
59
|
- 'lib/moxml/adapter/libxml.rb'
|
|
111
60
|
- 'lib/moxml/adapter/ox.rb'
|
|
112
61
|
- 'lib/moxml/document.rb'
|
|
62
|
+
- 'lib/moxml/entity_registry.rb'
|
|
113
63
|
|
|
114
|
-
# Offense count:
|
|
64
|
+
# Offense count: 5
|
|
115
65
|
Lint/DuplicateMethods:
|
|
116
66
|
Exclude:
|
|
117
67
|
- 'lib/moxml/config.rb'
|
|
@@ -138,6 +88,11 @@ Lint/EmptyWhen:
|
|
|
138
88
|
Exclude:
|
|
139
89
|
- 'lib/moxml/xpath/compiler.rb'
|
|
140
90
|
|
|
91
|
+
# Offense count: 3
|
|
92
|
+
Lint/HashCompareByIdentity:
|
|
93
|
+
Exclude:
|
|
94
|
+
- 'lib/moxml/native_attachment.rb'
|
|
95
|
+
|
|
141
96
|
# Offense count: 1
|
|
142
97
|
Lint/IneffectiveAccessModifier:
|
|
143
98
|
Exclude:
|
|
@@ -156,28 +111,28 @@ Lint/NoReturnInBeginEndBlocks:
|
|
|
156
111
|
Exclude:
|
|
157
112
|
- 'examples/api_client/api_client.rb'
|
|
158
113
|
|
|
159
|
-
# Offense count:
|
|
114
|
+
# Offense count: 104
|
|
160
115
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
161
116
|
Metrics/AbcSize:
|
|
162
117
|
Enabled: false
|
|
163
118
|
|
|
164
|
-
# Offense count:
|
|
119
|
+
# Offense count: 8
|
|
165
120
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
|
166
121
|
# AllowedMethods: refine
|
|
167
122
|
Metrics/BlockLength:
|
|
168
123
|
Max: 90
|
|
169
124
|
|
|
170
|
-
# Offense count:
|
|
125
|
+
# Offense count: 7
|
|
171
126
|
# Configuration parameters: CountBlocks, CountModifierForms.
|
|
172
127
|
Metrics/BlockNesting:
|
|
173
128
|
Max: 4
|
|
174
129
|
|
|
175
|
-
# Offense count:
|
|
130
|
+
# Offense count: 76
|
|
176
131
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
177
132
|
Metrics/CyclomaticComplexity:
|
|
178
133
|
Enabled: false
|
|
179
134
|
|
|
180
|
-
# Offense count:
|
|
135
|
+
# Offense count: 186
|
|
181
136
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
182
137
|
Metrics/MethodLength:
|
|
183
138
|
Max: 110
|
|
@@ -187,19 +142,11 @@ Metrics/MethodLength:
|
|
|
187
142
|
Metrics/ParameterLists:
|
|
188
143
|
Max: 7
|
|
189
144
|
|
|
190
|
-
# Offense count:
|
|
145
|
+
# Offense count: 52
|
|
191
146
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
192
147
|
Metrics/PerceivedComplexity:
|
|
193
148
|
Enabled: false
|
|
194
149
|
|
|
195
|
-
# Offense count: 2
|
|
196
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
197
|
-
# Configuration parameters: EnforcedStyleForLeadingUnderscores.
|
|
198
|
-
# SupportedStylesForLeadingUnderscores: disallowed, required, optional
|
|
199
|
-
Naming/MemoizedInstanceVariableName:
|
|
200
|
-
Exclude:
|
|
201
|
-
- 'lib/moxml/element.rb'
|
|
202
|
-
|
|
203
150
|
# Offense count: 16
|
|
204
151
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
205
152
|
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
@@ -233,12 +180,6 @@ Naming/VariableNumber:
|
|
|
233
180
|
- 'spec/moxml/allocation_guard_spec.rb'
|
|
234
181
|
- 'spec/support/allocation_helper.rb'
|
|
235
182
|
|
|
236
|
-
# Offense count: 1
|
|
237
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
238
|
-
Performance/TimesMap:
|
|
239
|
-
Exclude:
|
|
240
|
-
- 'spec/support/allocation_helper.rb'
|
|
241
|
-
|
|
242
183
|
# Offense count: 5
|
|
243
184
|
RSpec/BeforeAfterAll:
|
|
244
185
|
Exclude:
|
|
@@ -264,12 +205,12 @@ RSpec/ContextWording:
|
|
|
264
205
|
- 'spec/moxml/xpath/parser_spec.rb'
|
|
265
206
|
- 'spec/performance/benchmark_spec.rb'
|
|
266
207
|
|
|
267
|
-
# Offense count:
|
|
208
|
+
# Offense count: 24
|
|
268
209
|
# Configuration parameters: IgnoredMetadata.
|
|
269
210
|
RSpec/DescribeClass:
|
|
270
211
|
Enabled: false
|
|
271
212
|
|
|
272
|
-
# Offense count:
|
|
213
|
+
# Offense count: 295
|
|
273
214
|
# Configuration parameters: CountAsOne.
|
|
274
215
|
RSpec/ExampleLength:
|
|
275
216
|
Max: 64
|
|
@@ -305,7 +246,7 @@ RSpec/LeakyConstantDeclaration:
|
|
|
305
246
|
RSpec/MessageSpies:
|
|
306
247
|
EnforcedStyle: receive
|
|
307
248
|
|
|
308
|
-
# Offense count:
|
|
249
|
+
# Offense count: 390
|
|
309
250
|
RSpec/MultipleExpectations:
|
|
310
251
|
Max: 10
|
|
311
252
|
|
|
@@ -326,12 +267,6 @@ RSpec/NoExpectationExample:
|
|
|
326
267
|
Exclude:
|
|
327
268
|
- 'spec/performance/xpath_benchmark_spec.rb'
|
|
328
269
|
|
|
329
|
-
# Offense count: 6
|
|
330
|
-
RSpec/PendingWithoutReason:
|
|
331
|
-
Exclude:
|
|
332
|
-
- 'spec/moxml/xpath/functions/position_functions_spec.rb'
|
|
333
|
-
- 'spec/moxml/xpath/functions/special_functions_spec.rb'
|
|
334
|
-
|
|
335
270
|
# Offense count: 4
|
|
336
271
|
RSpec/RepeatedExample:
|
|
337
272
|
Exclude:
|
|
@@ -371,21 +306,6 @@ Security/Eval:
|
|
|
371
306
|
Exclude:
|
|
372
307
|
- 'spec/moxml/xpath/ruby/generator_spec.rb'
|
|
373
308
|
|
|
374
|
-
# Offense count: 11
|
|
375
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
376
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
377
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
378
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
379
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
380
|
-
# AllowedMethods: lambda, proc, it
|
|
381
|
-
Style/BlockDelimiters:
|
|
382
|
-
Exclude:
|
|
383
|
-
- 'lib/moxml/adapter/ox.rb'
|
|
384
|
-
- 'spec/moxml/allocation_benchmark_spec.rb'
|
|
385
|
-
- 'spec/moxml/allocation_guard_spec.rb'
|
|
386
|
-
- 'spec/moxml/lazy_parse_spec.rb'
|
|
387
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
388
|
-
|
|
389
309
|
# Offense count: 1
|
|
390
310
|
Style/DocumentDynamicEvalDefinition:
|
|
391
311
|
Exclude:
|
|
@@ -404,25 +324,17 @@ Style/HashLikeCase:
|
|
|
404
324
|
- 'lib/moxml/adapter/customized_rexml/formatter.rb'
|
|
405
325
|
- 'lib/moxml/adapter/ox.rb'
|
|
406
326
|
|
|
407
|
-
# Offense count: 1
|
|
408
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
409
|
-
Style/MapToHash:
|
|
410
|
-
Exclude:
|
|
411
|
-
- 'spec/moxml/node_cache_spec.rb'
|
|
412
|
-
|
|
413
327
|
# Offense count: 1
|
|
414
328
|
Style/MissingRespondToMissing:
|
|
415
329
|
Exclude:
|
|
416
330
|
- 'lib/moxml/xpath/ruby/node.rb'
|
|
417
331
|
|
|
418
332
|
# Offense count: 1
|
|
419
|
-
# This cop supports
|
|
420
|
-
# Configuration parameters:
|
|
421
|
-
|
|
422
|
-
Style/NumericPredicate:
|
|
333
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
334
|
+
# Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
|
|
335
|
+
Style/MultipleComparison:
|
|
423
336
|
Exclude:
|
|
424
|
-
- '
|
|
425
|
-
- 'lib/moxml/node_set.rb'
|
|
337
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
426
338
|
|
|
427
339
|
# Offense count: 5
|
|
428
340
|
# Configuration parameters: AllowedClasses.
|
|
@@ -440,16 +352,20 @@ Style/OptionalBooleanParameter:
|
|
|
440
352
|
- 'lib/moxml/adapter/libxml.rb'
|
|
441
353
|
- 'lib/moxml/xpath/compiler.rb'
|
|
442
354
|
|
|
443
|
-
# Offense count:
|
|
444
|
-
# This cop supports
|
|
445
|
-
Style/
|
|
355
|
+
# Offense count: 2
|
|
356
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
357
|
+
Style/RedundantAssignment:
|
|
446
358
|
Exclude:
|
|
447
359
|
- 'lib/moxml/adapter/ox.rb'
|
|
448
360
|
|
|
449
361
|
# Offense count: 1
|
|
450
362
|
# This cop supports safe autocorrection (--autocorrect).
|
|
451
|
-
|
|
452
|
-
# SupportedStyles: single_quotes, double_quotes
|
|
453
|
-
Style/StringLiterals:
|
|
363
|
+
Style/RedundantConstantBase:
|
|
454
364
|
Exclude:
|
|
455
|
-
- 'spec/moxml/
|
|
365
|
+
- 'spec/moxml/adapter/headed_ox_spec.rb'
|
|
366
|
+
|
|
367
|
+
# Offense count: 1
|
|
368
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
369
|
+
Style/SelectByKind:
|
|
370
|
+
Exclude:
|
|
371
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
data/README.adoc
CHANGED
|
@@ -887,6 +887,18 @@ The Ox adapter provides maximum parsing speed but has XPath limitations.
|
|
|
887
887
|
doc.xpath("//book").find { |book| book["id"] == "123" }
|
|
888
888
|
----
|
|
889
889
|
|
|
890
|
+
**Upstream Ox gem limitations:**
|
|
891
|
+
|
|
892
|
+
These limitations exist in the Ox gem itself and cannot be worked around in Moxml
|
|
893
|
+
without changes to the Ox C extension:
|
|
894
|
+
|
|
895
|
+
* *Namespace introspection* — Ox stores `xmlns` attributes but does not expose
|
|
896
|
+
namespace accessors on `Ox::Element`. Methods like `node.namespace`,
|
|
897
|
+
`node.namespaces`, and namespace inheritance are unavailable.
|
|
898
|
+
* *Parent node reparenting* — Ox has no method to change a node's parent after
|
|
899
|
+
creation, preventing `node.parent=` functionality. Nodes are immutable with
|
|
900
|
+
respect to their parent relationship.
|
|
901
|
+
|
|
890
902
|
For complete Ox adapter documentation including all limitations and workarounds,
|
|
891
903
|
see link:docs/_pages/adapters/ox.adoc[Ox Adapter Guide].
|
|
892
904
|
|
|
@@ -912,6 +924,12 @@ comprehensive pure Ruby XPath 1.0 engine.
|
|
|
912
924
|
* Prefer pure Ruby XPath for debugging
|
|
913
925
|
* Basic namespace queries are sufficient
|
|
914
926
|
|
|
927
|
+
**Inherited Ox limitations:**
|
|
928
|
+
|
|
929
|
+
HeadedOx inherits the upstream Ox gem limitations described above (namespace
|
|
930
|
+
introspection and parent node reparenting). Additionally, some sibling axes
|
|
931
|
+
are not fully supported due to Ox's tree structure.
|
|
932
|
+
|
|
915
933
|
[source,ruby]
|
|
916
934
|
----
|
|
917
935
|
# Use HeadedOx adapter
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -8,9 +8,54 @@ module Moxml
|
|
|
8
8
|
class Base
|
|
9
9
|
# include XmlUtils
|
|
10
10
|
|
|
11
|
+
# Entity marker for adapters that resolve entities during parsing.
|
|
12
|
+
# U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character
|
|
13
|
+
# sentinel chosen because this exact sequence followed by a valid entity
|
|
14
|
+
# name pattern is vanishingly unlikely in real XML content.
|
|
15
|
+
# Non-standard entities like &copy; are converted to this marker before
|
|
16
|
+
# parsing, then restored during serialization.
|
|
17
|
+
# Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.
|
|
18
|
+
ENTITY_MARKER = "\u{FFFC}\u{FEFF}"
|
|
19
|
+
ENTITY_NAME_PATTERN = "[a-zA-Z_][\\w.:-]*"
|
|
20
|
+
ENTITY_NAME_RE = /&(#{ENTITY_NAME_PATTERN});/
|
|
21
|
+
ENTITY_MARKER_RE = /\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
|
|
22
|
+
SERIALIZED_ENTITY_MARKER_RE = /(#{ENTITY_NAME_PATTERN});/
|
|
23
|
+
STANDARD_ENTITIES = %w[amp lt gt quot apos].freeze
|
|
24
|
+
|
|
11
25
|
class << self
|
|
12
26
|
include XmlUtils
|
|
13
27
|
|
|
28
|
+
# Replace non-standard entity references with markers before parsing.
|
|
29
|
+
# Always returns a UTF-8 encoded string.
|
|
30
|
+
def preprocess_entities(xml)
|
|
31
|
+
return "" if xml.nil?
|
|
32
|
+
|
|
33
|
+
str = if xml.encoding == Encoding::BINARY
|
|
34
|
+
# Binary strings are assumed to be UTF-8. If the bytes are
|
|
35
|
+
# not valid UTF-8, fall back to encoding as UTF-8 with
|
|
36
|
+
# replacement to avoid raising on gsub.
|
|
37
|
+
dup = xml.dup.force_encoding("UTF-8")
|
|
38
|
+
dup.valid_encoding? ? dup : xml.dup.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
|
|
39
|
+
elsif xml.encoding == Encoding::UTF_8
|
|
40
|
+
xml
|
|
41
|
+
else
|
|
42
|
+
xml.encode("UTF-8")
|
|
43
|
+
end
|
|
44
|
+
str.gsub(ENTITY_NAME_RE) do |match|
|
|
45
|
+
STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Restore entity markers back to named entity references.
|
|
50
|
+
def restore_entities(text)
|
|
51
|
+
return text unless text.is_a?(String)
|
|
52
|
+
|
|
53
|
+
# Force UTF-8 encoding since markers are UTF-8 characters
|
|
54
|
+
str = text.encoding == Encoding::UTF_8 ? text : text.dup.force_encoding("UTF-8")
|
|
55
|
+
result = str.gsub(ENTITY_MARKER_RE, '&\1;')
|
|
56
|
+
result.gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
|
|
57
|
+
end
|
|
58
|
+
|
|
14
59
|
def set_root(_doc, _element)
|
|
15
60
|
raise Moxml::NotImplementedError.new(
|
|
16
61
|
"set_root not implemented",
|
|
@@ -163,6 +208,26 @@ namespace_validation_mode: :strict)
|
|
|
163
208
|
child_native
|
|
164
209
|
end
|
|
165
210
|
|
|
211
|
+
# Returns all namespaces in scope for this element, including
|
|
212
|
+
# inherited from ancestors. Adapters with native support (Nokogiri)
|
|
213
|
+
# override this. Default walks the ancestor chain.
|
|
214
|
+
def in_scope_namespaces(element)
|
|
215
|
+
namespaces = {}
|
|
216
|
+
node = element
|
|
217
|
+
|
|
218
|
+
while node
|
|
219
|
+
break unless node_type(node) == :element
|
|
220
|
+
|
|
221
|
+
namespace_definitions(node).each do |ns|
|
|
222
|
+
prefix = namespace_prefix(ns)
|
|
223
|
+
namespaces[prefix] = ns unless namespaces.key?(prefix)
|
|
224
|
+
end
|
|
225
|
+
node = parent(node)
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
namespaces.values
|
|
229
|
+
end
|
|
230
|
+
|
|
166
231
|
protected
|
|
167
232
|
|
|
168
233
|
def create_native_element(_name, _owner_doc = nil)
|
|
@@ -236,14 +301,6 @@ namespace_validation_mode: :strict)
|
|
|
236
301
|
adapter: name,
|
|
237
302
|
)
|
|
238
303
|
end
|
|
239
|
-
|
|
240
|
-
def in_scope_namespaces(_element)
|
|
241
|
-
raise Moxml::NotImplementedError.new(
|
|
242
|
-
"in_scope_namespaces not implemented",
|
|
243
|
-
feature: "in_scope_namespaces",
|
|
244
|
-
adapter: name,
|
|
245
|
-
)
|
|
246
|
-
end
|
|
247
304
|
end
|
|
248
305
|
end
|
|
249
306
|
end
|
|
@@ -30,8 +30,9 @@ module Moxml
|
|
|
30
30
|
# ~176K allocations per 100-element parse). Lazy parse defers wrapper
|
|
31
31
|
# creation until nodes are accessed, matching Ox adapter behavior.
|
|
32
32
|
def parse(xml, options = {}, _context = nil)
|
|
33
|
+
processed_xml = preprocess_entities(xml)
|
|
33
34
|
native_doc = begin
|
|
34
|
-
result = ::Ox.parse(
|
|
35
|
+
result = ::Ox.parse(processed_xml)
|
|
35
36
|
|
|
36
37
|
# result can be either Document or Element
|
|
37
38
|
if result.is_a?(::Ox::Document)
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -56,6 +56,11 @@ module Moxml
|
|
|
56
56
|
xml.to_s
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
+
# Preprocess entities before parsing.
|
|
60
|
+
# This converts the string to UTF-8; LibXML will use the encoding
|
|
61
|
+
# parameter or XML declaration for byte interpretation.
|
|
62
|
+
xml_string = preprocess_entities(xml_string)
|
|
63
|
+
|
|
59
64
|
# Extract DOCTYPE before parsing
|
|
60
65
|
doctype_match = xml_string.match(/<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"| \s+SYSTEM\s+"([^"]+)")?\s*>/i)
|
|
61
66
|
|
|
@@ -289,9 +294,6 @@ module Moxml
|
|
|
289
294
|
result = []
|
|
290
295
|
if native_node.children?
|
|
291
296
|
native_node.each_child do |child|
|
|
292
|
-
# Skip whitespace-only text nodes
|
|
293
|
-
next if child.text? && child.content.to_s.strip.empty?
|
|
294
|
-
|
|
295
297
|
result << patch_node(child)
|
|
296
298
|
end
|
|
297
299
|
end
|
|
@@ -842,9 +844,17 @@ module Moxml
|
|
|
842
844
|
return [] unless native_node
|
|
843
845
|
return [] unless native_node.is_a?(::LibXML::XML::Node)
|
|
844
846
|
|
|
845
|
-
native_node.namespaces
|
|
846
|
-
|
|
847
|
-
|
|
847
|
+
namespaces = native_node.namespaces
|
|
848
|
+
return [] unless namespaces
|
|
849
|
+
|
|
850
|
+
namespace_list =
|
|
851
|
+
if namespaces.respond_to?(:definitions)
|
|
852
|
+
namespaces.definitions
|
|
853
|
+
else
|
|
854
|
+
namespaces
|
|
855
|
+
end
|
|
856
|
+
|
|
857
|
+
namespace_list.to_a
|
|
848
858
|
end
|
|
849
859
|
|
|
850
860
|
# Doctype accessor methods
|
|
@@ -16,14 +16,18 @@ module Moxml
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def parse(xml, options = {}, _context = nil)
|
|
19
|
+
processed_xml = preprocess_entities(xml)
|
|
20
|
+
|
|
21
|
+
# preprocess_entities always returns UTF-8, so tell Nokogiri to
|
|
22
|
+
# parse as UTF-8 regardless of any original encoding option.
|
|
19
23
|
native_doc = begin
|
|
20
24
|
if options[:fragment]
|
|
21
|
-
::Nokogiri::XML::DocumentFragment.parse(
|
|
25
|
+
::Nokogiri::XML::DocumentFragment.parse(processed_xml) do |config|
|
|
22
26
|
config.strict.nonet
|
|
23
27
|
config.recover unless options[:strict]
|
|
24
28
|
end
|
|
25
29
|
else
|
|
26
|
-
::Nokogiri::XML(
|
|
30
|
+
::Nokogiri::XML(processed_xml, nil, "UTF-8") do |config|
|
|
27
31
|
config.strict.nonet
|
|
28
32
|
config.recover unless options[:strict]
|
|
29
33
|
end
|
|
@@ -178,10 +182,12 @@ module Moxml
|
|
|
178
182
|
end
|
|
179
183
|
|
|
180
184
|
def children(node)
|
|
181
|
-
node.children
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
+
node.children
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def adjacent_to_entity_reference?(node)
|
|
189
|
+
node.previous_sibling.is_a?(::Nokogiri::XML::EntityReference) ||
|
|
190
|
+
node.next_sibling.is_a?(::Nokogiri::XML::EntityReference)
|
|
185
191
|
end
|
|
186
192
|
|
|
187
193
|
def replace_children(node, new_children)
|
|
@@ -288,7 +294,7 @@ module Moxml
|
|
|
288
294
|
end
|
|
289
295
|
|
|
290
296
|
def text_content(node)
|
|
291
|
-
node.text
|
|
297
|
+
node.text.to_s
|
|
292
298
|
end
|
|
293
299
|
|
|
294
300
|
def inner_text(node)
|