moxml 0.1.19 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +181 -11
- data/docs/_guides/node-api-consistency.adoc +4 -0
- data/lib/moxml/adapter/base.rb +11 -1
- data/lib/moxml/adapter/customized_libxml/node.rb +3 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +6 -1
- data/lib/moxml/adapter/customized_ox/text.rb +15 -2
- data/lib/moxml/adapter/customized_rexml/formatter.rb +1 -0
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +105 -0
- data/lib/moxml/adapter/libxml/entity_restorer.rb +92 -0
- data/lib/moxml/adapter/libxml.rb +383 -359
- data/lib/moxml/adapter/oga.rb +6 -2
- data/lib/moxml/adapter/ox.rb +15 -8
- data/lib/moxml/builder.rb +12 -5
- data/lib/moxml/entity_registry.rb +1 -0
- data/lib/moxml/text.rb +4 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +2 -1
- data/spec/integration/shared_examples/edge_cases.rb +4 -2
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +1 -1
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +3 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +10 -4
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +1 -1
- data/spec/moxml/adapter/libxml_internals_spec.rb +167 -0
- data/spec/moxml/text_spec.rb +23 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +2 -2
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cd873f36f1ee8d7799299cedbc4bfa7da00d588e311693be8b6e718b9e09fa8d
|
|
4
|
+
data.tar.gz: 2597df5af105dfcfdc84586b98473e99af95681fbf1ef24b0c2c6698280e6dbb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b39f087ced4fc9ea76722e32c8b036ee7e4819798bec248af520f51bb20039cb76459837e1b5a65540f46bf7112ee5922eece22cac763ef25b3235a0c5ca60f6
|
|
7
|
+
data.tar.gz: 75847baca549e9cb0902203f2d8b9f8e4e764c048dc0cd57260d6995f1aa233c5db6ecdb6786344cdfb097b4ec73a088d1c12b4d79b2fcb5217c936ba4605ec1
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,11 +1,65 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-
|
|
3
|
+
# on 2026-05-03 12:53:32 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
+
# Offense count: 5
|
|
10
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
12
|
+
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
13
|
+
Layout/ArgumentAlignment:
|
|
14
|
+
Exclude:
|
|
15
|
+
- 'lib/moxml/adapter/base.rb'
|
|
16
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
17
|
+
- 'lib/moxml/builder.rb'
|
|
18
|
+
|
|
19
|
+
# Offense count: 2
|
|
20
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
21
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
22
|
+
# SupportedStyles: with_first_element, with_fixed_indentation
|
|
23
|
+
Layout/ArrayAlignment:
|
|
24
|
+
Exclude:
|
|
25
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
26
|
+
|
|
27
|
+
# Offense count: 9
|
|
28
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
29
|
+
# Configuration parameters: EnforcedStyleAlignWith.
|
|
30
|
+
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
31
|
+
Layout/BlockAlignment:
|
|
32
|
+
Exclude:
|
|
33
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
34
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
35
|
+
- 'spec/integration/shared_examples/edge_cases.rb'
|
|
36
|
+
- 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
|
|
37
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
|
38
|
+
- 'spec/moxml/xpath/functions/node_functions_spec.rb'
|
|
39
|
+
|
|
40
|
+
# Offense count: 9
|
|
41
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
42
|
+
Layout/BlockEndNewline:
|
|
43
|
+
Exclude:
|
|
44
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
45
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
46
|
+
- 'spec/integration/shared_examples/edge_cases.rb'
|
|
47
|
+
- 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
|
|
48
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
|
49
|
+
- 'spec/moxml/xpath/functions/node_functions_spec.rb'
|
|
50
|
+
|
|
51
|
+
# Offense count: 2
|
|
52
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
53
|
+
Layout/ClosingParenthesisIndentation:
|
|
54
|
+
Exclude:
|
|
55
|
+
- 'lib/moxml/adapter/oga.rb'
|
|
56
|
+
|
|
57
|
+
# Offense count: 1
|
|
58
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
59
|
+
Layout/ElseAlignment:
|
|
60
|
+
Exclude:
|
|
61
|
+
- 'lib/moxml/adapter/base.rb'
|
|
62
|
+
|
|
9
63
|
# Offense count: 4
|
|
10
64
|
# This cop supports safe autocorrection (--autocorrect).
|
|
11
65
|
Layout/EmptyLineAfterGuardClause:
|
|
@@ -27,13 +81,67 @@ Layout/EmptyLines:
|
|
|
27
81
|
Exclude:
|
|
28
82
|
- 'lib/moxml/adapter/ox.rb'
|
|
29
83
|
|
|
30
|
-
# Offense count:
|
|
84
|
+
# Offense count: 2
|
|
85
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
86
|
+
Layout/EmptyLinesAroundMethodBody:
|
|
87
|
+
Exclude:
|
|
88
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
89
|
+
|
|
90
|
+
# Offense count: 1
|
|
91
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
92
|
+
# Configuration parameters: EnforcedStyleAlignWith.
|
|
93
|
+
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
|
94
|
+
Layout/EndAlignment:
|
|
95
|
+
Exclude:
|
|
96
|
+
- 'lib/moxml/adapter/base.rb'
|
|
97
|
+
|
|
98
|
+
# Offense count: 2
|
|
99
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
100
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
101
|
+
# SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
|
|
102
|
+
Layout/FirstArgumentIndentation:
|
|
103
|
+
Exclude:
|
|
104
|
+
- 'lib/moxml/adapter/oga.rb'
|
|
105
|
+
|
|
106
|
+
# Offense count: 2
|
|
107
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
108
|
+
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
109
|
+
# SupportedHashRocketStyles: key, separator, table
|
|
110
|
+
# SupportedColonStyles: key, separator, table
|
|
111
|
+
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
112
|
+
Layout/HashAlignment:
|
|
113
|
+
Exclude:
|
|
114
|
+
- 'lib/moxml/builder.rb'
|
|
115
|
+
|
|
116
|
+
# Offense count: 20
|
|
117
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
118
|
+
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
119
|
+
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
120
|
+
Layout/IndentationWidth:
|
|
121
|
+
Exclude:
|
|
122
|
+
- 'lib/moxml/adapter/base.rb'
|
|
123
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
124
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
125
|
+
- 'spec/integration/shared_examples/edge_cases.rb'
|
|
126
|
+
- 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
|
|
127
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
|
128
|
+
- 'spec/moxml/xpath/functions/node_functions_spec.rb'
|
|
129
|
+
|
|
130
|
+
# Offense count: 344
|
|
31
131
|
# This cop supports safe autocorrection (--autocorrect).
|
|
32
132
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
33
133
|
# URISchemes: http, https
|
|
34
134
|
Layout/LineLength:
|
|
35
135
|
Enabled: false
|
|
36
136
|
|
|
137
|
+
# Offense count: 2
|
|
138
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
139
|
+
# Configuration parameters: EnforcedStyle.
|
|
140
|
+
# SupportedStyles: symmetrical, new_line, same_line
|
|
141
|
+
Layout/MultilineMethodCallBraceLayout:
|
|
142
|
+
Exclude:
|
|
143
|
+
- 'lib/moxml/adapter/oga.rb'
|
|
144
|
+
|
|
37
145
|
# Offense count: 1
|
|
38
146
|
# This cop supports safe autocorrection (--autocorrect).
|
|
39
147
|
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
@@ -42,6 +150,17 @@ Layout/MultilineOperationIndentation:
|
|
|
42
150
|
Exclude:
|
|
43
151
|
- 'lib/moxml/adapter/ox.rb'
|
|
44
152
|
|
|
153
|
+
# Offense count: 10
|
|
154
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
155
|
+
# Configuration parameters: AllowInHeredoc.
|
|
156
|
+
Layout/TrailingWhitespace:
|
|
157
|
+
Exclude:
|
|
158
|
+
- 'lib/moxml/adapter/base.rb'
|
|
159
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
160
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
161
|
+
- 'lib/moxml/builder.rb'
|
|
162
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
163
|
+
|
|
45
164
|
# Offense count: 7
|
|
46
165
|
# Configuration parameters: AllowedMethods.
|
|
47
166
|
# AllowedMethods: enums
|
|
@@ -61,11 +180,10 @@ Lint/DuplicateBranch:
|
|
|
61
180
|
- 'lib/moxml/document.rb'
|
|
62
181
|
- 'lib/moxml/entity_registry.rb'
|
|
63
182
|
|
|
64
|
-
# Offense count:
|
|
183
|
+
# Offense count: 4
|
|
65
184
|
Lint/DuplicateMethods:
|
|
66
185
|
Exclude:
|
|
67
186
|
- 'lib/moxml/config.rb'
|
|
68
|
-
- 'lib/moxml/element.rb'
|
|
69
187
|
- 'lib/moxml/node.rb'
|
|
70
188
|
|
|
71
189
|
# Offense count: 4
|
|
@@ -91,7 +209,7 @@ Lint/EmptyWhen:
|
|
|
91
209
|
# Offense count: 3
|
|
92
210
|
Lint/HashCompareByIdentity:
|
|
93
211
|
Exclude:
|
|
94
|
-
- 'lib/moxml/native_attachment.rb'
|
|
212
|
+
- 'lib/moxml/native_attachment/native.rb'
|
|
95
213
|
|
|
96
214
|
# Offense count: 1
|
|
97
215
|
Lint/IneffectiveAccessModifier:
|
|
@@ -127,12 +245,12 @@ Metrics/BlockLength:
|
|
|
127
245
|
Metrics/BlockNesting:
|
|
128
246
|
Max: 4
|
|
129
247
|
|
|
130
|
-
# Offense count:
|
|
248
|
+
# Offense count: 75
|
|
131
249
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
132
250
|
Metrics/CyclomaticComplexity:
|
|
133
251
|
Enabled: false
|
|
134
252
|
|
|
135
|
-
# Offense count:
|
|
253
|
+
# Offense count: 188
|
|
136
254
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
137
255
|
Metrics/MethodLength:
|
|
138
256
|
Max: 110
|
|
@@ -180,6 +298,12 @@ Naming/VariableNumber:
|
|
|
180
298
|
- 'spec/moxml/allocation_guard_spec.rb'
|
|
181
299
|
- 'spec/support/allocation_helper.rb'
|
|
182
300
|
|
|
301
|
+
# Offense count: 1
|
|
302
|
+
# Configuration parameters: MinSize.
|
|
303
|
+
Performance/CollectionLiteralInLoop:
|
|
304
|
+
Exclude:
|
|
305
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
306
|
+
|
|
183
307
|
# Offense count: 5
|
|
184
308
|
RSpec/BeforeAfterAll:
|
|
185
309
|
Exclude:
|
|
@@ -205,12 +329,12 @@ RSpec/ContextWording:
|
|
|
205
329
|
- 'spec/moxml/xpath/parser_spec.rb'
|
|
206
330
|
- 'spec/performance/benchmark_spec.rb'
|
|
207
331
|
|
|
208
|
-
# Offense count:
|
|
332
|
+
# Offense count: 25
|
|
209
333
|
# Configuration parameters: IgnoredMetadata.
|
|
210
334
|
RSpec/DescribeClass:
|
|
211
335
|
Enabled: false
|
|
212
336
|
|
|
213
|
-
# Offense count:
|
|
337
|
+
# Offense count: 328
|
|
214
338
|
# Configuration parameters: CountAsOne.
|
|
215
339
|
RSpec/ExampleLength:
|
|
216
340
|
Max: 64
|
|
@@ -240,13 +364,13 @@ RSpec/LeakyConstantDeclaration:
|
|
|
240
364
|
- 'spec/moxml/declaration_preservation_spec.rb'
|
|
241
365
|
- 'spec/moxml/sax_spec.rb'
|
|
242
366
|
|
|
243
|
-
# Offense count:
|
|
367
|
+
# Offense count: 4
|
|
244
368
|
# Configuration parameters: .
|
|
245
369
|
# SupportedStyles: have_received, receive
|
|
246
370
|
RSpec/MessageSpies:
|
|
247
371
|
EnforcedStyle: receive
|
|
248
372
|
|
|
249
|
-
# Offense count:
|
|
373
|
+
# Offense count: 414
|
|
250
374
|
RSpec/MultipleExpectations:
|
|
251
375
|
Max: 10
|
|
252
376
|
|
|
@@ -306,6 +430,22 @@ Security/Eval:
|
|
|
306
430
|
Exclude:
|
|
307
431
|
- 'spec/moxml/xpath/ruby/generator_spec.rb'
|
|
308
432
|
|
|
433
|
+
# Offense count: 12
|
|
434
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
435
|
+
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
436
|
+
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
437
|
+
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
438
|
+
# FunctionalMethods: let, let!, subject, watch
|
|
439
|
+
# AllowedMethods: lambda, proc, it
|
|
440
|
+
Style/BlockDelimiters:
|
|
441
|
+
Exclude:
|
|
442
|
+
- 'lib/moxml/adapter/libxml.rb'
|
|
443
|
+
- 'lib/moxml/adapter/ox.rb'
|
|
444
|
+
- 'spec/integration/shared_examples/edge_cases.rb'
|
|
445
|
+
- 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
|
|
446
|
+
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
|
447
|
+
- 'spec/moxml/xpath/functions/node_functions_spec.rb'
|
|
448
|
+
|
|
309
449
|
# Offense count: 1
|
|
310
450
|
Style/DocumentDynamicEvalDefinition:
|
|
311
451
|
Exclude:
|
|
@@ -329,6 +469,18 @@ Style/MissingRespondToMissing:
|
|
|
329
469
|
Exclude:
|
|
330
470
|
- 'lib/moxml/xpath/ruby/node.rb'
|
|
331
471
|
|
|
472
|
+
# Offense count: 1
|
|
473
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
474
|
+
Style/MultilineIfModifier:
|
|
475
|
+
Exclude:
|
|
476
|
+
- 'lib/moxml/builder.rb'
|
|
477
|
+
|
|
478
|
+
# Offense count: 1
|
|
479
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
480
|
+
Style/MultilineTernaryOperator:
|
|
481
|
+
Exclude:
|
|
482
|
+
- 'lib/moxml/adapter/base.rb'
|
|
483
|
+
|
|
332
484
|
# Offense count: 1
|
|
333
485
|
# This cop supports safe autocorrection (--autocorrect).
|
|
334
486
|
# Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
|
|
@@ -365,7 +517,25 @@ Style/RedundantConstantBase:
|
|
|
365
517
|
- 'spec/moxml/adapter/headed_ox_spec.rb'
|
|
366
518
|
|
|
367
519
|
# Offense count: 1
|
|
520
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
521
|
+
Style/RedundantParentheses:
|
|
522
|
+
Exclude:
|
|
523
|
+
- 'lib/moxml/xpath/compiler.rb'
|
|
524
|
+
|
|
525
|
+
# Offense count: 8
|
|
368
526
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
369
527
|
Style/SelectByKind:
|
|
370
528
|
Exclude:
|
|
529
|
+
- 'spec/integration/shared_examples/edge_cases.rb'
|
|
530
|
+
- 'spec/integration/shared_examples/entity_reference_whitespace.rb'
|
|
371
531
|
- 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
|
|
532
|
+
- 'spec/integration/shared_examples/node_wrappers/node_set_behavior.rb'
|
|
533
|
+
- 'spec/moxml/xpath/functions/node_functions_spec.rb'
|
|
534
|
+
|
|
535
|
+
# Offense count: 2
|
|
536
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
537
|
+
# Configuration parameters: EnforcedStyle, MinSize.
|
|
538
|
+
# SupportedStyles: percent, brackets
|
|
539
|
+
Style/SymbolArray:
|
|
540
|
+
Exclude:
|
|
541
|
+
- 'lib/moxml/xpath/compiler.rb'
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -35,12 +35,22 @@ module Moxml
|
|
|
35
35
|
# not valid UTF-8, fall back to encoding as UTF-8 with
|
|
36
36
|
# replacement to avoid raising on gsub.
|
|
37
37
|
dup = xml.dup.force_encoding("UTF-8")
|
|
38
|
-
dup.valid_encoding?
|
|
38
|
+
if dup.valid_encoding?
|
|
39
|
+
dup
|
|
40
|
+
else
|
|
41
|
+
xml.dup.encode("UTF-8",
|
|
42
|
+
"ASCII-8BIT", invalid: :replace, undef: :replace)
|
|
43
|
+
end
|
|
39
44
|
elsif xml.encoding == Encoding::UTF_8
|
|
40
45
|
xml
|
|
41
46
|
else
|
|
42
47
|
xml.encode("UTF-8")
|
|
43
48
|
end
|
|
49
|
+
# Fast path: no `&` means no entity references to mark — skip
|
|
50
|
+
# the regex scan and string allocation entirely. The vast
|
|
51
|
+
# majority of XML payloads contain no entity references.
|
|
52
|
+
return str unless str.include?("&")
|
|
53
|
+
|
|
44
54
|
str.gsub(ENTITY_NAME_RE) do |match|
|
|
45
55
|
STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
|
|
46
56
|
end
|
|
@@ -8,6 +8,9 @@ module Moxml
|
|
|
8
8
|
# This wrapper hides LibXML's strict document ownership model,
|
|
9
9
|
# allowing nodes to be moved between documents transparently.
|
|
10
10
|
# Similar pattern to Ox adapter's customized classes.
|
|
11
|
+
#
|
|
12
|
+
# The Libxml adapter owns wrapper type mapping in one place so the
|
|
13
|
+
# wrapper classes do not duplicate node-type knowledge.
|
|
11
14
|
class Node
|
|
12
15
|
attr_reader :native
|
|
13
16
|
|
|
@@ -19,7 +19,12 @@ module Moxml
|
|
|
19
19
|
# LibXML's .content already contains escaped text, but it over-escapes
|
|
20
20
|
# quotes which don't need escaping in text nodes (only in attributes)
|
|
21
21
|
def to_xml
|
|
22
|
-
@native.content
|
|
22
|
+
content = @native.content
|
|
23
|
+
# Skip the gsub allocation entirely when there's nothing to undo —
|
|
24
|
+
# the common case for parsed text without literal quotes.
|
|
25
|
+
return content unless content.include?("&quot;")
|
|
26
|
+
|
|
27
|
+
content.gsub("&quot;", '"')
|
|
23
28
|
end
|
|
24
29
|
end
|
|
25
30
|
end
|
|
@@ -3,8 +3,21 @@
|
|
|
3
3
|
module Moxml
|
|
4
4
|
module Adapter
|
|
5
5
|
module CustomizedOx
|
|
6
|
-
# Ox uses Strings, but a
|
|
7
|
-
|
|
6
|
+
# Ox uses Strings for text content, but a String cannot carry a @parent
|
|
7
|
+
# back-reference. We subclass ::Ox::Node so a Text wrapper can hold one.
|
|
8
|
+
#
|
|
9
|
+
# ::Ox::Node subclasses that are neither ::Ox::Element nor ::Ox::Document
|
|
10
|
+
# are unknown to Ox.dump's standard XML emitter, so they fall through to
|
|
11
|
+
# Ox's generic object-marshalling format. The serializer in
|
|
12
|
+
# Moxml::Adapter::Ox#serialize special-cases this class to emit the value
|
|
13
|
+
# with proper XML escaping. The #to_s override ensures string
|
|
14
|
+
# interpolation (`"#{text}"`) produces the text content rather than the
|
|
15
|
+
# default Object representation.
|
|
16
|
+
class Text < ::Ox::Node
|
|
17
|
+
def to_s
|
|
18
|
+
value.to_s
|
|
19
|
+
end
|
|
20
|
+
end
|
|
8
21
|
end
|
|
9
22
|
end
|
|
10
23
|
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
class Libxml < Base
|
|
6
|
+
# Tracks entity-reference insertions that cannot live in LibXML's native
|
|
7
|
+
# node tree, plus the child sequence needed to serialize them in order.
|
|
8
|
+
class EntityRefRegistry
|
|
9
|
+
ENTITY_REFS_KEY = :_entity_ref_pairs
|
|
10
|
+
CHILD_SEQUENCE_KEY = :_child_seq_pairs
|
|
11
|
+
NON_WHITESPACE_RE = /\S/
|
|
12
|
+
private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY, :NON_WHITESPACE_RE
|
|
13
|
+
|
|
14
|
+
def initialize(attachments, doc)
|
|
15
|
+
@attachments = attachments
|
|
16
|
+
@doc = doc
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def active?
|
|
20
|
+
@doc ? @attachments.key?(@doc, ENTITY_REFS_KEY) : false
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def register(element, ref)
|
|
24
|
+
return unless @doc && element
|
|
25
|
+
|
|
26
|
+
path = path_for(element)
|
|
27
|
+
|
|
28
|
+
refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY) || {}
|
|
29
|
+
(refs_by_path[path] ||= []) << ref
|
|
30
|
+
@attachments.set(@doc, ENTITY_REFS_KEY, refs_by_path)
|
|
31
|
+
|
|
32
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY) || {}
|
|
33
|
+
existing = seq_by_path[path]
|
|
34
|
+
if existing
|
|
35
|
+
existing << :eref
|
|
36
|
+
else
|
|
37
|
+
seq_by_path[path] = Array.new(count_native_children(element), :native)
|
|
38
|
+
seq_by_path[path] << :eref
|
|
39
|
+
@attachments.set(@doc, CHILD_SEQUENCE_KEY, seq_by_path)
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def append_native(element)
|
|
44
|
+
return unless @doc && element
|
|
45
|
+
|
|
46
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
|
|
47
|
+
return unless seq_by_path
|
|
48
|
+
|
|
49
|
+
seq = seq_by_path[path_for(element)]
|
|
50
|
+
return unless seq
|
|
51
|
+
|
|
52
|
+
seq << :native
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def refs_for(element)
|
|
56
|
+
return nil unless @doc && element
|
|
57
|
+
|
|
58
|
+
refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY)
|
|
59
|
+
refs_by_path && refs_by_path[path_for(element)]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def sequence_for(element)
|
|
63
|
+
return nil unless @doc && element
|
|
64
|
+
|
|
65
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
|
|
66
|
+
seq_by_path && seq_by_path[path_for(element)]
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def serialization_for(element)
|
|
70
|
+
refs = refs_for(element)
|
|
71
|
+
return [nil, nil] unless refs && !refs.empty?
|
|
72
|
+
|
|
73
|
+
seq = sequence_for(element)
|
|
74
|
+
return [nil, nil] unless seq
|
|
75
|
+
|
|
76
|
+
[refs, seq]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def path_for(element)
|
|
82
|
+
element.path
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def count_native_children(element)
|
|
86
|
+
return 0 unless element.is_a?(::LibXML::XML::Node) && element.children?
|
|
87
|
+
|
|
88
|
+
count = 0
|
|
89
|
+
element.each_child do |child|
|
|
90
|
+
count += 1 unless blank_text_node?(child)
|
|
91
|
+
end
|
|
92
|
+
count
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def blank_text_node?(child)
|
|
96
|
+
child.text? && blank_content?(child.content)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def blank_content?(content)
|
|
100
|
+
content.nil? || !content.match?(NON_WHITESPACE_RE)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
class Libxml < Base
|
|
6
|
+
# Restores configured character entities into explicit Moxml
|
|
7
|
+
# EntityReference nodes after LibXML has parsed the native tree.
|
|
8
|
+
class EntityRestorer
|
|
9
|
+
def initialize(doc, adapter: Libxml)
|
|
10
|
+
@doc = doc
|
|
11
|
+
@ctx = doc.context
|
|
12
|
+
@registry = @ctx.entity_registry
|
|
13
|
+
@config = @ctx.config
|
|
14
|
+
@adapter = adapter
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def run
|
|
18
|
+
return unless @registry && @doc.root
|
|
19
|
+
|
|
20
|
+
walk(@doc.root)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def walk(element)
|
|
26
|
+
# Snapshot because we may add/remove siblings during the walk.
|
|
27
|
+
element.children.to_a.each do |child|
|
|
28
|
+
if child.is_a?(::Moxml::Text)
|
|
29
|
+
restore_text_node(child)
|
|
30
|
+
elsif child.is_a?(::Moxml::Element)
|
|
31
|
+
walk(child)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Matches DocumentBuilder's previous behavior, including the libxml
|
|
37
|
+
# limitation that adjacent native text nodes get merged.
|
|
38
|
+
def restore_text_node(text_node)
|
|
39
|
+
content = text_node.content
|
|
40
|
+
return unless content
|
|
41
|
+
|
|
42
|
+
chunks = chunk_text(content)
|
|
43
|
+
return if chunks.size == 1 && chunks.first.first == :text
|
|
44
|
+
|
|
45
|
+
parent = text_node.parent
|
|
46
|
+
return unless parent
|
|
47
|
+
|
|
48
|
+
text_node.remove
|
|
49
|
+
chunks.each { |type, payload| append_chunk(parent, type, payload) }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def chunk_text(content)
|
|
53
|
+
chunks = []
|
|
54
|
+
buffer = +""
|
|
55
|
+
restorable = @registry.restorable_codepoints
|
|
56
|
+
|
|
57
|
+
content.each_char do |char|
|
|
58
|
+
cp = char.ord
|
|
59
|
+
if restorable.include?(cp) &&
|
|
60
|
+
(name = @registry.primary_name_for_codepoint(cp)) &&
|
|
61
|
+
@registry.should_restore?(cp, config: @config)
|
|
62
|
+
unless buffer.empty?
|
|
63
|
+
chunks << [:text, buffer.dup]
|
|
64
|
+
buffer.clear
|
|
65
|
+
end
|
|
66
|
+
chunks << [:eref, name]
|
|
67
|
+
else
|
|
68
|
+
buffer << char
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
chunks << [:text, buffer.dup] unless buffer.empty?
|
|
73
|
+
chunks
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def append_chunk(parent, type, payload)
|
|
77
|
+
case type
|
|
78
|
+
when :text
|
|
79
|
+
parent.add_child(::Moxml::Text.new(@adapter.create_native_text(payload), @ctx))
|
|
80
|
+
when :eref
|
|
81
|
+
parent.add_child(
|
|
82
|
+
::Moxml::EntityReference.new(
|
|
83
|
+
@adapter.create_native_entity_reference(payload),
|
|
84
|
+
@ctx,
|
|
85
|
+
),
|
|
86
|
+
)
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|