moxml 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 76508cc0d2699469ade87f48e38ea73289abc4f95cb7a313f6d71548477248f3
4
- data.tar.gz: 1752cb433953a869cedd1d88fc565c53f77bd08201c5977247150e1e7b75a386
3
+ metadata.gz: cd873f36f1ee8d7799299cedbc4bfa7da00d588e311693be8b6e718b9e09fa8d
4
+ data.tar.gz: 2597df5af105dfcfdc84586b98473e99af95681fbf1ef24b0c2c6698280e6dbb
5
5
  SHA512:
6
- metadata.gz: a4219577f2e00a9e4f00f1ce1bbaf4f03d6c841dfe8adb9983162a6389b1e276725984dc010f451cac55fbd8049a81451d641fc073fc1cf8417f90ee6e50cdf5
7
- data.tar.gz: 44f3e08b174e2689fed3a60ee3ef0b805fdc14cbeeb905ebde15e5241be3e3f168441dc95afe99dd244fc1047c581fce912ec3e061299e4e101f63329f00def9
6
+ metadata.gz: b39f087ced4fc9ea76722e32c8b036ee7e4819798bec248af520f51bb20039cb76459837e1b5a65540f46bf7112ee5922eece22cac763ef25b3235a0c5ca60f6
7
+ data.tar.gz: 75847baca549e9cb0902203f2d8b9f8e4e764c048dc0cd57260d6995f1aa233c5db6ecdb6786344cdfb097b4ec73a088d1c12b4d79b2fcb5217c936ba4605ec1
data/.rubocop_todo.yml CHANGED
@@ -1,11 +1,65 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-04-23 07:48:23 UTC using RuboCop version 1.86.0.
3
+ # on 2026-05-03 12:53:32 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
+ # Offense count: 5
10
+ # This cop supports safe autocorrection (--autocorrect).
11
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
12
+ # SupportedStyles: with_first_argument, with_fixed_indentation
13
+ Layout/ArgumentAlignment:
14
+ Exclude:
15
+ - 'lib/moxml/adapter/base.rb'
16
+ - 'lib/moxml/adapter/libxml.rb'
17
+ - 'lib/moxml/builder.rb'
18
+
19
+ # Offense count: 2
20
+ # This cop supports safe autocorrection (--autocorrect).
21
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
22
+ # SupportedStyles: with_first_element, with_fixed_indentation
23
+ Layout/ArrayAlignment:
24
+ Exclude:
25
+ - 'lib/moxml/xpath/compiler.rb'
26
+
27
+ # Offense count: 9
28
+ # This cop supports safe autocorrection (--autocorrect).
29
+ # Configuration parameters: EnforcedStyleAlignWith.
30
+ # SupportedStylesAlignWith: either, start_of_block, start_of_line
31
+ Layout/BlockAlignment:
32
+ Exclude:
33
+ - 'lib/moxml/adapter/libxml.rb'
34
+ - 'lib/moxml/adapter/ox.rb'
35
+ - 'spec/integration/shared_examples/edge_cases.rb'
36
+ - 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
37
+ - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
38
+ - 'spec/moxml/xpath/functions/node_functions_spec.rb'
39
+
40
+ # Offense count: 9
41
+ # This cop supports safe autocorrection (--autocorrect).
42
+ Layout/BlockEndNewline:
43
+ Exclude:
44
+ - 'lib/moxml/adapter/libxml.rb'
45
+ - 'lib/moxml/adapter/ox.rb'
46
+ - 'spec/integration/shared_examples/edge_cases.rb'
47
+ - 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
48
+ - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
49
+ - 'spec/moxml/xpath/functions/node_functions_spec.rb'
50
+
51
+ # Offense count: 2
52
+ # This cop supports safe autocorrection (--autocorrect).
53
+ Layout/ClosingParenthesisIndentation:
54
+ Exclude:
55
+ - 'lib/moxml/adapter/oga.rb'
56
+
57
+ # Offense count: 1
58
+ # This cop supports safe autocorrection (--autocorrect).
59
+ Layout/ElseAlignment:
60
+ Exclude:
61
+ - 'lib/moxml/adapter/base.rb'
62
+
9
63
  # Offense count: 4
10
64
  # This cop supports safe autocorrection (--autocorrect).
11
65
  Layout/EmptyLineAfterGuardClause:
@@ -27,13 +81,67 @@ Layout/EmptyLines:
27
81
  Exclude:
28
82
  - 'lib/moxml/adapter/ox.rb'
29
83
 
30
- # Offense count: 330
84
+ # Offense count: 2
85
+ # This cop supports safe autocorrection (--autocorrect).
86
+ Layout/EmptyLinesAroundMethodBody:
87
+ Exclude:
88
+ - 'lib/moxml/adapter/ox.rb'
89
+
90
+ # Offense count: 1
91
+ # This cop supports safe autocorrection (--autocorrect).
92
+ # Configuration parameters: EnforcedStyleAlignWith.
93
+ # SupportedStylesAlignWith: keyword, variable, start_of_line
94
+ Layout/EndAlignment:
95
+ Exclude:
96
+ - 'lib/moxml/adapter/base.rb'
97
+
98
+ # Offense count: 2
99
+ # This cop supports safe autocorrection (--autocorrect).
100
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
101
+ # SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
102
+ Layout/FirstArgumentIndentation:
103
+ Exclude:
104
+ - 'lib/moxml/adapter/oga.rb'
105
+
106
+ # Offense count: 2
107
+ # This cop supports safe autocorrection (--autocorrect).
108
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
109
+ # SupportedHashRocketStyles: key, separator, table
110
+ # SupportedColonStyles: key, separator, table
111
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
112
+ Layout/HashAlignment:
113
+ Exclude:
114
+ - 'lib/moxml/builder.rb'
115
+
116
+ # Offense count: 20
117
+ # This cop supports safe autocorrection (--autocorrect).
118
+ # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
119
+ # SupportedStylesAlignWith: start_of_line, relative_to_receiver
120
+ Layout/IndentationWidth:
121
+ Exclude:
122
+ - 'lib/moxml/adapter/base.rb'
123
+ - 'lib/moxml/adapter/libxml.rb'
124
+ - 'lib/moxml/adapter/ox.rb'
125
+ - 'spec/integration/shared_examples/edge_cases.rb'
126
+ - 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
127
+ - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
128
+ - 'spec/moxml/xpath/functions/node_functions_spec.rb'
129
+
130
+ # Offense count: 344
31
131
  # This cop supports safe autocorrection (--autocorrect).
32
132
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
33
133
  # URISchemes: http, https
34
134
  Layout/LineLength:
35
135
  Enabled: false
36
136
 
137
+ # Offense count: 2
138
+ # This cop supports safe autocorrection (--autocorrect).
139
+ # Configuration parameters: EnforcedStyle.
140
+ # SupportedStyles: symmetrical, new_line, same_line
141
+ Layout/MultilineMethodCallBraceLayout:
142
+ Exclude:
143
+ - 'lib/moxml/adapter/oga.rb'
144
+
37
145
  # Offense count: 1
38
146
  # This cop supports safe autocorrection (--autocorrect).
39
147
  # Configuration parameters: EnforcedStyle, IndentationWidth.
@@ -42,6 +150,17 @@ Layout/MultilineOperationIndentation:
42
150
  Exclude:
43
151
  - 'lib/moxml/adapter/ox.rb'
44
152
 
153
+ # Offense count: 10
154
+ # This cop supports safe autocorrection (--autocorrect).
155
+ # Configuration parameters: AllowInHeredoc.
156
+ Layout/TrailingWhitespace:
157
+ Exclude:
158
+ - 'lib/moxml/adapter/base.rb'
159
+ - 'lib/moxml/adapter/libxml.rb'
160
+ - 'lib/moxml/adapter/ox.rb'
161
+ - 'lib/moxml/builder.rb'
162
+ - 'lib/moxml/xpath/compiler.rb'
163
+
45
164
  # Offense count: 7
46
165
  # Configuration parameters: AllowedMethods.
47
166
  # AllowedMethods: enums
@@ -61,11 +180,10 @@ Lint/DuplicateBranch:
61
180
  - 'lib/moxml/document.rb'
62
181
  - 'lib/moxml/entity_registry.rb'
63
182
 
64
- # Offense count: 5
183
+ # Offense count: 4
65
184
  Lint/DuplicateMethods:
66
185
  Exclude:
67
186
  - 'lib/moxml/config.rb'
68
- - 'lib/moxml/element.rb'
69
187
  - 'lib/moxml/node.rb'
70
188
 
71
189
  # Offense count: 4
@@ -91,7 +209,7 @@ Lint/EmptyWhen:
91
209
  # Offense count: 3
92
210
  Lint/HashCompareByIdentity:
93
211
  Exclude:
94
- - 'lib/moxml/native_attachment.rb'
212
+ - 'lib/moxml/native_attachment/native.rb'
95
213
 
96
214
  # Offense count: 1
97
215
  Lint/IneffectiveAccessModifier:
@@ -127,12 +245,12 @@ Metrics/BlockLength:
127
245
  Metrics/BlockNesting:
128
246
  Max: 4
129
247
 
130
- # Offense count: 76
248
+ # Offense count: 75
131
249
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
132
250
  Metrics/CyclomaticComplexity:
133
251
  Enabled: false
134
252
 
135
- # Offense count: 186
253
+ # Offense count: 188
136
254
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
137
255
  Metrics/MethodLength:
138
256
  Max: 110
@@ -180,6 +298,12 @@ Naming/VariableNumber:
180
298
  - 'spec/moxml/allocation_guard_spec.rb'
181
299
  - 'spec/support/allocation_helper.rb'
182
300
 
301
+ # Offense count: 1
302
+ # Configuration parameters: MinSize.
303
+ Performance/CollectionLiteralInLoop:
304
+ Exclude:
305
+ - 'lib/moxml/xpath/compiler.rb'
306
+
183
307
  # Offense count: 5
184
308
  RSpec/BeforeAfterAll:
185
309
  Exclude:
@@ -205,12 +329,12 @@ RSpec/ContextWording:
205
329
  - 'spec/moxml/xpath/parser_spec.rb'
206
330
  - 'spec/performance/benchmark_spec.rb'
207
331
 
208
- # Offense count: 24
332
+ # Offense count: 25
209
333
  # Configuration parameters: IgnoredMetadata.
210
334
  RSpec/DescribeClass:
211
335
  Enabled: false
212
336
 
213
- # Offense count: 295
337
+ # Offense count: 328
214
338
  # Configuration parameters: CountAsOne.
215
339
  RSpec/ExampleLength:
216
340
  Max: 64
@@ -240,13 +364,13 @@ RSpec/LeakyConstantDeclaration:
240
364
  - 'spec/moxml/declaration_preservation_spec.rb'
241
365
  - 'spec/moxml/sax_spec.rb'
242
366
 
243
- # Offense count: 2
367
+ # Offense count: 4
244
368
  # Configuration parameters: .
245
369
  # SupportedStyles: have_received, receive
246
370
  RSpec/MessageSpies:
247
371
  EnforcedStyle: receive
248
372
 
249
- # Offense count: 390
373
+ # Offense count: 414
250
374
  RSpec/MultipleExpectations:
251
375
  Max: 10
252
376
 
@@ -306,6 +430,22 @@ Security/Eval:
306
430
  Exclude:
307
431
  - 'spec/moxml/xpath/ruby/generator_spec.rb'
308
432
 
433
+ # Offense count: 12
434
+ # This cop supports safe autocorrection (--autocorrect).
435
+ # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
436
+ # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
437
+ # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
438
+ # FunctionalMethods: let, let!, subject, watch
439
+ # AllowedMethods: lambda, proc, it
440
+ Style/BlockDelimiters:
441
+ Exclude:
442
+ - 'lib/moxml/adapter/libxml.rb'
443
+ - 'lib/moxml/adapter/ox.rb'
444
+ - 'spec/integration/shared_examples/edge_cases.rb'
445
+ - 'spec/integration/shared_examples/high_level/document_builder_behavior.rb'
446
+ - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
447
+ - 'spec/moxml/xpath/functions/node_functions_spec.rb'
448
+
309
449
  # Offense count: 1
310
450
  Style/DocumentDynamicEvalDefinition:
311
451
  Exclude:
@@ -329,6 +469,18 @@ Style/MissingRespondToMissing:
329
469
  Exclude:
330
470
  - 'lib/moxml/xpath/ruby/node.rb'
331
471
 
472
+ # Offense count: 1
473
+ # This cop supports safe autocorrection (--autocorrect).
474
+ Style/MultilineIfModifier:
475
+ Exclude:
476
+ - 'lib/moxml/builder.rb'
477
+
478
+ # Offense count: 1
479
+ # This cop supports safe autocorrection (--autocorrect).
480
+ Style/MultilineTernaryOperator:
481
+ Exclude:
482
+ - 'lib/moxml/adapter/base.rb'
483
+
332
484
  # Offense count: 1
333
485
  # This cop supports safe autocorrection (--autocorrect).
334
486
  # Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
@@ -365,7 +517,25 @@ Style/RedundantConstantBase:
365
517
  - 'spec/moxml/adapter/headed_ox_spec.rb'
366
518
 
367
519
  # Offense count: 1
520
+ # This cop supports safe autocorrection (--autocorrect).
521
+ Style/RedundantParentheses:
522
+ Exclude:
523
+ - 'lib/moxml/xpath/compiler.rb'
524
+
525
+ # Offense count: 8
368
526
  # This cop supports unsafe autocorrection (--autocorrect-all).
369
527
  Style/SelectByKind:
370
528
  Exclude:
529
+ - 'spec/integration/shared_examples/edge_cases.rb'
530
+ - 'spec/integration/shared_examples/entity_reference_whitespace.rb'
371
531
  - 'spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb'
532
+ - 'spec/integration/shared_examples/node_wrappers/node_set_behavior.rb'
533
+ - 'spec/moxml/xpath/functions/node_functions_spec.rb'
534
+
535
+ # Offense count: 2
536
+ # This cop supports safe autocorrection (--autocorrect).
537
+ # Configuration parameters: EnforcedStyle, MinSize.
538
+ # SupportedStyles: percent, brackets
539
+ Style/SymbolArray:
540
+ Exclude:
541
+ - 'lib/moxml/xpath/compiler.rb'
@@ -390,6 +390,10 @@ Text nodes contain character data.
390
390
  | `#text`
391
391
  | Alias for #content
392
392
  | ✅ Yes
393
+
394
+ | `#to_s`
395
+ | Returns the text content (same as `#content`)
396
+ | ✅ Yes
393
397
  |===
394
398
 
395
399
  ==== Identity Methods
@@ -35,12 +35,22 @@ module Moxml
35
35
  # not valid UTF-8, fall back to encoding as UTF-8 with
36
36
  # replacement to avoid raising on gsub.
37
37
  dup = xml.dup.force_encoding("UTF-8")
38
- dup.valid_encoding? ? dup : xml.dup.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
38
+ if dup.valid_encoding?
39
+ dup
40
+ else
41
+ xml.dup.encode("UTF-8",
42
+ "ASCII-8BIT", invalid: :replace, undef: :replace)
43
+ end
39
44
  elsif xml.encoding == Encoding::UTF_8
40
45
  xml
41
46
  else
42
47
  xml.encode("UTF-8")
43
48
  end
49
+ # Fast path: no `&` means no entity references to mark — skip
50
+ # the regex scan and string allocation entirely. The vast
51
+ # majority of XML payloads contain no entity references.
52
+ return str unless str.include?("&")
53
+
44
54
  str.gsub(ENTITY_NAME_RE) do |match|
45
55
  STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
46
56
  end
@@ -8,6 +8,9 @@ module Moxml
8
8
  # This wrapper hides LibXML's strict document ownership model,
9
9
  # allowing nodes to be moved between documents transparently.
10
10
  # Similar pattern to Ox adapter's customized classes.
11
+ #
12
+ # The Libxml adapter owns wrapper type mapping in one place so the
13
+ # wrapper classes do not duplicate node-type knowledge.
11
14
  class Node
12
15
  attr_reader :native
13
16
 
@@ -19,7 +19,12 @@ module Moxml
19
19
  # LibXML's .content already contains escaped text, but it over-escapes
20
20
  # quotes which don't need escaping in text nodes (only in attributes)
21
21
  def to_xml
22
- @native.content.gsub("&quot;", '"')
22
+ content = @native.content
23
+ # Skip the gsub allocation entirely when there's nothing to undo —
24
+ # the common case for parsed text without literal quotes.
25
+ return content unless content.include?("&quot;")
26
+
27
+ content.gsub("&quot;", '"')
23
28
  end
24
29
  end
25
30
  end
@@ -3,8 +3,21 @@
3
3
  module Moxml
4
4
  module Adapter
5
5
  module CustomizedOx
6
- # Ox uses Strings, but a string cannot have a parent reference
7
- class Text < ::Ox::Node; end
6
+ # Ox uses Strings for text content, but a String cannot carry a @parent
7
+ # back-reference. We subclass ::Ox::Node so a Text wrapper can hold one.
8
+ #
9
+ # ::Ox::Node subclasses that are neither ::Ox::Element nor ::Ox::Document
10
+ # are unknown to Ox.dump's standard XML emitter, so they fall through to
11
+ # Ox's generic object-marshalling format. The serializer in
12
+ # Moxml::Adapter::Ox#serialize special-cases this class to emit the value
13
+ # with proper XML escaping. The #to_s override ensures string
14
+ # interpolation (`"#{text}"`) produces the text content rather than the
15
+ # default Object representation.
16
+ class Text < ::Ox::Node
17
+ def to_s
18
+ value.to_s
19
+ end
20
+ end
8
21
  end
9
22
  end
10
23
  end
@@ -68,6 +68,7 @@ module Moxml
68
68
  next if child.is_a?(::REXML::Text) &&
69
69
  child.to_s.strip.empty? &&
70
70
  !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
+
71
72
  write(child, output)
72
73
  end
73
74
  when :eref
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ class Libxml < Base
6
+ # Tracks entity-reference insertions that cannot live in LibXML's native
7
+ # node tree, plus the child sequence needed to serialize them in order.
8
+ class EntityRefRegistry
9
+ ENTITY_REFS_KEY = :_entity_ref_pairs
10
+ CHILD_SEQUENCE_KEY = :_child_seq_pairs
11
+ NON_WHITESPACE_RE = /\S/
12
+ private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY, :NON_WHITESPACE_RE
13
+
14
+ def initialize(attachments, doc)
15
+ @attachments = attachments
16
+ @doc = doc
17
+ end
18
+
19
+ def active?
20
+ @doc ? @attachments.key?(@doc, ENTITY_REFS_KEY) : false
21
+ end
22
+
23
+ def register(element, ref)
24
+ return unless @doc && element
25
+
26
+ path = path_for(element)
27
+
28
+ refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY) || {}
29
+ (refs_by_path[path] ||= []) << ref
30
+ @attachments.set(@doc, ENTITY_REFS_KEY, refs_by_path)
31
+
32
+ seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY) || {}
33
+ existing = seq_by_path[path]
34
+ if existing
35
+ existing << :eref
36
+ else
37
+ seq_by_path[path] = Array.new(count_native_children(element), :native)
38
+ seq_by_path[path] << :eref
39
+ @attachments.set(@doc, CHILD_SEQUENCE_KEY, seq_by_path)
40
+ end
41
+ end
42
+
43
+ def append_native(element)
44
+ return unless @doc && element
45
+
46
+ seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
47
+ return unless seq_by_path
48
+
49
+ seq = seq_by_path[path_for(element)]
50
+ return unless seq
51
+
52
+ seq << :native
53
+ end
54
+
55
+ def refs_for(element)
56
+ return nil unless @doc && element
57
+
58
+ refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY)
59
+ refs_by_path && refs_by_path[path_for(element)]
60
+ end
61
+
62
+ def sequence_for(element)
63
+ return nil unless @doc && element
64
+
65
+ seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
66
+ seq_by_path && seq_by_path[path_for(element)]
67
+ end
68
+
69
+ def serialization_for(element)
70
+ refs = refs_for(element)
71
+ return [nil, nil] unless refs && !refs.empty?
72
+
73
+ seq = sequence_for(element)
74
+ return [nil, nil] unless seq
75
+
76
+ [refs, seq]
77
+ end
78
+
79
+ private
80
+
81
+ def path_for(element)
82
+ element.path
83
+ end
84
+
85
+ def count_native_children(element)
86
+ return 0 unless element.is_a?(::LibXML::XML::Node) && element.children?
87
+
88
+ count = 0
89
+ element.each_child do |child|
90
+ count += 1 unless blank_text_node?(child)
91
+ end
92
+ count
93
+ end
94
+
95
+ def blank_text_node?(child)
96
+ child.text? && blank_content?(child.content)
97
+ end
98
+
99
+ def blank_content?(content)
100
+ content.nil? || !content.match?(NON_WHITESPACE_RE)
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ class Libxml < Base
6
+ # Restores configured character entities into explicit Moxml
7
+ # EntityReference nodes after LibXML has parsed the native tree.
8
+ class EntityRestorer
9
+ def initialize(doc, adapter: Libxml)
10
+ @doc = doc
11
+ @ctx = doc.context
12
+ @registry = @ctx.entity_registry
13
+ @config = @ctx.config
14
+ @adapter = adapter
15
+ end
16
+
17
+ def run
18
+ return unless @registry && @doc.root
19
+
20
+ walk(@doc.root)
21
+ end
22
+
23
+ private
24
+
25
+ def walk(element)
26
+ # Snapshot because we may add/remove siblings during the walk.
27
+ element.children.to_a.each do |child|
28
+ if child.is_a?(::Moxml::Text)
29
+ restore_text_node(child)
30
+ elsif child.is_a?(::Moxml::Element)
31
+ walk(child)
32
+ end
33
+ end
34
+ end
35
+
36
+ # Matches DocumentBuilder's previous behavior, including the libxml
37
+ # limitation that adjacent native text nodes get merged.
38
+ def restore_text_node(text_node)
39
+ content = text_node.content
40
+ return unless content
41
+
42
+ chunks = chunk_text(content)
43
+ return if chunks.size == 1 && chunks.first.first == :text
44
+
45
+ parent = text_node.parent
46
+ return unless parent
47
+
48
+ text_node.remove
49
+ chunks.each { |type, payload| append_chunk(parent, type, payload) }
50
+ end
51
+
52
+ def chunk_text(content)
53
+ chunks = []
54
+ buffer = +""
55
+ restorable = @registry.restorable_codepoints
56
+
57
+ content.each_char do |char|
58
+ cp = char.ord
59
+ if restorable.include?(cp) &&
60
+ (name = @registry.primary_name_for_codepoint(cp)) &&
61
+ @registry.should_restore?(cp, config: @config)
62
+ unless buffer.empty?
63
+ chunks << [:text, buffer.dup]
64
+ buffer.clear
65
+ end
66
+ chunks << [:eref, name]
67
+ else
68
+ buffer << char
69
+ end
70
+ end
71
+
72
+ chunks << [:text, buffer.dup] unless buffer.empty?
73
+ chunks
74
+ end
75
+
76
+ def append_chunk(parent, type, payload)
77
+ case type
78
+ when :text
79
+ parent.add_child(::Moxml::Text.new(@adapter.create_native_text(payload), @ctx))
80
+ when :eref
81
+ parent.add_child(
82
+ ::Moxml::EntityReference.new(
83
+ @adapter.create_native_entity_reference(payload),
84
+ @ctx,
85
+ ),
86
+ )
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end