coradoc 1.1.8 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of coradoc might be problematic. Click here for more details.

Files changed (225):
  1. checksums.yaml +4 -4
  2. data/.rspec +1 -1
  3. data/Rakefile +3 -12
  4. data/exe/coradoc +21 -2
  5. data/lib/coradoc/cli.rb +185 -91
  6. data/lib/coradoc/configurable.rb +527 -0
  7. data/lib/coradoc/coradoc.rb +463 -0
  8. data/lib/coradoc/core_model/annotation_block.rb +57 -0
  9. data/lib/coradoc/core_model/base.rb +172 -0
  10. data/lib/coradoc/core_model/bibliography.rb +41 -0
  11. data/lib/coradoc/core_model/bibliography_entry.rb +48 -0
  12. data/lib/coradoc/core_model/block.rb +63 -0
  13. data/lib/coradoc/core_model/children_content.rb +53 -0
  14. data/lib/coradoc/core_model/comment_block.rb +10 -0
  15. data/lib/coradoc/core_model/definition_item.rb +46 -0
  16. data/lib/coradoc/core_model/definition_list.rb +28 -0
  17. data/lib/coradoc/core_model/element_attribute.rb +26 -0
  18. data/lib/coradoc/core_model/example_block.rb +10 -0
  19. data/lib/coradoc/core_model/footnote.rb +92 -0
  20. data/lib/coradoc/core_model/horizontal_rule_block.rb +10 -0
  21. data/lib/coradoc/core_model/id_generator.rb +16 -0
  22. data/lib/coradoc/core_model/image.rb +66 -0
  23. data/lib/coradoc/core_model/inline_element.rb +140 -0
  24. data/lib/coradoc/core_model/list_block.rb +135 -0
  25. data/lib/coradoc/core_model/list_item.rb +142 -0
  26. data/lib/coradoc/core_model/listing_block.rb +13 -0
  27. data/lib/coradoc/core_model/literal_block.rb +10 -0
  28. data/lib/coradoc/core_model/metadata.rb +79 -0
  29. data/lib/coradoc/core_model/open_block.rb +10 -0
  30. data/lib/coradoc/core_model/paragraph_block.rb +10 -0
  31. data/lib/coradoc/core_model/pass_block.rb +10 -0
  32. data/lib/coradoc/core_model/quote_block.rb +12 -0
  33. data/lib/coradoc/core_model/reviewer_block.rb +10 -0
  34. data/lib/coradoc/core_model/sidebar_block.rb +10 -0
  35. data/lib/coradoc/core_model/source_block.rb +10 -0
  36. data/lib/coradoc/core_model/structural_element.rb +94 -0
  37. data/lib/coradoc/core_model/table.rb +148 -0
  38. data/lib/coradoc/core_model/term.rb +53 -0
  39. data/lib/coradoc/core_model/text_content.rb +22 -0
  40. data/lib/coradoc/core_model/toc.rb +105 -0
  41. data/lib/coradoc/core_model/toc_generator.rb +151 -0
  42. data/lib/coradoc/core_model/verse_block.rb +12 -0
  43. data/lib/coradoc/core_model.rb +77 -0
  44. data/lib/coradoc/document_builder.rb +184 -0
  45. data/lib/coradoc/document_manipulator.rb +203 -0
  46. data/lib/coradoc/errors.rb +312 -0
  47. data/lib/coradoc/format_module.rb +49 -0
  48. data/lib/coradoc/hooks.rb +176 -0
  49. data/lib/coradoc/input.rb +17 -7
  50. data/lib/coradoc/logger.rb +54 -0
  51. data/lib/coradoc/output.rb +17 -6
  52. data/lib/coradoc/performance_regression.rb +109 -0
  53. data/lib/coradoc/processor_registry.rb +50 -0
  54. data/lib/coradoc/query.rb +455 -0
  55. data/lib/coradoc/registry.rb +156 -0
  56. data/lib/coradoc/serializer/registry.rb +150 -0
  57. data/lib/coradoc/transform.rb +11 -0
  58. data/lib/coradoc/validation.rb +646 -0
  59. data/lib/coradoc/version.rb +1 -1
  60. data/lib/coradoc/visitor.rb +283 -0
  61. data/lib/coradoc.rb +40 -19
  62. metadata +67 -277
  63. data/.editorconfig +0 -15
  64. data/.envrc +0 -1
  65. data/.irbrc +0 -1
  66. data/.pryrc.sample +0 -1
  67. data/.rubocop.yml +0 -14
  68. data/.rubocop_todo.yml +0 -179
  69. data/CHANGELOG.md +0 -9
  70. data/CODE_OF_CONDUCT.md +0 -84
  71. data/Dockerfile +0 -19
  72. data/Gemfile +0 -16
  73. data/LICENSE.txt +0 -21
  74. data/Makefile +0 -35
  75. data/README.Docker.adoc +0 -57
  76. data/README.adoc +0 -119
  77. data/coradoc.gemspec +0 -40
  78. data/docker-compose.yml +0 -14
  79. data/exe/reverse_adoc +0 -81
  80. data/exe/w2a +0 -60
  81. data/flake.lock +0 -114
  82. data/flake.nix +0 -135
  83. data/lib/coradoc/converter.rb +0 -144
  84. data/lib/coradoc/document.rb +0 -77
  85. data/lib/coradoc/element/admonition.rb +0 -18
  86. data/lib/coradoc/element/attribute.rb +0 -36
  87. data/lib/coradoc/element/attribute_list.rb +0 -138
  88. data/lib/coradoc/element/audio.rb +0 -33
  89. data/lib/coradoc/element/author.rb +0 -24
  90. data/lib/coradoc/element/base.rb +0 -92
  91. data/lib/coradoc/element/bibliography.rb +0 -24
  92. data/lib/coradoc/element/bibliography_entry.rb +0 -24
  93. data/lib/coradoc/element/block/core.rb +0 -76
  94. data/lib/coradoc/element/block/example.rb +0 -23
  95. data/lib/coradoc/element/block/listing.rb +0 -21
  96. data/lib/coradoc/element/block/literal.rb +0 -21
  97. data/lib/coradoc/element/block/open.rb +0 -22
  98. data/lib/coradoc/element/block/pass.rb +0 -21
  99. data/lib/coradoc/element/block/quote.rb +0 -19
  100. data/lib/coradoc/element/block/reviewer_comment.rb +0 -19
  101. data/lib/coradoc/element/block/side.rb +0 -19
  102. data/lib/coradoc/element/block/sourcecode.rb +0 -21
  103. data/lib/coradoc/element/block.rb +0 -17
  104. data/lib/coradoc/element/break.rb +0 -11
  105. data/lib/coradoc/element/comment_block.rb +0 -22
  106. data/lib/coradoc/element/comment_line.rb +0 -18
  107. data/lib/coradoc/element/document_attributes.rb +0 -33
  108. data/lib/coradoc/element/header.rb +0 -22
  109. data/lib/coradoc/element/image/block_image.rb +0 -32
  110. data/lib/coradoc/element/image/core.rb +0 -58
  111. data/lib/coradoc/element/image/inline_image.rb +0 -12
  112. data/lib/coradoc/element/image.rb +0 -10
  113. data/lib/coradoc/element/include.rb +0 -18
  114. data/lib/coradoc/element/inline/anchor.rb +0 -19
  115. data/lib/coradoc/element/inline/attribute_reference.rb +0 -19
  116. data/lib/coradoc/element/inline/bold.rb +0 -25
  117. data/lib/coradoc/element/inline/cross_reference.rb +0 -46
  118. data/lib/coradoc/element/inline/footnote.rb +0 -24
  119. data/lib/coradoc/element/inline/hard_line_break.rb +0 -11
  120. data/lib/coradoc/element/inline/highlight.rb +0 -25
  121. data/lib/coradoc/element/inline/italic.rb +0 -25
  122. data/lib/coradoc/element/inline/link.rb +0 -42
  123. data/lib/coradoc/element/inline/monospace.rb +0 -25
  124. data/lib/coradoc/element/inline/quotation.rb +0 -20
  125. data/lib/coradoc/element/inline/small.rb +0 -19
  126. data/lib/coradoc/element/inline/span.rb +0 -37
  127. data/lib/coradoc/element/inline/subscript.rb +0 -20
  128. data/lib/coradoc/element/inline/superscript.rb +0 -20
  129. data/lib/coradoc/element/inline/underline.rb +0 -19
  130. data/lib/coradoc/element/inline.rb +0 -23
  131. data/lib/coradoc/element/list/core.rb +0 -51
  132. data/lib/coradoc/element/list/definition.rb +0 -29
  133. data/lib/coradoc/element/list/ordered.rb +0 -17
  134. data/lib/coradoc/element/list/unordered.rb +0 -17
  135. data/lib/coradoc/element/list.rb +0 -13
  136. data/lib/coradoc/element/list_item.rb +0 -98
  137. data/lib/coradoc/element/list_item_definition.rb +0 -32
  138. data/lib/coradoc/element/paragraph.rb +0 -37
  139. data/lib/coradoc/element/revision.rb +0 -27
  140. data/lib/coradoc/element/section.rb +0 -62
  141. data/lib/coradoc/element/table.rb +0 -91
  142. data/lib/coradoc/element/tag.rb +0 -19
  143. data/lib/coradoc/element/term.rb +0 -22
  144. data/lib/coradoc/element/text_element.rb +0 -92
  145. data/lib/coradoc/element/title.rb +0 -62
  146. data/lib/coradoc/element/video.rb +0 -50
  147. data/lib/coradoc/generator.rb +0 -19
  148. data/lib/coradoc/input/adoc.rb +0 -30
  149. data/lib/coradoc/input/docx.rb +0 -64
  150. data/lib/coradoc/input/html/LICENSE.txt +0 -25
  151. data/lib/coradoc/input/html/README.adoc +0 -308
  152. data/lib/coradoc/input/html/cleaner.rb +0 -142
  153. data/lib/coradoc/input/html/config.rb +0 -77
  154. data/lib/coradoc/input/html/converters/a.rb +0 -52
  155. data/lib/coradoc/input/html/converters/aside.rb +0 -16
  156. data/lib/coradoc/input/html/converters/audio.rb +0 -29
  157. data/lib/coradoc/input/html/converters/base.rb +0 -108
  158. data/lib/coradoc/input/html/converters/blockquote.rb +0 -22
  159. data/lib/coradoc/input/html/converters/br.rb +0 -15
  160. data/lib/coradoc/input/html/converters/bypass.rb +0 -81
  161. data/lib/coradoc/input/html/converters/code.rb +0 -23
  162. data/lib/coradoc/input/html/converters/div.rb +0 -19
  163. data/lib/coradoc/input/html/converters/dl.rb +0 -62
  164. data/lib/coradoc/input/html/converters/drop.rb +0 -26
  165. data/lib/coradoc/input/html/converters/em.rb +0 -21
  166. data/lib/coradoc/input/html/converters/figure.rb +0 -25
  167. data/lib/coradoc/input/html/converters/h.rb +0 -42
  168. data/lib/coradoc/input/html/converters/head.rb +0 -23
  169. data/lib/coradoc/input/html/converters/hr.rb +0 -15
  170. data/lib/coradoc/input/html/converters/ignore.rb +0 -20
  171. data/lib/coradoc/input/html/converters/img.rb +0 -110
  172. data/lib/coradoc/input/html/converters/li.rb +0 -17
  173. data/lib/coradoc/input/html/converters/mark.rb +0 -19
  174. data/lib/coradoc/input/html/converters/markup.rb +0 -31
  175. data/lib/coradoc/input/html/converters/math.rb +0 -38
  176. data/lib/coradoc/input/html/converters/ol.rb +0 -65
  177. data/lib/coradoc/input/html/converters/p.rb +0 -23
  178. data/lib/coradoc/input/html/converters/pass_through.rb +0 -17
  179. data/lib/coradoc/input/html/converters/pre.rb +0 -55
  180. data/lib/coradoc/input/html/converters/q.rb +0 -16
  181. data/lib/coradoc/input/html/converters/strong.rb +0 -20
  182. data/lib/coradoc/input/html/converters/sub.rb +0 -22
  183. data/lib/coradoc/input/html/converters/sup.rb +0 -22
  184. data/lib/coradoc/input/html/converters/table.rb +0 -319
  185. data/lib/coradoc/input/html/converters/td.rb +0 -81
  186. data/lib/coradoc/input/html/converters/text.rb +0 -32
  187. data/lib/coradoc/input/html/converters/th.rb +0 -18
  188. data/lib/coradoc/input/html/converters/tr.rb +0 -22
  189. data/lib/coradoc/input/html/converters/video.rb +0 -29
  190. data/lib/coradoc/input/html/converters.rb +0 -59
  191. data/lib/coradoc/input/html/errors.rb +0 -14
  192. data/lib/coradoc/input/html/html_converter.rb +0 -168
  193. data/lib/coradoc/input/html/plugin.rb +0 -131
  194. data/lib/coradoc/input/html/plugins/plateau.rb +0 -213
  195. data/lib/coradoc/input/html/postprocessor.rb +0 -220
  196. data/lib/coradoc/input/html.rb +0 -61
  197. data/lib/coradoc/legacy_parser.rb +0 -200
  198. data/lib/coradoc/oscal.rb +0 -99
  199. data/lib/coradoc/output/adoc.rb +0 -19
  200. data/lib/coradoc/output/coradoc_tree_debug.rb +0 -21
  201. data/lib/coradoc/parser/asciidoc/admonition.rb +0 -24
  202. data/lib/coradoc/parser/asciidoc/attribute_list.rb +0 -89
  203. data/lib/coradoc/parser/asciidoc/base.rb +0 -87
  204. data/lib/coradoc/parser/asciidoc/bibliography.rb +0 -29
  205. data/lib/coradoc/parser/asciidoc/block.rb +0 -94
  206. data/lib/coradoc/parser/asciidoc/citation.rb +0 -30
  207. data/lib/coradoc/parser/asciidoc/content.rb +0 -64
  208. data/lib/coradoc/parser/asciidoc/document_attributes.rb +0 -25
  209. data/lib/coradoc/parser/asciidoc/header.rb +0 -29
  210. data/lib/coradoc/parser/asciidoc/inline.rb +0 -195
  211. data/lib/coradoc/parser/asciidoc/list.rb +0 -115
  212. data/lib/coradoc/parser/asciidoc/paragraph.rb +0 -54
  213. data/lib/coradoc/parser/asciidoc/section.rb +0 -61
  214. data/lib/coradoc/parser/asciidoc/table.rb +0 -32
  215. data/lib/coradoc/parser/asciidoc/term.rb +0 -41
  216. data/lib/coradoc/parser/asciidoc/text.rb +0 -158
  217. data/lib/coradoc/parser/base.rb +0 -40
  218. data/lib/coradoc/parser.rb +0 -11
  219. data/lib/coradoc/reverse_adoc.rb +0 -18
  220. data/lib/coradoc/transformer.rb +0 -476
  221. data/lib/coradoc/util.rb +0 -12
  222. data/lib/reverse_adoc.rb +0 -20
  223. data/utils/inspect_asciidoc.rb +0 -29
  224. data/utils/parser_analyzer.rb +0 -66
  225. data/utils/round_trip.rb +0 -53
@@ -0,0 +1,455 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Document querying and introspection API.
5
+ #
6
+ # Provides CSS-like selectors for navigating and querying document trees.
7
+ module Query
8
+ # Selector parsing and matching
9
# Parses one compound CSS-like selector (no combinators, no commas) and
# tests elements against it.
#
# Recognised pieces: an optional leading element type, `#id`, `.class`,
# `[attr]` / `[attr<op>value]` filters (operators `=`, `~=`, `^=`, `$=`,
# `*=`) and `:pseudo-class(argument)` suffixes.
class Selector
  # Operator tokens accepted inside `[...]`, mapped to the condition names
  # understood by #match_attribute_condition.
  ATTRIBUTE_OPERATORS = {
    '=' => :equals,
    '~=' => :includes,
    '^=' => :starts_with,
    '$=' => :ends_with,
    '*=' => :contains
  }.freeze

  # Anchored so the operator is always the one that directly follows the
  # attribute name; an `=` inside the value can never be mistaken for it.
  # Captures: 1 = name, 2 = operator, 3/4/5 = double-quoted, single-quoted
  # or bare value.
  ATTRIBUTE_PATTERN = /
    \A\s*
    ([\w-]+)\s*
    (?:
      ([~^$*]?=)\s*
      (?:"([^"]*)"|'([^']*)'|(.*?))
    )?
    \s*\z
  /mx

  attr_reader :element_type, :id, :classes, :attributes, :pseudo_classes

  # Builds a selector from its textual form.
  #
  # @param selector [#to_s] selector text
  # @return [Selector]
  def self.parse(selector)
    new.parse(selector)
  end

  def initialize
    @element_type = nil
    @id = nil
    @classes = []
    @attributes = {}
    @pseudo_classes = []
  end

  # Consumes the selector text piece by piece, recording each recognised
  # part and removing it from the working copy kept in @original.
  #
  # @param selector [#to_s] selector text
  # @return [self]
  def parse(selector)
    @original = selector.to_s.strip
    return self if @original.empty?

    @original.sub!(/\A([a-z_][a-z0-9_-]*)/i) do |match|
      @element_type = match.downcase
      ''
    end

    # Attribute filters are removed before ids and classes: their values
    # may legitimately contain '#' or '.' (e.g. [href="#top"],
    # [src$=".png"]) which must not be read as an id or a class.
    @original.gsub!(/\[([^\]]+)\]/) do
      parse_attribute(::Regexp.last_match(1))
      ''
    end

    @original.sub!(/#([a-z_][a-z0-9_-]*)/i) do
      @id = ::Regexp.last_match(1)
      ''
    end

    @original.gsub!(/\.([a-z_][a-z0-9_-]*)/i) do
      @classes << ::Regexp.last_match(1)
      ''
    end

    @original.gsub!(/:([a-z-]+)(?:\(([^)]+)\))?/i) do
      name = ::Regexp.last_match(1).downcase
      arg = ::Regexp.last_match(2)
      @pseudo_classes << { name: name, argument: arg }
      ''
    end

    self
  end

  # Checks type, id, classes and attribute filters. Pseudo-classes are NOT
  # checked here because they need sibling context; see
  # #matches_pseudo_classes?.
  #
  # @param element [Object, nil]
  # @return [Boolean]
  def matches?(element)
    return false unless element
    return false if @element_type && !type_matches?(element)
    return false if @id && !id_matches?(element)
    return false if @classes.any? && !classes_match?(element)
    return false if @attributes.any? && !attributes_match?(element)

    true
  end

  # Checks every parsed pseudo-class against the element's position.
  # Unrecognised pseudo-class names are treated as satisfied.
  #
  # @param element [Object]
  # @param siblings [Array] the element's sibling list (including itself)
  # @param index [Integer] zero-based position of element in siblings
  # @return [Boolean]
  def matches_pseudo_classes?(element, siblings:, index:)
    @pseudo_classes.all? do |pseudo|
      case pseudo[:name]
      when 'first-child'
        index.zero?
      when 'last-child'
        index == siblings.length - 1
      when 'nth-child'
        nth_child_matches?(pseudo[:argument], index)
      when 'only-child'
        siblings.length == 1
      when 'empty'
        empty_element?(element)
      else
        true
      end
    end
  end

  # @return [Boolean] true when the selector is (or starts with) `*`
  def universal?
    @element_type == '*' || @original == '*'
  end

  private

  # Records one `[...]` filter body in @attributes.
  def parse_attribute(expr)
    match = ATTRIBUTE_PATTERN.match(expr)

    unless match
      # Unsupported syntax: fall back to a presence test on the first word.
      word = expr[/\w+/]
      @attributes[word.to_sym] = { operator: :present } if word
      return
    end

    name = match[1].to_sym
    operator = match[2]

    @attributes[name] =
      if operator
        { operator: ATTRIBUTE_OPERATORS.fetch(operator), value: match[3] || match[4] || match[5] }
      else
        { operator: :present }
      end
  end

  # `:nth-child` accepts a 1-based integer, or the keywords odd / even.
  def nth_child_matches?(argument, index)
    case argument.to_s.strip.downcase
    when 'odd' then index.even? # positions are 1-based, index is 0-based
    when 'even' then index.odd?
    else index == argument.to_i - 1
    end
  end

  # Elements that do not expose an id simply never match an id selector.
  def id_matches?(element)
    element.respond_to?(:id) && element.id == @id
  end

  def type_matches?(element)
    return true if @element_type == '*'

    typed = element.is_a?(CoreModel::StructuralElement) || element.is_a?(CoreModel::Block)
    return element.element_type.to_s.downcase == @element_type.downcase if typed && element.element_type

    class_to_query_name(element.class) == @element_type
  end

  # Converts a class such as Foo::ListBlock into "list_block".
  # Anonymous classes (name is nil) yield an empty string.
  def class_to_query_name(klass)
    klass.name
         .to_s
         .split('::')
         .last
         .to_s
         .gsub(/([A-Z])/) { "_#{::Regexp.last_match(1).downcase}" }
         .sub(/^_/, '')
         .downcase
  end

  def classes_match?(element)
    element_classes = if element.is_a?(CoreModel::StructuralElement) && element.element_type
                        # Normalised the same way as in #type_matches?.
                        [element.element_type.to_s.downcase]
                      elsif element.is_a?(CoreModel::Base)
                        []
                      else
                        extract_role(element)
                      end

    @classes.all? { |c| element_classes.include?(c.downcase) }
  end

  # Splits the element's role (if it has one) into lower-cased words.
  def extract_role(element)
    return [] unless element.respond_to?(:role)

    role = element.public_send(:role)
    role ? role.to_s.split.map(&:downcase) : []
  end

  def attributes_match?(element)
    @attributes.all? do |attr_name, condition|
      value = get_attribute_value(element, attr_name)
      match_attribute_condition(value, condition)
    end
  end

  # Reads the value an attribute filter should be compared against.
  # Returns nil when the element does not provide it.
  def get_attribute_value(element, attr_name)
    case attr_name
    when :id, :title, :level
      element.public_send(attr_name)
    when :element_type
      element.element_type if element.is_a?(CoreModel::StructuralElement) || element.is_a?(CoreModel::Block)
    when :type
      element.type if element.is_a?(CoreModel::AnnotationBlock) || element.is_a?(CoreModel::InlineElement)
    else
      element.public_send(attr_name) if element.is_a?(CoreModel::Base) && element.class.attributes.key?(attr_name)
    end
  rescue NoMethodError
    nil
  end

  def match_attribute_condition(value, condition)
    case condition[:operator]
    when :present
      !value.nil?
    when :equals
      value.to_s == condition[:value]
    when :includes
      value.to_s.split.map(&:downcase).include?(condition[:value].downcase)
    when :starts_with
      value.to_s.start_with?(condition[:value])
    when :ends_with
      value.to_s.end_with?(condition[:value])
    when :contains
      value.to_s.include?(condition[:value])
    else
      false
    end
  end

  # Anything that is neither a Block nor a StructuralElement counts as empty.
  def empty_element?(element)
    return true unless element.is_a?(CoreModel::Block) || element.is_a?(CoreModel::StructuralElement)

    content = element.content
    case content
    when String
      content.strip.empty?
    when Array
      content.empty?
    else
      content.nil?
    end
  end
end
226
+
227
+ # Query result set - collection of matched elements
228
# An ordered, enumerable collection of elements returned by a query.
#
# nil entries are discarded on construction. #map, #select and #reject
# return a new ResultSet rather than a plain Array.
class ResultSet
  include Enumerable

  attr_reader :elements

  # @param elements [Array, Object, nil] coerced with Array() and compacted
  def initialize(elements = [])
    @elements = Array(elements).compact
  end

  # Yields each element in order.
  def each(&block)
    @elements.each(&block)
  end

  # @param index [Integer, Range] forwarded to Array#[]
  def [](index)
    @elements[index]
  end

  # @return [Integer] number of elements
  def length
    @elements.length
  end
  alias size length

  # @return [Boolean]
  def empty?
    @elements.empty?
  end

  # First element, or the first +count+ elements when a count is given
  # (same contract as Enumerable#first, which this method overrides).
  #
  # @param count [Integer, nil]
  # @return [Object, nil, Array]
  def first(count = nil)
    count.nil? ? @elements.first : @elements.first(count)
  end

  # Last element, or the last +count+ elements when a count is given.
  #
  # @param count [Integer, nil]
  # @return [Object, nil, Array]
  def last(count = nil)
    count.nil? ? @elements.last : @elements.last(count)
  end

  # Keeps the elements matching +selector+. Pseudo-classes in the selector
  # are not evaluated here (Selector#matches? does not check them).
  #
  # When called with a block and no selector it behaves like #select, so
  # the Enumerable-style `filter { ... }` form keeps working.
  #
  # @param selector [#to_s, nil]
  # @return [ResultSet]
  def filter(selector = nil, &block)
    return select(&block) if selector.nil? && block

    parsed = Selector.parse(selector)
    ResultSet.new(@elements.select { |e| parsed.matches?(e) })
  end

  # Runs +selector+ beneath every element of this set and merges the
  # de-duplicated results.
  #
  # @param selector [#to_s]
  # @return [ResultSet]
  def query(selector)
    results = @elements.flat_map do |element|
      Query.query_within(element, selector).to_a
    end
    ResultSet.new(results.uniq)
  end

  # @return [ResultSet] mapped values (nil results are dropped)
  def map(&block)
    ResultSet.new(@elements.map(&block))
  end

  # @return [ResultSet]
  def select(&block)
    ResultSet.new(@elements.select(&block))
  end

  # @return [ResultSet]
  def reject(&block)
    ResultSet.new(@elements.reject(&block))
  end

  # @return [Array] a shallow copy of the underlying elements
  def to_a
    @elements.dup
  end

  def inspect
    "#<Coradoc::Query::ResultSet count=#{length}>"
  end
end
295
+
296
+ # Query engine for executing selectors
297
# Executes selector strings against a document tree.
#
# A selector may be a comma-separated list of groups; each group is a
# chain of compound selectors joined by the child (`>`) or descendant
# (whitespace) combinator. Commas, spaces and `>` that appear inside
# `[...]` or `(...)` belong to the compound selector and never split it.
class Engine
  # Convenience wrapper around Engine#query.
  #
  # @param document [Object, nil] root of the tree to search
  # @param selector [#to_s] selector text
  # @return [ResultSet]
  def self.query(document, selector)
    new.query(document, selector)
  end

  # @param document [Object, nil] root of the tree to search
  # @param selector [#to_s] selector text
  # @return [ResultSet] matches in traversal order; empty when the
  #   document is nil or the selector is blank
  def query(document, selector)
    text = selector.to_s.strip
    return ResultSet.new if document.nil? || text.empty?

    chains = split_groups(text).map { |group| parse_steps(group) }.reject(&:empty?)
    matches = chains.flat_map { |steps| run_chain(document, steps) }

    # A single compound selector visits every node exactly once, so only
    # grouped or chained selectors can yield the same element twice.
    simple = chains.length == 1 && chains.first.length == 1
    ResultSet.new(simple ? matches : matches.uniq)
  end

  private

  # Yields each character of +text+ with a flag telling whether it sits
  # outside every `[...]` / `(...)` pair. Quotes are only tracked inside
  # brackets, where they can hide a closing bracket.
  def scan_top_level(text)
    depth = 0
    quote = nil

    text.each_char do |char|
      top_level = depth.zero?

      if quote
        quote = nil if char == quote
      elsif depth.positive? && (char == '"' || char == "'")
        quote = char
      elsif char == '[' || char == '('
        depth += 1
      elsif (char == ']' || char == ')') && depth.positive?
        depth -= 1
      end

      yield char, top_level
    end
  end

  # Splits a selector list at its top-level commas.
  #
  # @return [Array<String>] non-empty, stripped groups
  def split_groups(selector)
    groups = [+'']
    scan_top_level(selector) do |char, top_level|
      if top_level && char == ','
        groups << +''
      else
        groups.last << char
      end
    end
    groups.map(&:strip).reject(&:empty?)
  end

  # Breaks one group into steps.
  #
  # @return [Array<Hash>] `{ combinator:, selector: }` entries; the
  #   combinator is nil, :child or :descendant and describes how the step
  #   relates to the one before it
  def parse_steps(group)
    steps = []
    buffer = +''
    combinator = nil

    scan_top_level(group) do |char, top_level|
      if top_level && (char == '>' || char.match?(/\s/))
        unless buffer.empty?
          steps << { combinator: combinator, selector: buffer }
          buffer = +''
          combinator = :descendant
        end
        # '>' upgrades whatever whitespace surrounded it to a child combinator.
        combinator = :child if char == '>'
      else
        buffer << char
      end
    end

    steps << { combinator: combinator, selector: buffer } unless buffer.empty?
    steps
  end

  # Evaluates one chain of steps and returns the elements matched by the
  # final step.
  def run_chain(document, steps)
    first, *rest = steps

    # A chain that starts with a combinator ("> a") is resolved relative
    # to the document root; otherwise the first step may match the root
    # itself or any node beneath it.
    current = if first[:combinator]
                related_matches([document], first)
              else
                matches_in_tree(document, first[:selector])
              end

    rest.each do |step|
      break if current.empty?

      current = related_matches(current, step)
    end

    current
  end

  # All nodes of the tree (root included) matching a compound selector.
  def matches_in_tree(document, selector_text)
    parsed = Selector.parse(selector_text)
    found = []

    traverse(document) do |element, siblings, index|
      found << element if parsed.matches?(element) && pseudo_matches?(parsed, element, siblings, index)
    end

    found
  end

  # Children (or all descendants) of +contexts+ that satisfy +step+.
  def related_matches(contexts, step)
    parsed = Selector.parse(step[:selector])
    deep = step[:combinator] == :descendant

    contexts.flat_map { |context| collect_matches(context, parsed, deep) }.uniq
  end

  # Pre-order walk below +context+; descends past direct children only
  # when +deep+ is true.
  def collect_matches(context, parsed, deep, found = [])
    siblings = get_children(context)

    siblings.each_with_index do |child, index|
      found << child if parsed.matches?(child) && pseudo_matches?(parsed, child, siblings, index)
      collect_matches(child, parsed, deep, found) if deep
    end

    found
  end

  def pseudo_matches?(parsed, element, siblings, index)
    return true if parsed.pseudo_classes.empty?

    parsed.matches_pseudo_classes?(element, siblings: siblings, index: index)
  end

  # Pre-order traversal yielding (element, siblings, index). The root is
  # yielded with an empty sibling list and index 0.
  def traverse(element, siblings: [], index: 0, &block)
    return unless element

    yield(element, siblings, index)

    children = get_children(element)
    children.each_with_index do |child, i|
      traverse(child, siblings: children, index: i, &block)
    end
  end

  def get_children(element)
    Query.get_children(element)
  end
end
409
+
410
+ # Module-level query methods
411
class << self
  # Runs +selector+ against the whole +document+ via Engine.
  #
  # @return [ResultSet]
  def query(document, selector)
    Engine.query(document, selector)
  end

  # Collects every node strictly below +element+ that satisfies
  # +selector+ (the element itself is never a candidate). The selector is
  # parsed as a single compound selector.
  #
  # @return [ResultSet]
  def query_within(element, selector)
    compiled = Selector.parse(selector)
    hits = []

    traverse_children(element) do |node, peers, position|
      next unless compiled.matches?(node)

      positional = compiled.pseudo_classes.any?
      next if positional && !compiled.matches_pseudo_classes?(node, siblings: peers, index: position)

      hits << node
    end

    ResultSet.new(hits)
  end

  # Child nodes of +element+ as seen by the query engine: the non-empty
  # `children` of a StructuralElement, otherwise the CoreModel::Base
  # entries of a Block's `content`, otherwise nothing.
  #
  # @return [Array]
  def get_children(element)
    return [] unless element

    return element.children if element.is_a?(CoreModel::StructuralElement) && element.children&.any?

    return [] unless element.is_a?(CoreModel::Block) && element.content

    Array(element.content).select { |entry| entry.is_a?(CoreModel::Base) }
  end

  private

  # Pre-order walk over everything below +element+, yielding
  # (node, siblings, index) for each node.
  def traverse_children(element, siblings: [], index: 0, &block)
    nested = get_children(element)

    nested.each_with_index do |node, position|
      yield(node, nested, position)
      traverse_children(node, &block)
    end
  end
end
454
+ end
455
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # General-purpose named-item registry.
5
+ #
6
+ # Used by the format registry (Coradoc.registry), Input processors,
7
+ # and Output processors. Each instance stores items keyed by symbol
8
+ # name, with optional per-item options.
9
+ #
10
+ # @example Format registry
11
+ # registry = Registry.new
12
+ # registry.register(:html, Coradoc::Html)
13
+ # registry.get(:html) # => Coradoc::Html
14
+ #
15
+ # @example Processor registry (self-identifying items)
16
+ # registry = Registry.new(error_label: "input processor")
17
+ # registry.define(MyProcessor) # uses MyProcessor.processor_id
18
+ # registry.for_file("doc.html") # checks processor_match? on each item
19
+ #
20
# Stores named items (and optional per-item options) keyed by Symbol.
#
# Lookup methods (#get, #[], #options_for, #registered?) accept either a
# Symbol or a String name; registration requires a Symbol.
class Registry
  attr_reader :error_label

  # Direct access to the items hash (for backward compatibility)
  # @return [Hash<Symbol, Object>]
  attr_reader :items

  # @param error_label [String, nil] label for error messages in #process
  def initialize(error_label: nil)
    @items = {}
    @options = {}
    @error_label = error_label
  end

  # Register an item by explicit name
  #
  # @param name [Symbol] the item name
  # @param item [Object] the item to register
  # @param opts [Hash] optional per-item configuration
  # @raise [ArgumentError] if name is not a Symbol
  def register(name, item, opts = {})
    raise ArgumentError, "Name must be a Symbol, got #{name.class}" unless name.is_a?(Symbol)

    @items[name] = item
    @options[name] = opts
  end

  # Register a self-identifying item (extracts name via processor_id)
  #
  # @param item [Object] item that responds to #processor_id
  # @param opts [Hash] optional per-item configuration
  # @raise [ArgumentError] if processor_id is not a Symbol
  def define(item, **opts)
    register(item.processor_id, item, opts)
  end

  # Get a registered item by name
  #
  # @param name [Symbol, String] the item name (strings are coerced to symbols)
  # @return [Object, nil]
  def get(name)
    @items[normalize_name(name)]
  end
  alias [] get

  # Get options for a registered item
  #
  # @param name [Symbol, String]
  # @return [Hash, nil]
  def options_for(name)
    @options[normalize_name(name)]
  end

  # Check if an item is registered
  #
  # @param name [Symbol, String]
  # @return [Boolean]
  def registered?(name)
    @items.key?(normalize_name(name))
  end

  # List all registered item names
  #
  # @return [Array<Symbol>]
  def list
    @items.keys
  end

  # Number of registered items
  #
  # @return [Integer]
  def size
    @items.size
  end

  # Remove all registered items and their options
  def clear
    @items.clear
    @options.clear
  end

  # Iterate over all items
  #
  # @yield [Symbol, Object] name and item
  # @return [Enumerator] when no block is given
  def each(&block)
    @items.each(&block)
  end

  # Iterate over item values
  #
  # @yield [Object]
  # @return [Enumerator] when no block is given
  def each_value(&block)
    @items.each_value(&block)
  end

  # Iterate over item names
  #
  # @yield [Symbol]
  # @return [Enumerator] when no block is given
  def each_key(&block)
    @items.each_key(&block)
  end

  # Find the first item whose processor_match? returns true for the given
  # filename. Items that do not implement processor_match? are skipped.
  #
  # @param filename [String]
  # @return [Object, nil]
  def for_file(filename)
    @items.each_value.find do |item|
      # Explicit capability check instead of rescuing NoMethodError, so a
      # genuine NoMethodError raised inside a processor is not hidden.
      item.respond_to?(:processor_match?) && item.processor_match?(filename)
    end
  end

  # Resolve and execute: find item by format or filename, call processor_execute
  #
  # @param content [Object] content to process
  # @param options [Hash] :format or :filename for resolution
  # @return [Object] result of processor_execute
  # @raise [ArgumentError] if no matching item found
  def process(content, options = {})
    item = if options[:format]
             get(options[:format])
           elsif options[:filename]
             for_file(options[:filename])
           end

    label = @error_label || 'processor'
    raise ArgumentError, "No #{label} found for: #{options}" unless item

    item.processor_execute(content, options)
  end

  private

  # Coerces String-like names to Symbols; anything that cannot be
  # converted (e.g. nil) is returned unchanged and simply will not be found.
  def normalize_name(name)
    name.respond_to?(:to_sym) ? name.to_sym : name
  end
end
+ end
156
+ end