coradoc 1.1.8 → 2.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of coradoc might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.rspec +1 -1
- data/Rakefile +3 -12
- data/exe/coradoc +21 -2
- data/lib/coradoc/cli.rb +185 -91
- data/lib/coradoc/configurable.rb +527 -0
- data/lib/coradoc/coradoc.rb +463 -0
- data/lib/coradoc/core_model/annotation_block.rb +57 -0
- data/lib/coradoc/core_model/base.rb +172 -0
- data/lib/coradoc/core_model/bibliography.rb +41 -0
- data/lib/coradoc/core_model/bibliography_entry.rb +48 -0
- data/lib/coradoc/core_model/block.rb +63 -0
- data/lib/coradoc/core_model/children_content.rb +53 -0
- data/lib/coradoc/core_model/comment_block.rb +10 -0
- data/lib/coradoc/core_model/definition_item.rb +46 -0
- data/lib/coradoc/core_model/definition_list.rb +28 -0
- data/lib/coradoc/core_model/element_attribute.rb +26 -0
- data/lib/coradoc/core_model/example_block.rb +10 -0
- data/lib/coradoc/core_model/footnote.rb +92 -0
- data/lib/coradoc/core_model/horizontal_rule_block.rb +10 -0
- data/lib/coradoc/core_model/id_generator.rb +16 -0
- data/lib/coradoc/core_model/image.rb +66 -0
- data/lib/coradoc/core_model/inline_element.rb +140 -0
- data/lib/coradoc/core_model/list_block.rb +135 -0
- data/lib/coradoc/core_model/list_item.rb +142 -0
- data/lib/coradoc/core_model/listing_block.rb +13 -0
- data/lib/coradoc/core_model/literal_block.rb +10 -0
- data/lib/coradoc/core_model/metadata.rb +79 -0
- data/lib/coradoc/core_model/open_block.rb +10 -0
- data/lib/coradoc/core_model/paragraph_block.rb +10 -0
- data/lib/coradoc/core_model/pass_block.rb +10 -0
- data/lib/coradoc/core_model/quote_block.rb +12 -0
- data/lib/coradoc/core_model/reviewer_block.rb +10 -0
- data/lib/coradoc/core_model/sidebar_block.rb +10 -0
- data/lib/coradoc/core_model/source_block.rb +10 -0
- data/lib/coradoc/core_model/structural_element.rb +94 -0
- data/lib/coradoc/core_model/table.rb +148 -0
- data/lib/coradoc/core_model/term.rb +53 -0
- data/lib/coradoc/core_model/text_content.rb +22 -0
- data/lib/coradoc/core_model/toc.rb +105 -0
- data/lib/coradoc/core_model/toc_generator.rb +151 -0
- data/lib/coradoc/core_model/verse_block.rb +12 -0
- data/lib/coradoc/core_model.rb +77 -0
- data/lib/coradoc/document_builder.rb +184 -0
- data/lib/coradoc/document_manipulator.rb +203 -0
- data/lib/coradoc/errors.rb +312 -0
- data/lib/coradoc/format_module.rb +49 -0
- data/lib/coradoc/hooks.rb +176 -0
- data/lib/coradoc/input.rb +17 -7
- data/lib/coradoc/logger.rb +54 -0
- data/lib/coradoc/output.rb +17 -6
- data/lib/coradoc/performance_regression.rb +109 -0
- data/lib/coradoc/processor_registry.rb +50 -0
- data/lib/coradoc/query.rb +455 -0
- data/lib/coradoc/registry.rb +156 -0
- data/lib/coradoc/serializer/registry.rb +150 -0
- data/lib/coradoc/transform.rb +11 -0
- data/lib/coradoc/validation.rb +646 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc/visitor.rb +283 -0
- data/lib/coradoc.rb +40 -19
- metadata +67 -277
- data/.editorconfig +0 -15
- data/.envrc +0 -1
- data/.irbrc +0 -1
- data/.pryrc.sample +0 -1
- data/.rubocop.yml +0 -14
- data/.rubocop_todo.yml +0 -179
- data/CHANGELOG.md +0 -9
- data/CODE_OF_CONDUCT.md +0 -84
- data/Dockerfile +0 -19
- data/Gemfile +0 -16
- data/LICENSE.txt +0 -21
- data/Makefile +0 -35
- data/README.Docker.adoc +0 -57
- data/README.adoc +0 -119
- data/coradoc.gemspec +0 -40
- data/docker-compose.yml +0 -14
- data/exe/reverse_adoc +0 -81
- data/exe/w2a +0 -60
- data/flake.lock +0 -114
- data/flake.nix +0 -135
- data/lib/coradoc/converter.rb +0 -144
- data/lib/coradoc/document.rb +0 -77
- data/lib/coradoc/element/admonition.rb +0 -18
- data/lib/coradoc/element/attribute.rb +0 -36
- data/lib/coradoc/element/attribute_list.rb +0 -138
- data/lib/coradoc/element/audio.rb +0 -33
- data/lib/coradoc/element/author.rb +0 -24
- data/lib/coradoc/element/base.rb +0 -92
- data/lib/coradoc/element/bibliography.rb +0 -24
- data/lib/coradoc/element/bibliography_entry.rb +0 -24
- data/lib/coradoc/element/block/core.rb +0 -76
- data/lib/coradoc/element/block/example.rb +0 -23
- data/lib/coradoc/element/block/listing.rb +0 -21
- data/lib/coradoc/element/block/literal.rb +0 -21
- data/lib/coradoc/element/block/open.rb +0 -22
- data/lib/coradoc/element/block/pass.rb +0 -21
- data/lib/coradoc/element/block/quote.rb +0 -19
- data/lib/coradoc/element/block/reviewer_comment.rb +0 -19
- data/lib/coradoc/element/block/side.rb +0 -19
- data/lib/coradoc/element/block/sourcecode.rb +0 -21
- data/lib/coradoc/element/block.rb +0 -17
- data/lib/coradoc/element/break.rb +0 -11
- data/lib/coradoc/element/comment_block.rb +0 -22
- data/lib/coradoc/element/comment_line.rb +0 -18
- data/lib/coradoc/element/document_attributes.rb +0 -33
- data/lib/coradoc/element/header.rb +0 -22
- data/lib/coradoc/element/image/block_image.rb +0 -32
- data/lib/coradoc/element/image/core.rb +0 -58
- data/lib/coradoc/element/image/inline_image.rb +0 -12
- data/lib/coradoc/element/image.rb +0 -10
- data/lib/coradoc/element/include.rb +0 -18
- data/lib/coradoc/element/inline/anchor.rb +0 -19
- data/lib/coradoc/element/inline/attribute_reference.rb +0 -19
- data/lib/coradoc/element/inline/bold.rb +0 -25
- data/lib/coradoc/element/inline/cross_reference.rb +0 -46
- data/lib/coradoc/element/inline/footnote.rb +0 -24
- data/lib/coradoc/element/inline/hard_line_break.rb +0 -11
- data/lib/coradoc/element/inline/highlight.rb +0 -25
- data/lib/coradoc/element/inline/italic.rb +0 -25
- data/lib/coradoc/element/inline/link.rb +0 -42
- data/lib/coradoc/element/inline/monospace.rb +0 -25
- data/lib/coradoc/element/inline/quotation.rb +0 -20
- data/lib/coradoc/element/inline/small.rb +0 -19
- data/lib/coradoc/element/inline/span.rb +0 -37
- data/lib/coradoc/element/inline/subscript.rb +0 -20
- data/lib/coradoc/element/inline/superscript.rb +0 -20
- data/lib/coradoc/element/inline/underline.rb +0 -19
- data/lib/coradoc/element/inline.rb +0 -23
- data/lib/coradoc/element/list/core.rb +0 -51
- data/lib/coradoc/element/list/definition.rb +0 -29
- data/lib/coradoc/element/list/ordered.rb +0 -17
- data/lib/coradoc/element/list/unordered.rb +0 -17
- data/lib/coradoc/element/list.rb +0 -13
- data/lib/coradoc/element/list_item.rb +0 -98
- data/lib/coradoc/element/list_item_definition.rb +0 -32
- data/lib/coradoc/element/paragraph.rb +0 -37
- data/lib/coradoc/element/revision.rb +0 -27
- data/lib/coradoc/element/section.rb +0 -62
- data/lib/coradoc/element/table.rb +0 -91
- data/lib/coradoc/element/tag.rb +0 -19
- data/lib/coradoc/element/term.rb +0 -22
- data/lib/coradoc/element/text_element.rb +0 -92
- data/lib/coradoc/element/title.rb +0 -62
- data/lib/coradoc/element/video.rb +0 -50
- data/lib/coradoc/generator.rb +0 -19
- data/lib/coradoc/input/adoc.rb +0 -30
- data/lib/coradoc/input/docx.rb +0 -64
- data/lib/coradoc/input/html/LICENSE.txt +0 -25
- data/lib/coradoc/input/html/README.adoc +0 -308
- data/lib/coradoc/input/html/cleaner.rb +0 -142
- data/lib/coradoc/input/html/config.rb +0 -77
- data/lib/coradoc/input/html/converters/a.rb +0 -52
- data/lib/coradoc/input/html/converters/aside.rb +0 -16
- data/lib/coradoc/input/html/converters/audio.rb +0 -29
- data/lib/coradoc/input/html/converters/base.rb +0 -108
- data/lib/coradoc/input/html/converters/blockquote.rb +0 -22
- data/lib/coradoc/input/html/converters/br.rb +0 -15
- data/lib/coradoc/input/html/converters/bypass.rb +0 -81
- data/lib/coradoc/input/html/converters/code.rb +0 -23
- data/lib/coradoc/input/html/converters/div.rb +0 -19
- data/lib/coradoc/input/html/converters/dl.rb +0 -62
- data/lib/coradoc/input/html/converters/drop.rb +0 -26
- data/lib/coradoc/input/html/converters/em.rb +0 -21
- data/lib/coradoc/input/html/converters/figure.rb +0 -25
- data/lib/coradoc/input/html/converters/h.rb +0 -42
- data/lib/coradoc/input/html/converters/head.rb +0 -23
- data/lib/coradoc/input/html/converters/hr.rb +0 -15
- data/lib/coradoc/input/html/converters/ignore.rb +0 -20
- data/lib/coradoc/input/html/converters/img.rb +0 -110
- data/lib/coradoc/input/html/converters/li.rb +0 -17
- data/lib/coradoc/input/html/converters/mark.rb +0 -19
- data/lib/coradoc/input/html/converters/markup.rb +0 -31
- data/lib/coradoc/input/html/converters/math.rb +0 -38
- data/lib/coradoc/input/html/converters/ol.rb +0 -65
- data/lib/coradoc/input/html/converters/p.rb +0 -23
- data/lib/coradoc/input/html/converters/pass_through.rb +0 -17
- data/lib/coradoc/input/html/converters/pre.rb +0 -55
- data/lib/coradoc/input/html/converters/q.rb +0 -16
- data/lib/coradoc/input/html/converters/strong.rb +0 -20
- data/lib/coradoc/input/html/converters/sub.rb +0 -22
- data/lib/coradoc/input/html/converters/sup.rb +0 -22
- data/lib/coradoc/input/html/converters/table.rb +0 -319
- data/lib/coradoc/input/html/converters/td.rb +0 -81
- data/lib/coradoc/input/html/converters/text.rb +0 -32
- data/lib/coradoc/input/html/converters/th.rb +0 -18
- data/lib/coradoc/input/html/converters/tr.rb +0 -22
- data/lib/coradoc/input/html/converters/video.rb +0 -29
- data/lib/coradoc/input/html/converters.rb +0 -59
- data/lib/coradoc/input/html/errors.rb +0 -14
- data/lib/coradoc/input/html/html_converter.rb +0 -168
- data/lib/coradoc/input/html/plugin.rb +0 -131
- data/lib/coradoc/input/html/plugins/plateau.rb +0 -213
- data/lib/coradoc/input/html/postprocessor.rb +0 -220
- data/lib/coradoc/input/html.rb +0 -61
- data/lib/coradoc/legacy_parser.rb +0 -200
- data/lib/coradoc/oscal.rb +0 -99
- data/lib/coradoc/output/adoc.rb +0 -19
- data/lib/coradoc/output/coradoc_tree_debug.rb +0 -21
- data/lib/coradoc/parser/asciidoc/admonition.rb +0 -24
- data/lib/coradoc/parser/asciidoc/attribute_list.rb +0 -89
- data/lib/coradoc/parser/asciidoc/base.rb +0 -87
- data/lib/coradoc/parser/asciidoc/bibliography.rb +0 -29
- data/lib/coradoc/parser/asciidoc/block.rb +0 -94
- data/lib/coradoc/parser/asciidoc/citation.rb +0 -30
- data/lib/coradoc/parser/asciidoc/content.rb +0 -64
- data/lib/coradoc/parser/asciidoc/document_attributes.rb +0 -25
- data/lib/coradoc/parser/asciidoc/header.rb +0 -29
- data/lib/coradoc/parser/asciidoc/inline.rb +0 -195
- data/lib/coradoc/parser/asciidoc/list.rb +0 -115
- data/lib/coradoc/parser/asciidoc/paragraph.rb +0 -54
- data/lib/coradoc/parser/asciidoc/section.rb +0 -61
- data/lib/coradoc/parser/asciidoc/table.rb +0 -32
- data/lib/coradoc/parser/asciidoc/term.rb +0 -41
- data/lib/coradoc/parser/asciidoc/text.rb +0 -158
- data/lib/coradoc/parser/base.rb +0 -40
- data/lib/coradoc/parser.rb +0 -11
- data/lib/coradoc/reverse_adoc.rb +0 -18
- data/lib/coradoc/transformer.rb +0 -476
- data/lib/coradoc/util.rb +0 -12
- data/lib/reverse_adoc.rb +0 -20
- data/utils/inspect_asciidoc.rb +0 -29
- data/utils/parser_analyzer.rb +0 -66
- data/utils/round_trip.rb +0 -53
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
# Document querying and introspection API.
|
|
5
|
+
#
|
|
6
|
+
# Provides CSS-like selectors for navigating and querying document trees.
|
|
7
|
+
module Query
|
|
8
|
+
# Parses one compound CSS-like selector and tests elements against it.
#
# A compound selector is an optional element type followed by any mix of
# `#id`, `.class`, `[attribute]` and `:pseudo-class` parts, for example
# `section#intro.note[title^="Intro"]:first-child`.
#
# Combinators (`a b`, `a > b`) and selector lists (`a, b`) are NOT handled
# here; text this class does not recognise is silently left over.
class Selector
  # Operator token inside `[...]` => condition name stored in #attributes.
  ATTRIBUTE_OPERATORS = {
    '=' => :equals,
    '~=' => :includes,
    '^=' => :starts_with,
    '$=' => :ends_with,
    '*=' => :contains
  }.freeze

  # name, operator, optionally quoted non-empty value.
  ATTRIBUTE_PATTERN = /(\w+)\s*([~^$*]?=)\s*["']?([^"']+)["']?/.freeze

  attr_reader :element_type, :id, :classes, :attributes, :pseudo_classes

  # Convenience constructor: builds a Selector and parses +selector+ into it.
  #
  # @param selector [#to_s] selector text
  # @return [Selector]
  def self.parse(selector)
    new.parse(selector)
  end

  def initialize
    @element_type = nil
    @id = nil
    @classes = []
    @attributes = {}
    @pseudo_classes = []
  end

  # Parses +selector+ and records its parts on this instance.
  #
  # @param selector [#to_s] selector text; surrounding whitespace is ignored
  # @return [self]
  def parse(selector)
    # After parsing, @original holds only the text that no rule consumed
    # (for example "*" for the universal selector); see #universal?.
    @original = selector.to_s.strip
    return self if @original.empty?

    @original.sub!(/\A([a-z_][a-z0-9_-]*)/i) do |match|
      @element_type = match.downcase
      ''
    end

    # Attribute groups are removed first so that '.', '#' and ':' inside an
    # attribute value (e.g. [title="foo.bar"], [href="#top"]) are not
    # mistaken for class, id or pseudo-class markers.
    @original.gsub!(/\[([^\]]+)\]/) do
      parse_attribute(::Regexp.last_match(1))
      ''
    end

    @original.sub!(/#([a-z_][a-z0-9_-]*)/i) do
      @id = ::Regexp.last_match(1)
      ''
    end

    @original.gsub!(/\.([a-z_][a-z0-9_-]*)/i) do
      @classes << ::Regexp.last_match(1)
      ''
    end

    @original.gsub!(/:([a-z-]+)(?:\(([^)]+)\))?/i) do
      @pseudo_classes << {
        name: ::Regexp.last_match(1).downcase,
        argument: ::Regexp.last_match(2)
      }
      ''
    end

    self
  end

  # Tests type, id, class and attribute conditions. Pseudo-classes are NOT
  # checked here because they need sibling context; see
  # #matches_pseudo_classes?.
  #
  # @param element [Object, nil]
  # @return [Boolean]
  def matches?(element)
    return false unless element
    return false if @element_type && !type_matches?(element)
    return false if @id && !id_matches?(element)
    return false if @classes.any? && !classes_match?(element)
    return false if @attributes.any? && !attributes_match?(element)

    true
  end

  # Tests every parsed pseudo-class against the element's position.
  #
  # @param element [Object] element under test (only used by `:empty`)
  # @param siblings [Array] the element's sibling list, including itself
  # @param index [Integer] zero-based position of the element in +siblings+
  # @return [Boolean] true when all pseudo-classes hold; pseudo-classes
  #   with an unrecognised name are treated as satisfied
  def matches_pseudo_classes?(element, siblings:, index:)
    @pseudo_classes.all? do |pseudo|
      case pseudo[:name]
      when 'first-child' then index.zero?
      when 'last-child' then index == siblings.length - 1
      # The argument is read with to_i, so only plain integers are
      # meaningful; anything else becomes 0 and never matches.
      when 'nth-child' then index == pseudo[:argument].to_i - 1
      when 'only-child' then siblings.length == 1
      when 'empty' then empty_element?(element)
      else true
      end
    end
  end

  # @return [Boolean] true when the selector is the universal selector `*`
  def universal?
    @element_type == '*' || @original == '*'
  end

  private

  # Records one `[...]` expression (without the brackets) in @attributes.
  # A bare name, or an expression with an empty value, means "present".
  def parse_attribute(expr)
    if (match = ATTRIBUTE_PATTERN.match(expr))
      @attributes[match[1].to_sym] = {
        operator: ATTRIBUTE_OPERATORS.fetch(match[2]),
        value: match[3]
      }
    elsif (match = /(\w+)/.match(expr))
      @attributes[match[1].to_sym] = { operator: :present }
    end
  end

  def typed_element?(element)
    element.is_a?(CoreModel::StructuralElement) || element.is_a?(CoreModel::Block)
  end

  # Structural elements and blocks that carry an element_type are compared
  # by it; everything else is compared by its snake_cased class name.
  def type_matches?(element)
    return true if @element_type == '*'

    if typed_element?(element) && element.element_type
      return element.element_type.to_s.downcase == @element_type.downcase
    end

    class_to_query_name(element.class) == @element_type
  end

  # Elements that expose no #id can never satisfy an id selector.
  def id_matches?(element)
    element.respond_to?(:id) && element.id == @id
  end

  # "Coradoc::CoreModel::ListItem" => "list_item". Anonymous classes have
  # no name and map to "".
  def class_to_query_name(klass)
    klass.name.to_s.split('::').last.to_s
         .gsub(/([A-Z])/) { "_#{::Regexp.last_match(1).downcase}" }
         .sub(/\A_/, '')
         .downcase
  end

  def classes_match?(element)
    element_classes =
      if element.is_a?(CoreModel::StructuralElement) && element.element_type
        # Normalised the same way as in #type_matches?.
        [element.element_type.to_s.downcase]
      elsif element.is_a?(CoreModel::Base)
        []
      else
        extract_role(element)
      end

    @classes.all? { |name| element_classes.include?(name.downcase) }
  end

  # Splits the element's #role into lower-cased class names; elements
  # without a usable #role have no classes.
  def extract_role(element)
    role = element.public_send(:role)
    role ? role.to_s.split.map(&:downcase) : []
  rescue NoMethodError
    []
  end

  def attributes_match?(element)
    @attributes.all? do |attr_name, condition|
      match_attribute_condition(get_attribute_value(element, attr_name), condition)
    end
  end

  # Best-effort attribute lookup: any NoMethodError raised while reading
  # is treated as "attribute absent" (nil).
  def get_attribute_value(element, attr_name)
    case attr_name
    when :id, :title, :level
      element.public_send(attr_name)
    when :element_type
      element.element_type if typed_element?(element)
    when :type
      element.type if element.is_a?(CoreModel::AnnotationBlock) || element.is_a?(CoreModel::InlineElement)
    else
      element.public_send(attr_name) if element.is_a?(CoreModel::Base) && element.class.attributes.key?(attr_name)
    end
  rescue NoMethodError
    nil
  end

  # Values are compared as strings; only :includes is case-insensitive.
  def match_attribute_condition(value, condition)
    expected = condition[:value]
    case condition[:operator]
    when :present then !value.nil?
    when :equals then value.to_s == expected
    when :includes then value.to_s.split.map(&:downcase).include?(expected.downcase)
    when :starts_with then value.to_s.start_with?(expected)
    when :ends_with then value.to_s.end_with?(expected)
    when :contains then value.to_s.include?(expected)
    else false
    end
  end

  # Elements that are neither blocks nor structural elements count as empty.
  def empty_element?(element)
    return true unless element.is_a?(CoreModel::Block) || element.is_a?(CoreModel::StructuralElement)

    content = element.content
    case content
    when String then content.strip.empty?
    when Array then content.empty?
    else content.nil?
    end
  end
end
|
|
226
|
+
|
|
227
|
+
# Query result set - an ordered collection of matched elements.
#
# Behaves like a read-only Array for the common accessors. #map, #select
# and #reject return a new ResultSet rather than an Array, so results stay
# chainable.
class ResultSet
  include Enumerable

  attr_reader :elements

  # @param elements [Array, Object, nil] coerced with Array(); nil entries
  #   are dropped
  def initialize(elements = [])
    @elements = Array(elements).compact
  end

  # Iterates over the matched elements in order.
  def each(&block)
    @elements.each(&block)
  end

  # @param index [Integer, Range] forwarded to Array#[]
  def [](index)
    @elements[index]
  end

  # @return [Integer] number of matched elements
  def length
    @elements.length
  end
  alias size length

  # @return [Boolean]
  def empty?
    @elements.empty?
  end

  # With no argument returns the first element (or nil); with a count
  # returns an Array of up to that many leading elements, like Array#first.
  #
  # @param count [Integer] optional number of elements
  def first(*count)
    @elements.first(*count)
  end

  # With no argument returns the last element (or nil); with a count
  # returns an Array of up to that many trailing elements, like Array#last.
  #
  # @param count [Integer] optional number of elements
  def last(*count)
    @elements.last(*count)
  end

  # Keeps the elements that match +selector+. Pseudo-classes in the
  # selector are not evaluated here.
  #
  # @param selector [#to_s] a single compound selector
  # @return [ResultSet]
  def filter(selector)
    parsed = Selector.parse(selector)
    ResultSet.new(@elements.select { |element| parsed.matches?(element) })
  end

  # Runs +selector+ beneath every element of this set and merges the
  # de-duplicated matches.
  #
  # @param selector [#to_s]
  # @return [ResultSet]
  def query(selector)
    nested = @elements.flat_map { |element| Query.query_within(element, selector).to_a }
    ResultSet.new(nested.uniq)
  end

  # Like Enumerable#map but wraps the outcome in a ResultSet, which means
  # nil results are dropped.
  #
  # @return [ResultSet]
  def map(&block)
    ResultSet.new(@elements.map(&block))
  end

  # @return [ResultSet] elements for which the block is truthy
  def select(&block)
    ResultSet.new(@elements.select(&block))
  end

  # @return [ResultSet] elements for which the block is falsy
  def reject(&block)
    ResultSet.new(@elements.reject(&block))
  end

  # @return [Array] a shallow copy of the matched elements
  def to_a
    @elements.dup
  end

  def inspect
    "#<Coradoc::Query::ResultSet count=#{length}>"
  end
end
|
|
295
|
+
|
|
296
|
+
# Query engine for executing selectors against a document tree.
#
# Supports selector lists (`a, b`), the descendant combinator (`a b`) and
# the child combinator (`a > b` or `a>b`), chained to any length. Commas,
# whitespace and `>` inside `[...]`, `(...)` or quoted attribute values
# belong to the compound selector and are never treated as separators.
class Engine
  # @param document [Object, nil] root of the tree to search
  # @param selector [#to_s] selector text
  # @return [ResultSet]
  def self.query(document, selector)
    new.query(document, selector)
  end

  # Finds every element in +document+ (the root included) that matches
  # +selector+.
  #
  # @param document [Object, nil] root of the tree to search
  # @param selector [#to_s] selector text; surrounding whitespace is ignored
  # @return [ResultSet] empty when the document is nil or the selector blank
  def query(document, selector)
    selector = selector.to_s.strip
    return ResultSet.new if document.nil? || selector.empty?

    groups = split_selector_list(selector)
    return query_multiple(document, groups) unless groups.length == 1

    query_with_combinators(document, groups.first)
  end

  private

  # Union of the results of each selector, without duplicates.
  def query_multiple(document, selectors)
    matches = selectors.flat_map { |sel| query(document, sel).to_a }
    ResultSet.new(matches.uniq)
  end

  # Resolves one selector that may contain combinators: the first compound
  # is matched anywhere in the tree, the remaining steps are walked
  # downwards from each of those matches.
  def query_with_combinators(document, selector)
    parts = parse_combinator_selector(selector)
    return ResultSet.new if parts[:first].empty?

    roots = query_compound(document, parts[:first])
    return roots if parts[:rest].empty?

    matches = roots.to_a.flat_map { |root| find_matching_descendants(root, parts[:rest]) }
    ResultSet.new(matches.uniq)
  end

  # Matches a single compound selector against the whole tree.
  def query_compound(document, selector)
    parsed = Selector.parse(selector)
    results = []

    traverse(document) do |element, siblings, index|
      next unless parsed.matches?(element)
      next unless pseudo_matches?(parsed, element, siblings, index)

      results << element
    end

    ResultSet.new(results)
  end

  # Splits a selector list on the commas that sit outside brackets,
  # parentheses and quotes. Blank entries are discarded.
  def split_selector_list(selector)
    groups = [String.new]
    each_selector_char(selector) do |char, top_level|
      if top_level && char == ','
        groups << String.new
      else
        groups.last << char
      end
    end
    groups.map(&:strip).reject(&:empty?)
  end

  # Breaks a selector into its first compound and the steps that follow.
  #
  # @return [Hash] `{ first: String, rest: [{ combinator: :child | :descendant,
  #   selector: String }, ...] }`; `first` is "" when no compound was found
  def parse_combinator_selector(selector)
    steps = []
    combinator = :descendant
    buffer = String.new

    each_selector_char(selector.to_s) do |char, top_level|
      if top_level && (char == '>' || char.match?(/\s/))
        unless buffer.empty?
          steps << { combinator: combinator, selector: buffer }
          buffer = String.new
          combinator = :descendant
        end
        # A '>' upgrades the pending combinator; the whitespace around it
        # must not downgrade it again.
        combinator = :child if char == '>'
      else
        buffer << char
      end
    end
    steps << { combinator: combinator, selector: buffer } unless buffer.empty?

    first, *rest = steps
    { first: first ? first[:selector] : '', rest: rest }
  end

  # Yields each character together with a flag telling whether it sits
  # outside every `[...]` / `(...)` group. Quotes are only tracked inside a
  # group so that separators in quoted attribute values stay protected.
  def each_selector_char(text)
    depth = 0
    quote = nil

    text.each_char do |char|
      top_level = depth.zero?
      if quote
        quote = nil if char == quote
      elsif depth.positive? && (char == '"' || char == "'")
        quote = char
      elsif char == '[' || char == '('
        depth += 1
      elsif (char == ']' || char == ')') && depth.positive?
        depth -= 1
      end
      yield char, top_level
    end
  end

  # Walks +parts+ downwards from +parent+ and returns the elements that
  # satisfy the final step. May contain duplicates; callers de-duplicate.
  def find_matching_descendants(parent, parts)
    return [parent] if parts.empty?

    part, *remaining = parts
    parsed = Selector.parse(part[:selector])
    siblings = get_children(parent)
    results = []

    siblings.each_with_index do |child, index|
      next unless %i[child descendant].include?(part[:combinator])

      if parsed.matches?(child) && pseudo_matches?(parsed, child, siblings, index)
        results.concat(find_matching_descendants(child, remaining))
      end
      # A descendant step may also be satisfied further down the tree.
      results.concat(find_matching_descendants(child, parts)) if part[:combinator] == :descendant
    end

    results
  end

  def pseudo_matches?(parsed, element, siblings, index)
    return true if parsed.pseudo_classes.empty?

    parsed.matches_pseudo_classes?(element, siblings: siblings, index: index)
  end

  # Depth-first, parent-before-children walk. The root is yielded with an
  # empty sibling list and index 0.
  def traverse(element, siblings: [], index: 0, &block)
    return unless element

    yield(element, siblings, index)

    children = get_children(element)
    children.each_with_index do |child, i|
      traverse(child, siblings: children, index: i, &block)
    end
  end

  def get_children(element)
    Query.get_children(element)
  end
end
|
|
409
|
+
|
|
410
|
+
# Module-level query helpers.
class << self
  # Runs +selector+ against +document+ through the Engine.
  #
  # @return [ResultSet]
  def query(document, selector)
    Engine.query(document, selector)
  end

  # Matches a single compound selector against everything below +element+;
  # the element itself is never part of the result.
  #
  # @return [ResultSet]
  def query_within(element, selector)
    parsed = Selector.parse(selector)
    matched = []

    traverse_children(element) do |child, siblings, index|
      next unless parsed.matches?(child)

      if parsed.pseudo_classes.any?
        next unless parsed.matches_pseudo_classes?(child, siblings: siblings, index: index)
      end

      matched << child
    end

    ResultSet.new(matched)
  end

  # Direct children of +element+: the children of a structural element
  # when it has any, otherwise the CoreModel::Base entries of a block's
  # content, otherwise nothing.
  #
  # @return [Array]
  def get_children(element)
    return [] unless element
    return element.children if element.is_a?(CoreModel::StructuralElement) && element.children&.any?
    return [] unless element.is_a?(CoreModel::Block) && element.content

    Array(element.content).grep(CoreModel::Base)
  end

  private

  # Depth-first walk below +element+, yielding each node with its sibling
  # list and position. The keyword arguments are not read by the body.
  def traverse_children(element, siblings: [], index: 0, &block)
    kids = get_children(element)
    kids.each_with_index do |kid, position|
      yield(kid, kids, position)
      traverse_children(kid, &block)
    end
  end
end
|
|
454
|
+
end
|
|
455
|
+
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
# General-purpose named-item registry.
|
|
5
|
+
#
|
|
6
|
+
# Used by the format registry (Coradoc.registry), Input processors,
|
|
7
|
+
# and Output processors. Each instance stores items keyed by symbol
|
|
8
|
+
# name, with optional per-item options.
|
|
9
|
+
#
|
|
10
|
+
# @example Format registry
|
|
11
|
+
# registry = Registry.new
|
|
12
|
+
# registry.register(:html, Coradoc::Html)
|
|
13
|
+
# registry.get(:html) # => Coradoc::Html
|
|
14
|
+
#
|
|
15
|
+
# @example Processor registry (self-identifying items)
|
|
16
|
+
# registry = Registry.new(error_label: "input processor")
|
|
17
|
+
# registry.define(MyProcessor) # uses MyProcessor.processor_id
|
|
18
|
+
# registry.for_file("doc.html") # checks processor_match? on each item
|
|
19
|
+
#
|
|
20
|
+
class Registry
  attr_reader :error_label

  # @param error_label [String, nil] label for error messages in #process
  def initialize(error_label: nil)
    @items = {}
    @options = {}
    @error_label = error_label
  end

  # Register an item by explicit name, replacing any previous entry.
  #
  # @param name [Symbol] the item name
  # @param item [Object] the item to register
  # @param opts [Hash] optional per-item configuration
  # @raise [ArgumentError] if name is not a Symbol
  def register(name, item, opts = {})
    raise ArgumentError, "Name must be a Symbol, got #{name.class}" unless name.is_a?(Symbol)

    @items[name] = item
    @options[name] = opts
  end

  # Register a self-identifying item (extracts name via processor_id)
  #
  # @param item [Object] item that responds to #processor_id
  # @param opts [Hash] optional per-item configuration, given as keywords
  # @raise [ArgumentError] if item.processor_id is not a Symbol
  def define(item, **opts)
    register(item.processor_id, item, opts)
  end

  # Get a registered item by name
  #
  # @param name [Symbol, String] the item name (strings are coerced to symbols)
  # @return [Object, nil] nil when nothing is registered under that name
  def get(name)
    @items[normalize_name(name)]
  end
  alias [] get

  # Get options for a registered item
  #
  # @param name [Symbol, String] the item name (strings are coerced to symbols)
  # @return [Hash, nil]
  def options_for(name)
    @options[normalize_name(name)]
  end

  # Check if an item is registered
  #
  # @param name [Symbol, String] the item name (strings are coerced to symbols)
  # @return [Boolean]
  def registered?(name)
    @items.key?(normalize_name(name))
  end

  # List all registered item names
  #
  # @return [Array<Symbol>]
  def list
    @items.keys
  end

  # Direct access to the items hash (for backward compatibility)
  # @return [Hash<Symbol, Object>]
  attr_reader :items

  # Number of registered items
  #
  # @return [Integer]
  def size
    @items.size
  end

  # Remove all registered items and their options
  def clear
    @items.clear
    @options.clear
  end

  # Iterate over all items
  #
  # @yield [Symbol, Object] name and item
  # @return [Enumerator] when no block is given
  def each(&block)
    @items.each(&block)
  end

  # Iterate over item values
  #
  # @yield [Object]
  # @return [Enumerator] when no block is given
  def each_value(&block)
    @items.each_value(&block)
  end

  # Iterate over item names
  #
  # @yield [Symbol]
  # @return [Enumerator] when no block is given
  def each_key(&block)
    @items.each_key(&block)
  end

  # Find the first item whose processor_match? returns true for the given
  # filename. Items that raise NoMethodError while being asked (for
  # instance because they do not implement processor_match?) are skipped.
  #
  # @param filename [String]
  # @return [Object, nil]
  def for_file(filename)
    @items.each_value.find do |item|
      item.processor_match?(filename)
    rescue NoMethodError
      false
    end
  end

  # Resolve and execute: find item by format or filename, call processor_execute
  #
  # An explicit :format takes precedence; :filename is only consulted when
  # no :format is given.
  #
  # @param content [Object] content to process
  # @param options [Hash] :format or :filename for resolution
  # @return [Object] result of processor_execute
  # @raise [ArgumentError] if no matching item found
  def process(content, options = {})
    item =
      if options[:format]
        get(options[:format])
      elsif options[:filename]
        for_file(options[:filename])
      end

    label = @error_label || 'processor'
    raise ArgumentError, "No #{label} found for: #{options}" unless item

    item.processor_execute(content, options)
  end

  private

  # Names are stored as Symbols; Strings are coerced so every lookup method
  # agrees with #get. Values that cannot be symbolised are used as-is and
  # therefore never match.
  def normalize_name(name)
    name.respond_to?(:to_sym) ? name.to_sym : name
  end
end
|
|
156
|
+
end
|