markdown_composer 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE.txt +21 -0
- data/README.md +278 -0
- data/ROADMAP.md +80 -0
- data/docs/_md_composer_architecture.md +50 -0
- data/docs/_md_composer_cheatsheet.md +72 -0
- data/docs/_md_composer_concepts.md +64 -0
- data/docs/_md_composer_dev_guide.md +55 -0
- data/docs/_md_composer_getting_started.md +114 -0
- data/docs/_md_composer_readme.md +93 -0
- data/docs/_md_composer_user_guide.md +65 -0
- data/docs/ai/md_composer_ai_audit.md +35 -0
- data/docs/ai/md_composer_ai_canonical_docs.md +44 -0
- data/docs/ai/md_composer_ai_source_map.md +39 -0
- data/docs/compose/md_composer_compose_actions.md +338 -0
- data/docs/compose/md_composer_compose_anatomy.md +156 -0
- data/docs/compose/md_composer_compose_buffer.md +81 -0
- data/docs/compose/md_composer_compose_examples.md +31 -0
- data/docs/compose/md_composer_compose_include.md +136 -0
- data/docs/compose/md_composer_compose_select.md +198 -0
- data/docs/compose/md_composer_compose_sources.md +161 -0
- data/docs/compose/md_composer_compose_targets.md +194 -0
- data/docs/examples/md_composer_example_basic_compose.md +57 -0
- data/docs/examples/md_composer_example_buffer_target_actions.md +83 -0
- data/docs/examples/md_composer_example_fixtures.md +62 -0
- data/docs/examples/md_composer_example_html_output.md +50 -0
- data/docs/examples/md_composer_example_modify.md +77 -0
- data/docs/examples/md_composer_example_multi_row_compose.md +67 -0
- data/docs/examples/md_composer_example_ruby_plans.md +62 -0
- data/docs/examples/md_composer_example_structured_data.md +68 -0
- data/docs/examples/md_composer_example_transforms.md +68 -0
- data/docs/examples/md_composer_example_yaml_json_rows.md +56 -0
- data/docs/examples/md_composer_examples_readme.md +45 -0
- data/docs/examples/md_composer_runnable_examples.md +374 -0
- data/docs/examples/md_composer_source_ruby_dsl.md +88 -0
- data/docs/reference/md_composer_nested.md +170 -0
- data/docs/reference/md_composer_reference_api.md +71 -0
- data/docs/reference/md_composer_reference_capabilities.md +63 -0
- data/docs/reference/md_composer_reference_diagnostics.md +54 -0
- data/docs/reference/md_composer_reference_plan_schema.md +75 -0
- data/docs/reference/md_composer_reference_registries.md +63 -0
- data/docs/reference/md_composer_take.md +221 -0
- data/docs/reference/md_composer_unit_tokens.md +228 -0
- data/docs/reference/md_composer_where.md +227 -0
- data/docs/transform/md_composer_transform_anatomy.md +112 -0
- data/docs/transform/md_composer_transform_examples.md +30 -0
- data/docs/transform/md_composer_transform_modes.md +83 -0
- data/docs/transform/md_composer_transform_options.md +142 -0
- data/docs/transform/md_composer_transform_scope.md +97 -0
- data/docs/transform/md_composer_transform_transforms.md +99 -0
- data/examples/README.md +20 -0
- data/examples/advanced_composer.rb +207 -0
- data/examples/basic_compose.rb +24 -0
- data/examples/complex_composer.rb +235 -0
- data/examples/example_support.rb +18 -0
- data/examples/fixtures/current.md +179 -0
- data/examples/fixtures/faq.md +58 -0
- data/examples/fixtures/guide.md +62 -0
- data/examples/fixtures/site_intro.md +29 -0
- data/examples/fixtures/source.html +22 -0
- data/examples/html_input.rb +26 -0
- data/examples/output/advanced_composer.md +76 -0
- data/examples/output/basic_compose.md +25 -0
- data/examples/output/complex_composer.md +85 -0
- data/examples/output/html_input.md +4 -0
- data/examples/output/source_list_dsl.md +126 -0
- data/examples/output/standard_composer.md +46 -0
- data/examples/output/standard_sources_buffer.md +31 -0
- data/examples/output/yaml_plan.md +43 -0
- data/examples/plans/basic.yml +20 -0
- data/examples/source_list_dsl.rb +41 -0
- data/examples/standard_composer.rb +42 -0
- data/examples/standard_sources_buffer.rb +62 -0
- data/examples/yaml_plan.rb +17 -0
- data/lib/markdown_composer/capabilities.rb +223 -0
- data/lib/markdown_composer/composition_buffer.rb +378 -0
- data/lib/markdown_composer/data_path.rb +313 -0
- data/lib/markdown_composer/diagnostics.rb +63 -0
- data/lib/markdown_composer/document_index/html_parser.rb +84 -0
- data/lib/markdown_composer/document_index/markdown_parser.rb +338 -0
- data/lib/markdown_composer/document_index.rb +94 -0
- data/lib/markdown_composer/executor.rb +284 -0
- data/lib/markdown_composer/markdown_renderer.rb +105 -0
- data/lib/markdown_composer/plan.rb +436 -0
- data/lib/markdown_composer/plan_builder.rb +111 -0
- data/lib/markdown_composer/registries/action_entries.rb +26 -0
- data/lib/markdown_composer/registries/condition_entries.rb +58 -0
- data/lib/markdown_composer/registries/registry.rb +69 -0
- data/lib/markdown_composer/registries/source_entries.rb +18 -0
- data/lib/markdown_composer/registries/support_values.rb +23 -0
- data/lib/markdown_composer/registries/take_entries.rb +31 -0
- data/lib/markdown_composer/registries/take_registry.rb +18 -0
- data/lib/markdown_composer/registries/target_entries.rb +40 -0
- data/lib/markdown_composer/registries/unit_token_entries.rb +62 -0
- data/lib/markdown_composer/registries/where_registry.rb +84 -0
- data/lib/markdown_composer/registries.rb +46 -0
- data/lib/markdown_composer/result.rb +34 -0
- data/lib/markdown_composer/selection_resolver.rb +181 -0
- data/lib/markdown_composer/source.rb +57 -0
- data/lib/markdown_composer/source_list_builder.rb +47 -0
- data/lib/markdown_composer/take.rb +129 -0
- data/lib/markdown_composer/transform_options.rb +66 -0
- data/lib/markdown_composer/transform_runner/content_placement.rb +63 -0
- data/lib/markdown_composer/transform_runner/field_interpolator.rb +213 -0
- data/lib/markdown_composer/transform_runner/heading_numbering.rb +106 -0
- data/lib/markdown_composer/transform_runner/scope_resolver.rb +87 -0
- data/lib/markdown_composer/transform_runner.rb +264 -0
- data/lib/markdown_composer/transforms/default_entries.rb +31 -0
- data/lib/markdown_composer/transforms/registry.rb +11 -0
- data/lib/markdown_composer/validator.rb +378 -0
- data/lib/markdown_composer/value_object.rb +15 -0
- data/lib/markdown_composer/version.rb +5 -0
- data/lib/markdown_composer/where.rb +313 -0
- data/lib/markdown_composer.rb +114 -0
- metadata +260 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module MarkdownComposer
|
|
4
|
+
# Value object for a single diagnostic entry with the attributes
# severity, code, message, path and details.
Diagnostic = MarkdownComposer.value_object(:severity, :code, :message, :path, :details) do
  # Returns the diagnostic as a Hash keyed by attribute name. Attributes whose
  # value is nil are left out of the result.
  def to_h
    attributes = {
      severity: severity,
      code: code,
      message: message,
      path: path,
      details: details
    }
    attributes.reject { |_name, value| value.nil? }
  end
end
|
|
15
|
+
|
|
16
|
+
# Collects Diagnostic entries and offers severity-specific helpers for
# appending them. The appending methods return the collector itself so calls
# can be chained.
class Diagnostics
  attr_reader :items

  # @param items [Array] initial entries; the array is stored as given, not copied
  def initialize(items = [])
    @items = items
  end

  # Appends an entry with severity :info.
  def info(code, message, path: nil, details: nil)
    add(:info, code, message, path: path, details: details)
  end

  # Appends an entry with severity :warning.
  def warn(code, message, path: nil, details: nil)
    add(:warning, code, message, path: path, details: details)
  end

  # Appends an entry with severity :error.
  def error(code, message, path: nil, details: nil)
    add(:error, code, message, path: path, details: details)
  end

  # Appends every entry of +other+ (anything responding to #to_a).
  #
  # @return [Diagnostics] self
  def concat(other)
    tap { @items.concat(other.to_a) }
  end

  # @return [Array] the entries whose severity is :error
  def errors
    @items.select { |entry| entry.severity == :error }
  end

  # @return [Boolean] true when at least one entry has severity :error
  def any_errors?
    !errors.empty?
  end

  # @return [Array] the underlying entry array (not a copy)
  def to_a
    @items
  end

  # @return [Array<Hash>] each entry converted with #to_h
  def as_json
    @items.map { |entry| entry.to_h }
  end

  private

  # Builds a Diagnostic from the given fields, stores it and returns self.
  def add(severity, code, message, path:, details:)
    entry = Diagnostic.new(severity: severity, code: code, message: message, path: path, details: details)
    @items.push(entry)
    self
  end
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require "nokogiri"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
nil
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
module MarkdownComposer
|
|
10
|
+
class DocumentIndex
|
|
11
|
+
class HtmlParser
|
|
12
|
+
attr_reader :source, :diagnostics
|
|
13
|
+
|
|
14
|
+
# Stores the HTML source and the diagnostics collector for later use by #index.
#
# @param source the source to convert; #index reads its #html and #key
# @param diagnostics collector handed on to the Markdown indexer
def initialize(source, diagnostics:)
  @source, @diagnostics = source, diagnostics
end
|
|
18
|
+
|
|
19
|
+
# Parses the source HTML as an HTML5 fragment, converts it to Markdown and
# indexes that Markdown under the source's key.
#
# @return the result of DocumentIndex.from_markdown
def index
  parsed = Nokogiri::HTML5.fragment(source.html.to_s)
  DocumentIndex.from_markdown(
    html_to_markdown(parsed),
    source_key: source.key,
    diagnostics: diagnostics
  )
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
# Converts a parsed HTML fragment into Markdown text.
#
# Every top-level child is rendered with node_to_markdown, the pieces are
# concatenated, and leading indentation is removed from each resulting line.
#
# Only spaces and tabs are stripped. String#lstrip also removes newlines, so
# using it here turned every blank (or whitespace-only) line into an empty
# string; the blank lines separating blocks disappeared and adjacent blocks,
# for example two consecutive paragraphs, ran together.
#
# NOTE(review): indentation inside fenced code produced from <pre> is still
# removed, as before — confirm whether that is acceptable.
#
# @param fragment [#children] parsed fragment whose children are rendered
# @return [String] Markdown with blank separator lines preserved
def html_to_markdown(fragment)
  markdown = fragment.children.map { |node| node_to_markdown(node) }.join
  markdown.lines.map { |line| line.sub(/\A[ \t]+/, "") }.join
end
|
|
30
|
+
|
|
31
|
+
# Renders one HTML node as Markdown.
#
# Text nodes are returned verbatim. Headings, paragraphs, links, images,
# lists, blockquotes, <pre> blocks and tables are converted to their Markdown
# form. Any other element is replaced by the rendering of its children, or by
# its own HTML followed by a newline when it renders to nothing.
#
# Heading text and table cell text are collapsed onto a single line
# (whitespace runs, including newlines, become one space). Both constructs
# are single-line in the Markdown emitted here, so text that was wrapped in
# the HTML source would otherwise break the heading or the table row.
#
# @param node an HTML node responding to the Nokogiri node interface used below
# @return [String] Markdown for the node
def node_to_markdown(node)
  return node.text if node.text?

  case node.name
  when /\Ah([1-6])\z/
    level = Regexp.last_match(1).to_i
    "#{"#" * level} #{node.text.split.join(" ")}\n\n"
  when "p"
    "#{inline_html(node).strip}\n\n"
  when "a"
    "[#{node.text.strip}](#{node["href"]})"
  when "img"
    "![#{node["alt"]}](#{node["src"]})\n\n"
  when "ul"
    node.css("> li").map { |li| "- #{inline_html(li).strip}\n" }.join + "\n"
  when "ol"
    node.css("> li").each_with_index.map { |li, index| "#{index + 1}. #{inline_html(li).strip}\n" }.join + "\n"
  when "blockquote"
    node.text.lines.map { |line| "> #{line.strip}\n" }.join + "\n"
  when "pre"
    code = node.at_css("code")
    # A class such as "language-ruby" on the <code> element becomes the fence info string.
    language = code&.classes&.find { |klass| klass.start_with?("language-") }&.sub("language-", "")
    "```#{language}\n#{code&.text || node.text}\n```\n\n"
  when "table"
    rows = node.css("tr").map { |tr| tr.css("th,td").map { |cell| cell.text.split.join(" ") } }
    return "" if rows.empty?

    # The first row is always emitted as the header row.
    header = "| #{rows.first.join(" | ")} |\n"
    separator = "| #{rows.first.map { "---" }.join(" | ")} |\n"
    body = rows[1..].to_a.map { |row| "| #{row.join(" | ")} |\n" }.join
    "#{header}#{separator}#{body}\n"
  else
    children = node.children.map { |child| node_to_markdown(child) }.join
    children.empty? ? "#{node.to_html}\n" : children
  end
end
|
|
66
|
+
|
|
67
|
+
# Renders the children of +node+ as inline Markdown and concatenates them.
#
# Text nodes contribute their text; <a>, <img> and <code> children become a
# Markdown link, image and code span; every other element is flattened by
# rendering its own children recursively.
#
# @param node an HTML node whose children are rendered
# @return [String] the inline Markdown
def inline_html(node)
  pieces = node.children.map do |child|
    next child.text if child.text?

    case child.name
    when "a" then "[#{child.text.strip}](#{child["href"]})"
    when "img" then "![#{child["alt"]}](#{child["src"]})"
    when "code" then "`#{child.text}`"
    else inline_html(child)
    end
  end
  pieces.join
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module MarkdownComposer
|
|
4
|
+
class DocumentIndex
|
|
5
|
+
class MarkdownParser
|
|
6
|
+
attr_reader :lines, :source_key, :diagnostics
|
|
7
|
+
|
|
8
|
+
# Prepares a parser for one Markdown document.
#
# @param markdown [String] document text; it is split into lines right away
# @param source_key key used as the prefix of generated node and section ids
# @param diagnostics collector passed through to the resulting index
def initialize(markdown, source_key:, diagnostics:)
  @source_key = source_key
  @diagnostics = diagnostics
  @lines = markdown.lines
  # Nodes found so far, and how many of them have been created.
  @nodes = []
  @position = 0
end
|
|
15
|
+
|
|
16
|
+
# Scans the document into nodes, then assembles the section tree.
#
# @return the value produced by build_sections
def index
  scan_nodes
  document = build_sections
  document
end
|
|
20
|
+
|
|
21
|
+
private
|
|
22
|
+
|
|
23
|
+
# Walks the document lines from top to bottom and records one node per
# recognised block through add_node (tables and lists additionally record
# derived child nodes). Blank lines are skipped. Each `when` branch is
# responsible for advancing +index+ past the lines it consumed.
def scan_nodes
  index = 0
  # Front matter: only considered when the very first line is `---`.
  if lines.first&.match?(/\A---\s*$/)
    # The search runs on lines[1..], so the hit is one less than the real line index.
    closing = lines[1..]&.find_index { |line| line.match?(/\A---\s*$/) }
    if closing
      end_index = closing + 1
      add_node("data_block", lines[0..end_index], 0, end_index, text: lines[1...end_index].join.strip, attributes: { "format" => "yaml", "location" => "frontmatter" })
      index = end_index + 1
    end
  end

  while index < lines.length
    line = lines[index]
    if line.strip.empty?
      index += 1
      next
    end

    case line
    when /\A\$\$\s*$/
      # Math block: runs to the next `$$` line, or to the end of the document if none follows.
      start = index
      index += 1
      index += 1 while index < lines.length && !lines[index].match?(/\A\$\$\s*$/)
      # Include the closing `$$` line when one was found.
      index += 1 if index < lines.length
      add_node("math_block", lines[start...index], start, index - 1)
    when /\A(```|~~~)\s*([A-Za-z0-9_-]+)?/
      # Fenced block: capture 1 is the fence marker, capture 2 the optional language.
      fence = Regexp.last_match(1)
      language = Regexp.last_match(2).to_s
      start = index
      index += 1
      # The block ends at the next line starting with the same marker.
      index += 1 while index < lines.length && !lines[index].start_with?(fence)
      closing_index = index
      closed = index < lines.length
      index += 1 if closed
      # Body excludes the opening fence and, when present, the closing fence.
      body_lines = lines[(start + 1)...(closed ? closing_index : index)].to_a
      language_token = language.downcase
      type = language_token == "mermaid" ? "mermaid" : "code_block"
      # yaml/yml/json fences are reported as data blocks instead of code blocks.
      type = "data_block" if %w[yaml yml json].include?(language_token)
      format = language_token == "yml" ? "yaml" : language_token
      attributes = { "language" => language_token, "format" => format }
      attributes["location"] = "body" if type == "data_block"
      attributes["diagram_type"] = mermaid_diagram_type(body_lines.join) if type == "mermaid"
      add_node(type, lines[start...index], start, index - 1, text: body_lines.join.strip, attributes: attributes)
    when /\A\s{0,3}(\#{1,6})\s+(.+?)\s*#*\s*$/
      # ATX heading: the number of `#` characters is the level; trailing `#`s are dropped.
      level = Regexp.last_match(1).length
      text = clean_text(Regexp.last_match(2))
      add_node("heading_#{level}", [ line ], index, index, level: level, text: text)
      index += 1
    when /\A\|.*\|\s*$/
      # Table: every consecutive line that starts and ends with a pipe.
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\|.*\|\s*$/)
      table_lines = lines[start...index]
      add_node("table", table_lines, start, index - 1, attributes: table_attributes(table_lines))
      add_table_part_nodes(table_lines, start)
    when /\A\s*([-*+])\s+/
      # Unordered list: consecutive marker lines; each line is also recorded as a derived list_item.
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\s*([-*+])\s+/)
      item_total = index - start
      add_node("unordered_list", lines[start...index], start, index - 1, attributes: { "ordered" => false, "item_count" => item_total })
      lines[start...index].each_with_index do |item_line, offset|
        add_node("list_item", [ item_line ], start + offset, start + offset, text: clean_text(item_line.sub(/\A\s*[-*+]\s+/, "")), attributes: { "ordered" => false, "item_index" => offset + 1, "derived" => true })
      end
    when /\A\s*\d+\.\s+/
      # Ordered list: same handling as unordered lists, keyed on `<digits>.` markers.
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\s*\d+\.\s+/)
      item_total = index - start
      add_node("ordered_list", lines[start...index], start, index - 1, attributes: { "ordered" => true, "item_count" => item_total })
      lines[start...index].each_with_index do |item_line, offset|
        add_node("list_item", [ item_line ], start + offset, start + offset, text: clean_text(item_line.sub(/\A\s*\d+\.\s+/, "")), attributes: { "ordered" => true, "item_index" => offset + 1, "derived" => true })
      end
    when /\A>\s?/
      # Blockquote: consecutive lines starting with `>`.
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A>\s?/)
      add_node("blockquote", lines[start...index], start, index - 1)
    when /\A<!--.*-->\s*$/
      # Single-line HTML comment; the node text is the comment without its delimiters.
      add_node("comment", [ line ], index, index, text: line.gsub(/\A<!--|-->\s*\z/, "").strip)
      index += 1
    when /\A<[^>]+>/
      # Any other line starting with a tag is recorded as one line of raw HTML.
      start = index
      index += 1
      add_node("raw_html", lines[start...index], start, index - 1)
    else
      # Paragraph: extends until a blank line or a line that starts another block.
      start = index
      index += 1
      while index < lines.length && paragraph_continues?(lines[index])
        index += 1
      end
      add_node("paragraph", lines[start...index], start, index - 1)
    end
  end
end
|
|
114
|
+
|
|
115
|
+
# Tells whether +line+ belongs to the paragraph currently being collected.
#
# A paragraph stops at a blank line and at any line that opens another block:
# a fence, a `$$` line, a heading, a table row, a list item, a blockquote or
# a line starting with an HTML tag.
#
# @param line [String]
# @return [Boolean]
def paragraph_continues?(line)
  return false if line.strip.empty?

  !line.match?(/\A(```|~~~)|\A\$\$\s*$|\A\s{0,3}\#{1,6}\s+|\A\|.*\|\s*$|\A\s*([-*+]|\d+\.)\s+|\A>\s?|\A<[^>]+>/)
end
|
|
119
|
+
|
|
120
|
+
# Creates a ComposerNode for a block, appends it to @nodes and returns it.
#
# The node's position is one more than the number of nodes created so far and
# also forms its id. Line indexes are zero-based on input and stored one-based.
#
# @param type [String] node type
# @param raw_lines [Array<String>] source lines making up the block
# @param start_index [Integer] zero-based index of the first line
# @param end_index [Integer] zero-based index of the last line
# @param level [Integer, nil] heading level, when applicable
# @param text [String, nil] plain text; derived from the raw source via clean_text when nil
# @param attributes [Hash] type-specific attributes
# @return the created node
def add_node(type, raw_lines, start_index, end_index, level: nil, text: nil, attributes: {})
  raw = raw_lines.join
  position = @position + 1
  plain_text = text || clean_text(raw)
  # Inline children are collected before @position moves on, because their ids embed it.
  inline_children = inline_nodes(raw, start_index)
  # Stored raw text always ends with a newline.
  terminated_raw = raw.end_with?("\n") ? raw : "#{raw}\n"

  node = ComposerNode.new(
    id: "#{source_key}:n#{position}",
    source_key: source_key,
    type: type,
    source_position: position,
    level: level,
    text: plain_text,
    attributes: attributes,
    children: inline_children,
    raw: terminated_raw,
    start_line: start_index + 1,
    end_line: end_index + 1
  )
  @nodes.push(node)
  @position = position
  node
end
|
|
139
|
+
|
|
140
|
+
# Records the derived nodes of a table: one table_head node, then for every
# row except the separator a table_row node followed by one node per cell,
# and finally a table_body node when rows exist after the separator.
#
# Rows before the body start yield table_header cells, later rows table_cell
# cells. Without a separator line the body starts at the second line.
#
# @param table_lines [Array<String>] the table's source lines
# @param start_index [Integer] zero-based index of the table's first line
def add_table_part_nodes(table_lines, start_index)
  return if table_lines.empty?

  separator = /\A\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/
  head_line = table_lines.first
  separator_at = table_lines.index { |row| row.match?(separator) }
  first_body = separator_at ? separator_at + 1 : 1
  columns = table_columns(head_line).length
  add_node("table_head", [head_line], start_index, start_index, attributes: { "section" => "table_head", "row_count" => 1, "column_count" => columns, "derived" => true })

  # Row numbering counts visible rows only, so the separator line is not numbered.
  row_number = 0
  table_lines.each_with_index do |row, offset|
    next if offset == separator_at

    row_number += 1
    section = offset.zero? ? "table_head" : "table_body"
    cell_type = offset < first_body ? "table_header" : "table_cell"
    line_index = start_index + offset
    cells = table_columns(row)
    add_node("table_row", [row], line_index, line_index, attributes: { "section" => section, "row_index" => row_number, "column_count" => cells.length, "derived" => true })
    cells.each.with_index(1) do |cell, column|
      add_node(cell_type, ["#{cell}\n"], line_index, line_index, text: cell, attributes: { "section" => section, "row_index" => row_number, "column_index" => column, "derived" => true })
    end
  end

  body = table_lines[first_body..].to_a
  return if body.empty?

  body_rows = body.count { |row| !row.match?(separator) }
  add_node("table_body", body, start_index + first_body, start_index + table_lines.length - 1, attributes: { "section" => "table_body", "row_count" => body_rows, "column_count" => columns, "derived" => true })
end
|
|
166
|
+
|
|
167
|
+
# Computes the attributes stored on a table node.
#
# @param table_lines [Array<String>] the table's source lines
# @return [Hash] "row_count" (lines that are not separator lines) and
#   "column_count" (cells in the first line)
def table_attributes(table_lines)
  separator = /\A\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/
  row_total = table_lines.count { |row| !row.match?(separator) }
  column_total = table_columns(table_lines.first).length
  { "row_count" => row_total, "column_count" => column_total }
end
|
|
171
|
+
|
|
172
|
+
# Splits one table row into its trimmed cell texts.
#
# The surrounding whitespace and the outer pipes are removed first, then the
# remainder is split on `|`. The split keeps trailing empty fields (limit -1):
# a plain String#split drops them, which silently removed empty cells at the
# end of a row such as `|a||` and made the column count too small.
#
# @param row_line [String, nil] a table source line
# @return [Array<String>] cell texts; empty for a nil or empty row
def table_columns(row_line)
  inner = row_line.to_s.strip.sub(/\A\|/, "").sub(/\|\z/, "")
  inner.split("|", -1).map(&:strip)
end
|
|
175
|
+
|
|
176
|
+
# Finds the inline constructs in a block's raw text and returns one inline
# node per match, grouped in this order: images, links, code spans, inline
# math. Within each group the offset passed to inline_node restarts at zero.
#
# @param raw [String] raw source text of the block
# @param start_index [Integer] zero-based index of the block's first line
# @return [Array] inline nodes built by inline_node
def inline_nodes(raw, start_index)
  images = raw.scan(/!\[([^\]]*)\]\(([^)\s]+)(?:\s+"([^"]+)")?\)/).each_with_index.map do |(alt, src, title), offset|
    inline_node("image", alt, start_index, offset, { "alt" => alt, "src" => src, "title" => title }.compact)
  end
  # The lookbehind keeps image syntax from also being reported as a link.
  links = raw.scan(/(?<!!)\[([^\]]+)\]\(([^)\s]+)(?:\s+"([^"]+)")?\)/).each_with_index.map do |(text, href, title), offset|
    inline_node("link", text, start_index, offset, { "href" => href, "title" => title }.compact)
  end
  code_spans = raw.scan(/`([^`\n]+)`/).each_with_index.map do |(code), offset|
    inline_node("inline_code", code, start_index, offset, {})
  end
  math_spans = raw.scan(/\$([^$\n]+)\$/).each_with_index.map do |(math), offset|
    inline_node("inline_math", math, start_index, offset, {})
  end

  images + links + code_spans + math_spans
end
|
|
192
|
+
|
|
193
|
+
# Builds the ComposerNode for one inline construct.
#
# The id embeds the position the enclosing block node is about to receive,
# the inline type and the match offset. Start and end line are both the
# block's first line (one-based).
#
# @param type [String] inline node type
# @param text [String] text of the construct
# @param start_index [Integer] zero-based index of the enclosing block's first line
# @param offset [Integer] index of the match within its type
# @param attributes [Hash] type-specific attributes
# @return the created inline node
def inline_node(type, text, start_index, offset, attributes)
  position = @position + 1
  line_number = start_index + 1

  ComposerNode.new(
    id: "#{source_key}:i#{position}:#{type}:#{offset}",
    source_key: source_key,
    type: type,
    source_position: position,
    level: nil,
    text: text,
    attributes: attributes,
    children: [],
    raw: inline_raw(type, text, attributes),
    start_line: line_number,
    end_line: line_number
  )
end
|
|
208
|
+
|
|
209
|
+
# Rebuilds the Markdown source for an inline node from its text and attributes.
#
# Images are rebuilt as `![text](src)`, in line with how links, code spans
# and inline math are rebuilt; previously the image branch returned an empty
# string, so image nodes carried no raw Markdown at all. Titles are not
# reproduced (for links either).
#
# @param type [String] inline node type
# @param text [String, nil] text of the construct (the alt text for images)
# @param attributes [Hash] attributes holding "href" or "src"
# @return [String] the reconstructed Markdown; for unknown types the text itself
def inline_raw(type, text, attributes)
  case type
  when "link" then "[#{text}](#{attributes["href"]})"
  when "image" then "![#{text}](#{attributes["src"]})"
  when "inline_code" then "`#{text}`"
  when "inline_math" then "$#{text}$"
  else text.to_s
  end
end
|
|
218
|
+
|
|
219
|
+
# Groups the scanned nodes into a heading-based section tree and returns the
# finished DocumentIndex.
#
# Every heading opens a section; it becomes a child of the closest open
# section with a lower level, or of the root when there is none. Non-heading
# nodes are attached to the innermost open section, or to the root body when
# no heading has been seen yet.
def build_sections
  root_body = []
  root_children = []
  sections = []
  # Chain of sections that are still open, outermost first.
  open_sections = []

  @nodes.each do |node|
    unless node.heading?
      container = open_sections.empty? ? root_body : open_sections.last.body_nodes
      container << node
      next
    end

    # A heading closes every open section of the same or a deeper level.
    open_sections.pop until open_sections.empty? || open_sections.last.level < node.level
    parent = open_sections.last
    section = MutableSection.new(
      id: "#{source_key}:s#{sections.length + 1}",
      source_key: source_key,
      heading_node: node,
      level: node.level,
      title_text: node.text,
      body_nodes: [],
      child_sections: [],
      source_position: node.source_position,
      parent_section_id: parent&.id,
      start_line: node.start_line
    )
    siblings = parent ? parent.child_sections : root_children
    siblings << section
    sections << section
    open_sections << section
  end

  line_count = lines.length
  converted = sections.map { |section| section.to_immutable(line_count) }
  converted_by_id = converted.to_h { |section| [section.id, section] }
  # Converted sections still reference their mutable children; swap those for
  # the converted counterparts, resolving each section once.
  cache = {}
  resolved = converted.map { |section| resolve_section_children(section, converted_by_id, cache) }
  resolved_by_id = resolved.to_h { |section| [section.id, section] }

  root = ComposerSection.new(
    id: "#{source_key}:root",
    source_key: source_key,
    heading_node: nil,
    level: 0,
    title_text: nil,
    body_nodes: root_body,
    child_sections: root_children.map { |child| resolved_by_id.fetch(child.id) },
    all_nodes: @nodes.dup,
    source_position: 0,
    parent_section_id: nil,
    start_line: 1,
    end_line: line_count
  )

  DocumentIndex.new(source_key: source_key, root: root, nodes: @nodes, sections: resolved, diagnostics: diagnostics, lines: lines)
end
|
|
273
|
+
|
|
274
|
+
# Returns a copy of +section+ whose child sections are replaced, recursively,
# by the resolved versions of the sections registered under the same ids in
# +by_id+. Results are memoised in +resolved_sections+ by section id.
#
# @param section the section to resolve
# @param by_id [Hash] section id => section used to look children up
# @param resolved_sections [Hash] memo of already resolved sections, updated in place
# @return the resolved section
def resolve_section_children(section, by_id, resolved_sections)
  resolved_sections.fetch(section.id) do
    children = section.child_sections.map do |child|
      resolve_section_children(by_id.fetch(child.id), by_id, resolved_sections)
    end
    resolved_sections[section.id] = section.with(child_sections: children)
  end
end
|
|
284
|
+
|
|
285
|
+
# Reduces Markdown source to plain text.
#
# Applied in order: triple-backtick spans are removed, images and links are
# replaced by their alt/link text, the characters ` * _ > # | - become
# spaces, whitespace runs collapse to one space, and the result is trimmed.
#
# @param raw [#to_s] Markdown source
# @return [String] plain text
def clean_text(raw)
  substitutions = [
    [/```.*?```/m, ""],
    [/!\[([^\]]*)\]\([^)]+\)/, "\\1"],
    [/\[([^\]]+)\]\([^)]+\)/, "\\1"],
    [/[`*_>#|-]/, " "],
    [/\s+/, " "]
  ]
  cleaned = substitutions.reduce(raw.to_s) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
  cleaned.strip
end
|
|
294
|
+
|
|
295
|
+
# Classifies a Mermaid diagram by its first meaningful line, i.e. the first
# line that is neither empty nor a `%%` comment once stripped. Keywords are
# matched case-insensitively at the start of that line.
#
# @param source [#to_s] body of the mermaid block
# @return [String] short diagram type name, or "unknown" when nothing matches
def mermaid_diagram_type(source)
  declaration = source.to_s.each_line.map(&:strip).find do |line|
    !(line.empty? || line.start_with?("%%"))
  end.to_s

  known_types = [
    [/\A(?:flowchart|graph)\b/i, "flowchart"],
    [/\AsequenceDiagram\b/i, "sequence"],
    [/\AclassDiagram\b/i, "class"],
    [/\AstateDiagram(?:-v2)?\b/i, "state"],
    [/\AerDiagram\b/i, "er"],
    [/\Agantt\b/i, "gantt"],
    [/\Apie\b/i, "pie"],
    [/\Ajourney\b/i, "journey"],
    [/\AgitGraph\b/i, "gitgraph"],
    [/\Amindmap\b/i, "mindmap"],
    [/\Atimeline\b/i, "timeline"],
    [/\AquadrantChart\b/i, "quadrant"],
    [/\Axychart-beta\b/i, "xy"],
    [/\Asankey-beta\b/i, "sankey"]
  ]
  hit = known_types.find { |pattern, _name| pattern.match?(declaration) }
  hit ? hit.last : "unknown"
end
|
|
315
|
+
|
|
316
|
+
# Mutable working representation of a section while the section tree is
# being assembled; converted to a ComposerSection with #to_immutable.
MutableSection = Struct.new(:id, :source_key, :heading_node, :level, :title_text, :body_nodes, :child_sections, :source_position, :parent_section_id, :start_line, keyword_init: true) do
  # Builds the ComposerSection for this section.
  #
  # all_nodes holds the section's own body nodes followed, per child section,
  # by the child's heading node and the child's own all_nodes. The end line is
  # the greatest end line among the heading and those nodes (the start line
  # when there are none), capped at +document_end+. child_sections is passed
  # through unchanged.
  #
  # @param document_end [Integer] last line number of the document
  # @return the converted section
  def to_immutable(document_end)
    descendants = child_sections.flat_map do |child|
      [child.heading_node] + child.to_immutable(document_end).all_nodes
    end
    collected = body_nodes + descendants
    candidate_lines = ([heading_node&.end_line] + collected.map(&:end_line)).compact
    last_line = candidate_lines.empty? ? start_line : candidate_lines.max

    ComposerSection.new(
      id: id,
      source_key: source_key,
      heading_node: heading_node,
      level: level,
      title_text: title_text,
      body_nodes: body_nodes,
      child_sections: child_sections,
      all_nodes: collected,
      source_position: source_position,
      parent_section_id: parent_section_id,
      start_line: start_line,
      end_line: [last_line, document_end].min
    )
  end
end
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module MarkdownComposer
|
|
4
|
+
# Value object for one node of an indexed document (a block, a derived part
# of a block, or an inline construct).
ComposerNode = MarkdownComposer.value_object(:id, :source_key, :type, :source_position, :level, :text, :attributes, :children, :raw, :start_line, :end_line) do
  # @return [Boolean] true when the node type starts with "heading_"
  def heading?
    type.start_with?("heading_")
  end

  # Returns the node as a Hash; child nodes are converted with their own #to_h.
  def to_h
    child_hashes = children.map { |child| child.to_h }
    {
      id: id,
      source_key: source_key,
      type: type,
      source_position: source_position,
      level: level,
      text: text,
      attributes: attributes,
      children: child_hashes,
      raw: raw,
      start_line: start_line,
      end_line: end_line
    }
  end
end
|
|
25
|
+
|
|
26
|
+
# Value object for one section of an indexed document: a heading together
# with its body nodes and child sections.
ComposerSection = MarkdownComposer.value_object(:id, :source_key, :heading_node, :level, :title_text, :body_nodes, :child_sections, :all_nodes, :source_position, :parent_section_id, :start_line, :end_line) do
  # Plain text of the section: the title followed by the text of every node
  # in all_nodes, one per line; nil entries are skipped.
  def text
    parts = all_nodes.map(&:text)
    parts.unshift(title_text)
    parts.compact.join("\n")
  end

  # Markdown source of the section: the raw text of the heading and of every
  # node not flagged as "derived", de-duplicated by id and ordered by source
  # position.
  def raw
    originals = all_nodes.reject { |node| node.attributes["derived"] }
    candidates = [heading_node, *originals].compact
    candidates.uniq(&:id).sort_by(&:source_position).map(&:raw).join
  end

  # Returns the section as a Hash. The heading and body nodes are expanded
  # with #to_h; child sections and all_nodes are listed by id only.
  def to_h
    heading_hash = heading_node&.to_h
    body_hashes = body_nodes.map { |node| node.to_h }
    child_ids = child_sections.map { |section| section.id }
    node_ids = all_nodes.map { |node| node.id }
    {
      id: id,
      source_key: source_key,
      heading_node: heading_hash,
      level: level,
      title_text: title_text,
      body_nodes: body_hashes,
      child_sections: child_ids,
      all_nodes: node_ids,
      source_position: source_position,
      parent_section_id: parent_section_id,
      start_line: start_line,
      end_line: end_line
    }
  end
end
|
|
52
|
+
|
|
53
|
+
# Indexed view of one source document: its nodes, its sections, the root
# section, the diagnostics collector and the source lines.
class DocumentIndex
  attr_reader :source_key, :root, :nodes, :sections, :diagnostics, :lines

  # Builds an index for +source+, choosing the HTML or the Markdown path from
  # the source's format.
  #
  # @param source anything accepted by Source.build
  # @param diagnostics [Diagnostics] collector to use
  # @return [DocumentIndex]
  def self.build(source, diagnostics: Diagnostics.new)
    source = Source.build(source)
    if source.format == :html
      from_html(source, diagnostics: diagnostics)
    else
      from_markdown(source.markdown.to_s, source_key: source.key, diagnostics: diagnostics)
    end
  end

  # Indexes Markdown text.
  #
  # @param markdown [#to_s] the Markdown to index
  # @param source_key [String] key used in generated ids
  # @param diagnostics [Diagnostics] collector to use
  # @return [DocumentIndex]
  def self.from_markdown(markdown, source_key: "buffer", diagnostics: Diagnostics.new)
    MarkdownParser.new(markdown.to_s, source_key: source_key, diagnostics: diagnostics).index
  end

  # Indexes an HTML source. When Nokogiri is not loaded an error diagnostic
  # is recorded and the HTML text is indexed as if it were Markdown.
  #
  # @param source the HTML source (responds to #html and #key)
  # @param diagnostics [Diagnostics] collector to use
  # @return [DocumentIndex]
  def self.from_html(source, diagnostics:)
    if defined?(Nokogiri)
      HtmlParser.new(source, diagnostics: diagnostics).index
    else
      diagnostics.error("source.html_parser_missing", "HTML input requires nokogiri", path: "sources.#{source.key}")
      from_markdown(source.html.to_s, source_key: source.key, diagnostics: diagnostics)
    end
  end

  def initialize(source_key:, root:, nodes:, sections:, diagnostics:, lines:)
    @source_key = source_key
    @root = root
    @nodes = nodes
    @sections = sections
    @diagnostics = diagnostics
    @lines = lines
  end

  # Returns the source text for an inclusive, one-based line range.
  #
  # Array#[] returns nil for a range that starts beyond the end of the array;
  # that case now yields an empty string instead of raising NoMethodError.
  #
  # @param start_line [Integer] first line (one-based)
  # @param end_line [Integer] last line (one-based, inclusive)
  # @return [String] the joined lines, or "" when the range holds no lines
  def markdown_for_range(start_line, end_line)
    Array(lines[(start_line - 1)..(end_line - 1)]).join
  end
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
require_relative "document_index/markdown_parser"
|
|
94
|
+
require_relative "document_index/html_parser"
|