markdown_composer 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +23 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +278 -0
  5. data/ROADMAP.md +80 -0
  6. data/docs/_md_composer_architecture.md +50 -0
  7. data/docs/_md_composer_cheatsheet.md +72 -0
  8. data/docs/_md_composer_concepts.md +64 -0
  9. data/docs/_md_composer_dev_guide.md +55 -0
  10. data/docs/_md_composer_getting_started.md +114 -0
  11. data/docs/_md_composer_readme.md +93 -0
  12. data/docs/_md_composer_user_guide.md +65 -0
  13. data/docs/ai/md_composer_ai_audit.md +35 -0
  14. data/docs/ai/md_composer_ai_canonical_docs.md +44 -0
  15. data/docs/ai/md_composer_ai_source_map.md +39 -0
  16. data/docs/compose/md_composer_compose_actions.md +338 -0
  17. data/docs/compose/md_composer_compose_anatomy.md +156 -0
  18. data/docs/compose/md_composer_compose_buffer.md +81 -0
  19. data/docs/compose/md_composer_compose_examples.md +31 -0
  20. data/docs/compose/md_composer_compose_include.md +136 -0
  21. data/docs/compose/md_composer_compose_select.md +198 -0
  22. data/docs/compose/md_composer_compose_sources.md +161 -0
  23. data/docs/compose/md_composer_compose_targets.md +194 -0
  24. data/docs/examples/md_composer_example_basic_compose.md +57 -0
  25. data/docs/examples/md_composer_example_buffer_target_actions.md +83 -0
  26. data/docs/examples/md_composer_example_fixtures.md +62 -0
  27. data/docs/examples/md_composer_example_html_output.md +50 -0
  28. data/docs/examples/md_composer_example_modify.md +77 -0
  29. data/docs/examples/md_composer_example_multi_row_compose.md +67 -0
  30. data/docs/examples/md_composer_example_ruby_plans.md +62 -0
  31. data/docs/examples/md_composer_example_structured_data.md +68 -0
  32. data/docs/examples/md_composer_example_transforms.md +68 -0
  33. data/docs/examples/md_composer_example_yaml_json_rows.md +56 -0
  34. data/docs/examples/md_composer_examples_readme.md +45 -0
  35. data/docs/examples/md_composer_runnable_examples.md +374 -0
  36. data/docs/examples/md_composer_source_ruby_dsl.md +88 -0
  37. data/docs/reference/md_composer_nested.md +170 -0
  38. data/docs/reference/md_composer_reference_api.md +71 -0
  39. data/docs/reference/md_composer_reference_capabilities.md +63 -0
  40. data/docs/reference/md_composer_reference_diagnostics.md +54 -0
  41. data/docs/reference/md_composer_reference_plan_schema.md +75 -0
  42. data/docs/reference/md_composer_reference_registries.md +63 -0
  43. data/docs/reference/md_composer_take.md +221 -0
  44. data/docs/reference/md_composer_unit_tokens.md +228 -0
  45. data/docs/reference/md_composer_where.md +227 -0
  46. data/docs/transform/md_composer_transform_anatomy.md +112 -0
  47. data/docs/transform/md_composer_transform_examples.md +30 -0
  48. data/docs/transform/md_composer_transform_modes.md +83 -0
  49. data/docs/transform/md_composer_transform_options.md +142 -0
  50. data/docs/transform/md_composer_transform_scope.md +97 -0
  51. data/docs/transform/md_composer_transform_transforms.md +99 -0
  52. data/examples/README.md +20 -0
  53. data/examples/advanced_composer.rb +207 -0
  54. data/examples/basic_compose.rb +24 -0
  55. data/examples/complex_composer.rb +235 -0
  56. data/examples/example_support.rb +18 -0
  57. data/examples/fixtures/current.md +179 -0
  58. data/examples/fixtures/faq.md +58 -0
  59. data/examples/fixtures/guide.md +62 -0
  60. data/examples/fixtures/site_intro.md +29 -0
  61. data/examples/fixtures/source.html +22 -0
  62. data/examples/html_input.rb +26 -0
  63. data/examples/output/advanced_composer.md +76 -0
  64. data/examples/output/basic_compose.md +25 -0
  65. data/examples/output/complex_composer.md +85 -0
  66. data/examples/output/html_input.md +4 -0
  67. data/examples/output/source_list_dsl.md +126 -0
  68. data/examples/output/standard_composer.md +46 -0
  69. data/examples/output/standard_sources_buffer.md +31 -0
  70. data/examples/output/yaml_plan.md +43 -0
  71. data/examples/plans/basic.yml +20 -0
  72. data/examples/source_list_dsl.rb +41 -0
  73. data/examples/standard_composer.rb +42 -0
  74. data/examples/standard_sources_buffer.rb +62 -0
  75. data/examples/yaml_plan.rb +17 -0
  76. data/lib/markdown_composer/capabilities.rb +223 -0
  77. data/lib/markdown_composer/composition_buffer.rb +378 -0
  78. data/lib/markdown_composer/data_path.rb +313 -0
  79. data/lib/markdown_composer/diagnostics.rb +63 -0
  80. data/lib/markdown_composer/document_index/html_parser.rb +84 -0
  81. data/lib/markdown_composer/document_index/markdown_parser.rb +338 -0
  82. data/lib/markdown_composer/document_index.rb +94 -0
  83. data/lib/markdown_composer/executor.rb +284 -0
  84. data/lib/markdown_composer/markdown_renderer.rb +105 -0
  85. data/lib/markdown_composer/plan.rb +436 -0
  86. data/lib/markdown_composer/plan_builder.rb +111 -0
  87. data/lib/markdown_composer/registries/action_entries.rb +26 -0
  88. data/lib/markdown_composer/registries/condition_entries.rb +58 -0
  89. data/lib/markdown_composer/registries/registry.rb +69 -0
  90. data/lib/markdown_composer/registries/source_entries.rb +18 -0
  91. data/lib/markdown_composer/registries/support_values.rb +23 -0
  92. data/lib/markdown_composer/registries/take_entries.rb +31 -0
  93. data/lib/markdown_composer/registries/take_registry.rb +18 -0
  94. data/lib/markdown_composer/registries/target_entries.rb +40 -0
  95. data/lib/markdown_composer/registries/unit_token_entries.rb +62 -0
  96. data/lib/markdown_composer/registries/where_registry.rb +84 -0
  97. data/lib/markdown_composer/registries.rb +46 -0
  98. data/lib/markdown_composer/result.rb +34 -0
  99. data/lib/markdown_composer/selection_resolver.rb +181 -0
  100. data/lib/markdown_composer/source.rb +57 -0
  101. data/lib/markdown_composer/source_list_builder.rb +47 -0
  102. data/lib/markdown_composer/take.rb +129 -0
  103. data/lib/markdown_composer/transform_options.rb +66 -0
  104. data/lib/markdown_composer/transform_runner/content_placement.rb +63 -0
  105. data/lib/markdown_composer/transform_runner/field_interpolator.rb +213 -0
  106. data/lib/markdown_composer/transform_runner/heading_numbering.rb +106 -0
  107. data/lib/markdown_composer/transform_runner/scope_resolver.rb +87 -0
  108. data/lib/markdown_composer/transform_runner.rb +264 -0
  109. data/lib/markdown_composer/transforms/default_entries.rb +31 -0
  110. data/lib/markdown_composer/transforms/registry.rb +11 -0
  111. data/lib/markdown_composer/validator.rb +378 -0
  112. data/lib/markdown_composer/value_object.rb +15 -0
  113. data/lib/markdown_composer/version.rb +5 -0
  114. data/lib/markdown_composer/where.rb +313 -0
  115. data/lib/markdown_composer.rb +114 -0
  116. metadata +260 -0
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarkdownComposer
4
# One diagnostic entry: a severity, a code, a message, and an optional
# path and details payload. Built through MarkdownComposer.value_object.
Diagnostic = MarkdownComposer.value_object(:severity, :code, :message, :path, :details) do
  # Hash form of the entry; keys whose value is nil are left out.
  def to_h
    fields = { severity: severity, code: code, message: message, path: path, details: details }
    fields.reject { |_name, value| value.nil? }
  end
end
15
+
16
# Ordered collection of Diagnostic entries with one helper per severity.
# The recording helpers return the collection itself so calls can be chained.
class Diagnostics
  attr_reader :items

  # @param items [Array] backing array; it is stored as given, not copied
  def initialize(items = [])
    @items = items
  end

  # Records an :info entry.
  def info(code, message, path: nil, details: nil)
    add(:info, code, message, path: path, details: details)
  end

  # Records a :warning entry.
  def warn(code, message, path: nil, details: nil)
    add(:warning, code, message, path: path, details: details)
  end

  # Records an :error entry.
  def error(code, message, path: nil, details: nil)
    add(:error, code, message, path: path, details: details)
  end

  # Appends every entry of +other+ (anything responding to to_a).
  # @return [Diagnostics] self
  def concat(other)
    incoming = other.to_a
    @items.concat(incoming)
    self
  end

  # @return [Array] the entries whose severity is :error
  def errors
    @items.select do |entry|
      entry.severity == :error
    end
  end

  # @return [Boolean] true when at least one :error entry was recorded
  def any_errors?
    !errors.empty?
  end

  # @return [Array] the backing array itself
  def to_a
    @items
  end

  # @return [Array] the to_h form of every entry
  def as_json
    @items.map { |entry| entry.to_h }
  end

  private

  # Builds a Diagnostic, stores it, and returns self for chaining.
  def add(severity, code, message, path:, details:)
    entry = Diagnostic.new(severity: severity, code: code, message: message, path: path, details: details)
    @items << entry
    self
  end
end
63
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "nokogiri"
5
+ rescue LoadError
6
+ nil
7
+ end
8
+
9
+ module MarkdownComposer
10
+ class DocumentIndex
11
# Converts the HTML of a source into Markdown text and hands that text to
# DocumentIndex.from_markdown for indexing.
class HtmlParser
  attr_reader :source, :diagnostics

  # @param source [#html, #key] source whose HTML is converted
  # @param diagnostics [Object] collector forwarded to DocumentIndex.from_markdown
  def initialize(source, diagnostics:)
    @source = source
    @diagnostics = diagnostics
  end

  # Parses the source HTML as an HTML5 fragment and indexes the Markdown
  # produced from it.
  def index
    fragment = Nokogiri::HTML5.fragment(source.html.to_s)
    markdown = html_to_markdown(fragment)
    DocumentIndex.from_markdown(markdown, source_key: source.key, diagnostics: diagnostics)
  end

  private

  # Renders every top-level node and then removes leading indentation.
  def html_to_markdown(fragment)
    rendered = fragment.children.map { |node| node_to_markdown(node) }.join
    strip_line_indentation(rendered)
  end

  # Removes leading spaces and tabs from each line outside fenced code.
  #
  # Only horizontal whitespace is removed: String#lstrip would also eat the
  # newline of a blank line, erasing the blank-line separators the block
  # emitters add (and so gluing adjacent paragraphs together). Lines between
  # a "```" opener and its closer are passed through untouched so code keeps
  # its indentation and blank lines.
  def strip_line_indentation(markdown)
    inside_fence = false
    markdown.lines.map do |line|
      if inside_fence
        inside_fence = false if line.start_with?("```")
        line
      else
        stripped = line.sub(/\A[ \t]+/, "")
        inside_fence = true if stripped.start_with?("```")
        stripped
      end
    end.join
  end

  # Markdown for a single node. Block-level output ends with a blank line.
  def node_to_markdown(node)
    return node.text if node.text?

    case node.name
    when /^h([1-6])$/
      "#{"#" * Regexp.last_match(1).to_i} #{node.text.strip}\n\n"
    when "p"
      "#{inline_html(node).strip}\n\n"
    when "a"
      "[#{node.text.strip}](#{node["href"]})"
    when "img"
      "![#{node["alt"]}](#{node["src"]})\n\n"
    when "ul"
      node.css("> li").map { |li| "- #{inline_html(li).strip}\n" }.join + "\n"
    when "ol"
      node.css("> li").each_with_index.map { |li, index| "#{index + 1}. #{inline_html(li).strip}\n" }.join + "\n"
    when "blockquote"
      node.text.lines.map { |line| "> #{line.strip}\n" }.join + "\n"
    when "pre"
      code = node.at_css("code")
      # A "language-xyz" class on the inner <code> becomes the fence language.
      language = code&.classes&.find { |klass| klass.start_with?("language-") }&.sub("language-", "")
      "```#{language}\n#{code&.text || node.text}\n```\n\n"
    when "table"
      rows = node.css("tr").map { |tr| tr.css("th,td").map { |cell| cell.text.strip } }
      return "" if rows.empty?

      # The first row is always rendered as the header row.
      header = "| #{rows.first.join(" | ")} |\n"
      separator = "| #{rows.first.map { "---" }.join(" | ")} |\n"
      body = rows[1..].to_a.map { |row| "| #{row.join(" | ")} |\n" }.join
      "#{header}#{separator}#{body}\n"
    else
      # Unknown element: render its children; if that yields nothing, keep
      # the element's own HTML.
      children = node.children.map { |child| node_to_markdown(child) }.join
      children.empty? ? "#{node.to_html}\n" : children
    end
  end

  # Inline Markdown for the children of +node+: links, images and code
  # spans are converted, any other element is flattened recursively.
  def inline_html(node)
    node.children.map do |child|
      if child.text?
        child.text
      elsif child.name == "a"
        "[#{child.text.strip}](#{child["href"]})"
      elsif child.name == "img"
        "![#{child["alt"]}](#{child["src"]})"
      elsif child.name == "code"
        "`#{child.text}`"
      else
        inline_html(child)
      end
    end.join
  end
end
83
+ end
84
+ end
@@ -0,0 +1,338 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarkdownComposer
4
+ class DocumentIndex
5
+ class MarkdownParser
6
+ attr_reader :lines, :source_key, :diagnostics
7
+
8
# @param markdown [String] document text; split into lines up front
# @param source_key [String] key used as the prefix of node and section ids
# @param diagnostics [Object] collector passed through to the resulting index
def initialize(markdown, source_key:, diagnostics:)
  @source_key = source_key
  @diagnostics = diagnostics
  @lines = markdown.lines
  @nodes = []    # block-level nodes in the order they are recorded
  @position = 0  # number of nodes recorded so far
end

# Scans the lines into nodes, then groups the nodes into sections.
# Returns the value of build_sections.
def index
  scan_nodes
  build_sections
end
20
+
21
+ private
22
+
23
# Walks +lines+ from top to bottom and records one block-level node per
# construct through add_node (lists and tables also get derived part nodes).
#
# Checked in this order: YAML front matter (only when the very first line
# is "---"), "$$" math blocks, fenced blocks (code, mermaid, yaml/json
# data), ATX headings, pipe tables, unordered lists, ordered lists,
# blockquotes, one-line HTML comments, one-line raw HTML, and finally
# paragraphs. Blank lines are skipped between blocks.
def scan_nodes
  index = 0
  if lines.first&.match?(/\A---\s*$/)
    closing = lines[1..]&.find_index { |line| line.match?(/\A---\s*$/) }
    if closing
      # find_index ran on lines[1..], so shift by one for the real index.
      end_index = closing + 1
      add_node("data_block", lines[0..end_index], 0, end_index, text: lines[1...end_index].join.strip, attributes: { "format" => "yaml", "location" => "frontmatter" })
      index = end_index + 1
    end
  end

  while index < lines.length
    line = lines[index]
    if line.strip.empty?
      index += 1
      next
    end

    case line
    when /\A\$\$\s*$/
      start = index
      index += 1
      index += 1 while index < lines.length && !lines[index].match?(/\A\$\$\s*$/)
      # Consume the closing "$$" when there is one; an unclosed block runs to EOF.
      index += 1 if index < lines.length
      add_node("math_block", lines[start...index], start, index - 1)
    when /\A(`{3,}|~{3,})\s*([A-Za-z0-9_-]+)?/
      # Capture the whole run of fence characters: a block opened with four
      # or more backticks/tildes may contain shorter fences, and is closed
      # only by a line starting with a run at least as long as the opener.
      fence = Regexp.last_match(1)
      language = Regexp.last_match(2).to_s
      start = index
      index += 1
      index += 1 while index < lines.length && !lines[index].start_with?(fence)
      closing_index = index
      closed = index < lines.length
      index += 1 if closed
      # Body excludes the opening line and, when present, the closing line.
      body_lines = lines[(start + 1)...closing_index].to_a
      language_token = language.downcase
      type = language_token == "mermaid" ? "mermaid" : "code_block"
      type = "data_block" if %w[yaml yml json].include?(language_token)
      format = language_token == "yml" ? "yaml" : language_token
      attributes = { "language" => language_token, "format" => format }
      attributes["location"] = "body" if type == "data_block"
      attributes["diagram_type"] = mermaid_diagram_type(body_lines.join) if type == "mermaid"
      add_node(type, lines[start...index], start, index - 1, text: body_lines.join.strip, attributes: attributes)
    when /\A\s{0,3}(\#{1,6})\s+(.+?)\s*#*\s*$/
      level = Regexp.last_match(1).length
      text = clean_text(Regexp.last_match(2))
      add_node("heading_#{level}", [ line ], index, index, level: level, text: text)
      index += 1
    when /\A\|.*\|\s*$/
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\|.*\|\s*$/)
      table_lines = lines[start...index]
      add_node("table", table_lines, start, index - 1, attributes: table_attributes(table_lines))
      add_table_part_nodes(table_lines, start)
    when /\A\s*([-*+])\s+/
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\s*([-*+])\s+/)
      item_total = index - start
      add_node("unordered_list", lines[start...index], start, index - 1, attributes: { "ordered" => false, "item_count" => item_total })
      # One derived list_item node per line of the list.
      lines[start...index].each_with_index do |item_line, offset|
        add_node("list_item", [ item_line ], start + offset, start + offset, text: clean_text(item_line.sub(/\A\s*[-*+]\s+/, "")), attributes: { "ordered" => false, "item_index" => offset + 1, "derived" => true })
      end
    when /\A\s*\d+\.\s+/
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A\s*\d+\.\s+/)
      item_total = index - start
      add_node("ordered_list", lines[start...index], start, index - 1, attributes: { "ordered" => true, "item_count" => item_total })
      lines[start...index].each_with_index do |item_line, offset|
        add_node("list_item", [ item_line ], start + offset, start + offset, text: clean_text(item_line.sub(/\A\s*\d+\.\s+/, "")), attributes: { "ordered" => true, "item_index" => offset + 1, "derived" => true })
      end
    when /\A>\s?/
      start = index
      index += 1 while index < lines.length && lines[index].match?(/\A>\s?/)
      add_node("blockquote", lines[start...index], start, index - 1)
    when /\A<!--.*-->\s*$/
      add_node("comment", [ line ], index, index, text: line.gsub(/\A<!--|-->\s*\z/, "").strip)
      index += 1
    when /\A<[^>]+>/
      # Raw HTML is recorded one line at a time.
      start = index
      index += 1
      add_node("raw_html", lines[start...index], start, index - 1)
    else
      start = index
      index += 1
      while index < lines.length && paragraph_continues?(lines[index])
        index += 1
      end
      add_node("paragraph", lines[start...index], start, index - 1)
    end
  end
end
114
+
115
# Whether +line+ extends the paragraph being collected: it must be
# non-blank and must not look like the start of another block (fence,
# "$$", heading, table row, list item, blockquote, or HTML tag).
def paragraph_continues?(line)
  return false if line.strip.empty?

  starts_other_block = line.match?(/\A(```|~~~)|\A\$\$\s*$|\A\s{0,3}\#{1,6}\s+|\A\|.*\|\s*$|\A\s*([-*+]|\d+\.)\s+|\A>\s?|\A<[^>]+>/)
  !starts_other_block
end
119
+
120
# Builds a ComposerNode from +raw_lines+, appends it to @nodes and advances
# the position counter.
#
# @param type [String] node type, e.g. "paragraph" or "heading_2"
# @param raw_lines [Array<String>] source lines making up the node
# @param start_index [Integer] zero-based index of the first line
# @param end_index [Integer] zero-based index of the last line
# @param level [Integer, nil] heading level, when given
# @param text [String, nil] explicit text; defaults to clean_text of the raw lines
# @param attributes [Hash] extra attributes stored on the node
# @return [ComposerNode] the node that was recorded
def add_node(type, raw_lines, start_index, end_index, level: nil, text: nil, attributes: {})
  raw = raw_lines.join
  position = @position + 1
  node_text = text || clean_text(raw)
  # Inline children are scanned while @position still holds the old value.
  inline_children = inline_nodes(raw, start_index)
  # Stored raw text always ends with a newline.
  terminated_raw = raw.end_with?("\n") ? raw : "#{raw}\n"

  node = ComposerNode.new(
    id: "#{source_key}:n#{position}",
    source_key: source_key,
    type: type,
    source_position: position,
    level: level,
    text: node_text,
    attributes: attributes,
    children: inline_children,
    raw: terminated_raw,
    start_line: start_index + 1,
    end_line: end_index + 1
  )
  @nodes.push(node)
  @position = position
  node
end
139
+
140
# Matches a table delimiter row such as "| --- | :---: |". The repeated
# column group is optional so the delimiter row of a single-column table
# ("| --- |") is recognised as well.
TABLE_SEPARATOR_ROW = /\A\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)*\|?\s*$/.freeze

# Records the derived parts of a table: one table_head node, a table_row
# node per visible row, a table_header / table_cell node per cell, and a
# table_body node when rows follow the delimiter row.
#
# @param table_lines [Array<String>] consecutive source lines of the table
# @param start_index [Integer] zero-based index of the first table line
def add_table_part_nodes(table_lines, start_index)
  return if table_lines.empty?

  header_line = table_lines.first
  separator_index = table_lines.find_index { |line| line.match?(TABLE_SEPARATOR_ROW) }
  # Without a delimiter row, everything after the first line is body.
  body_start = separator_index ? separator_index + 1 : 1
  column_count = table_columns(header_line).length
  add_node("table_head", [ header_line ], start_index, start_index, attributes: { "section" => "table_head", "row_count" => 1, "column_count" => column_count, "derived" => true })
  visible_row_index = 0
  table_lines.each_with_index do |row_line, offset|
    # The delimiter row is layout only; it gets no row or cell nodes.
    next if offset == separator_index

    visible_row_index += 1
    row_type = offset.zero? ? "table_head" : "table_body"
    cells = table_columns(row_line)
    add_node("table_row", [ row_line ], start_index + offset, start_index + offset, attributes: { "section" => row_type, "row_index" => visible_row_index, "column_count" => cells.length, "derived" => true })
    cells.each_with_index do |cell, column_index|
      add_node(offset < body_start ? "table_header" : "table_cell", [ "#{cell}\n" ], start_index + offset, start_index + offset, text: cell, attributes: { "section" => row_type, "row_index" => visible_row_index, "column_index" => column_index + 1, "derived" => true })
    end
  end
  body_lines = table_lines[body_start..].to_a
  if body_lines.any?
    body_row_count = body_lines.count { |line| !line.match?(TABLE_SEPARATOR_ROW) }
    add_node("table_body", body_lines, start_index + body_start, start_index + table_lines.length - 1, attributes: { "section" => "table_body", "row_count" => body_row_count, "column_count" => column_count, "derived" => true })
  end
end

# Row and column counts for a table; delimiter rows are not counted as rows
# and the column count is taken from the first line.
def table_attributes(table_lines)
  visible_rows = table_lines.reject { |line| line.match?(TABLE_SEPARATOR_ROW) }
  { "row_count" => visible_rows.length, "column_count" => table_columns(table_lines.first).length }
end

# Splits one table line into stripped cell texts, without the outer pipes.
# The -1 limit keeps trailing empty cells, which a plain split would drop.
def table_columns(row_line)
  row_line.to_s.strip.sub(/\A\|/, "").sub(/\|\z/, "").split("|", -1).map(&:strip)
end
175
+
176
# Finds inline constructs in +raw+ and returns a node for each, grouped by
# kind in this order: images, links, inline code, inline math. The offset
# passed to inline_node counts matches within one kind.
def inline_nodes(raw, start_index)
  images = raw.scan(/!\[([^\]]*)\]\(([^)\s]+)(?:\s+"([^"]+)")?\)/).each_with_index.map do |(alt, src, title), offset|
    inline_node("image", alt, start_index, offset, { "alt" => alt, "src" => src, "title" => title }.compact)
  end
  # The negative lookbehind keeps image syntax from matching as a link.
  links = raw.scan(/(?<!!)\[([^\]]+)\]\(([^)\s]+)(?:\s+"([^"]+)")?\)/).each_with_index.map do |(text, href, title), offset|
    inline_node("link", text, start_index, offset, { "href" => href, "title" => title }.compact)
  end
  code_spans = raw.scan(/`([^`\n]+)`/).each_with_index.map do |(code), offset|
    inline_node("inline_code", code, start_index, offset, {})
  end
  math_spans = raw.scan(/\$([^$\n]+)\$/).each_with_index.map do |(math), offset|
    inline_node("inline_math", math, start_index, offset, {})
  end

  images + links + code_spans + math_spans
end
192
+
193
# Builds the ComposerNode for one inline construct. The node takes the
# position the enclosing block node is about to receive (@position + 1) and
# the line number of the block's first line for both start and end.
def inline_node(type, text, start_index, offset, attributes)
  owner_position = @position + 1
  line_number = start_index + 1
  ComposerNode.new(
    id: "#{source_key}:i#{owner_position}:#{type}:#{offset}",
    source_key: source_key,
    type: type,
    source_position: owner_position,
    level: nil,
    text: text,
    attributes: attributes,
    children: [],
    raw: inline_raw(type, text, attributes),
    start_line: line_number,
    end_line: line_number
  )
end
208
+
209
# Rebuilds the Markdown source text of an inline node.
#
# Links and images include their optional title (attributes["title"]) so
# the rebuilt text does not silently lose it; without a title the output is
# the plain "[text](target)" / "![alt](target)" form.
#
# @param type [String] "link", "image", "inline_code" or "inline_math"
# @param text [String] link text, image alt text, code or math content
# @param attributes [Hash] reads "href" / "src" and the optional "title"
# @return [String]
def inline_raw(type, text, attributes)
  title = attributes["title"]
  title_part = title ? " \"#{title}\"" : ""
  case type
  when "link" then "[#{text}](#{attributes["href"]}#{title_part})"
  when "image" then "![#{text}](#{attributes["src"]}#{title_part})"
  when "inline_code" then "`#{text}`"
  when "inline_math" then "$#{text}$"
  else text.to_s
  end
end
218
+
219
# Groups the scanned nodes into a heading-based section tree and returns
# the DocumentIndex built from it.
#
# Each heading opens a section that is nested under the nearest open
# section with a lower level; other nodes go into the innermost open
# section, or into the root body when no heading has been seen yet.
def build_sections
  root_body = []
  root_children = []
  sections = []
  open_sections = []

  @nodes.each do |node|
    unless node.heading?
      (open_sections.empty? ? root_body : open_sections.last.body_nodes) << node
      next
    end

    # Close every open section that is at the same or a deeper level.
    open_sections.pop until open_sections.empty? || open_sections.last.level < node.level
    parent = open_sections.last
    section = MutableSection.new(
      id: "#{source_key}:s#{sections.length + 1}",
      source_key: source_key,
      heading_node: node,
      level: node.level,
      title_text: node.text,
      body_nodes: [],
      child_sections: [],
      source_position: node.source_position,
      parent_section_id: parent&.id,
      start_line: node.start_line
    )
    (parent ? parent.child_sections : root_children) << section
    sections << section
    open_sections << section
  end

  document_end = lines.length
  immutable_sections = sections.map { |section| section.to_immutable(document_end) }
  by_id = immutable_sections.to_h { |section| [ section.id, section ] }
  # Swap the mutable children held by each converted section for their
  # converted counterparts, then rebuild the lookup from the results.
  resolved_sections = {}
  immutable_sections = immutable_sections.map { |section| resolve_section_children(section, by_id, resolved_sections) }
  by_id = immutable_sections.to_h { |section| [ section.id, section ] }

  root = ComposerSection.new(
    id: "#{source_key}:root",
    source_key: source_key,
    heading_node: nil,
    level: 0,
    title_text: nil,
    body_nodes: root_body,
    child_sections: root_children.map { |child| by_id.fetch(child.id) },
    all_nodes: @nodes.dup,
    source_position: 0,
    parent_section_id: nil,
    start_line: 1,
    end_line: document_end
  )

  DocumentIndex.new(source_key: source_key, root: root, nodes: @nodes, sections: immutable_sections, diagnostics: diagnostics, lines: lines)
end
273
+
274
# Returns a copy of +section+ whose child_sections are themselves resolved,
# looking children up by id in +by_id+. Results are memoised in
# +resolved_sections+ so each section id is resolved once.
def resolve_section_children(section, by_id, resolved_sections)
  resolved_sections.fetch(section.id) do
    children = section.child_sections.map do |child|
      resolve_section_children(by_id.fetch(child.id), by_id, resolved_sections)
    end
    resolved_sections[section.id] = section.with(child_sections: children)
  end
end
284
+
285
# Reduces Markdown source to plain text: drops ``` ... ``` spans, keeps only
# the label of images and links, turns Markdown punctuation into spaces and
# collapses whitespace. Accepts nil (treated as an empty string).
def clean_text(raw)
  without_fences = raw.to_s.gsub(/```.*?```/m, "")
  labels_only = without_fences
                .gsub(/!\[([^\]]*)\]\([^)]+\)/, "\\1")
                .gsub(/\[([^\]]+)\]\([^)]+\)/, "\\1")
  unmarked = labels_only.gsub(/[`*_>#|-]/, " ")
  unmarked.gsub(/\s+/, " ").strip
end
294
+
295
# Classifies a mermaid block by its first meaningful line (blank lines and
# "%%" comment lines are skipped). Returns a short type name, or "unknown"
# when the line matches none of the known diagram keywords.
def mermaid_diagram_type(source)
  first = source.to_s.lines.map(&:strip).find { |line| !(line.empty? || line.start_with?("%%")) }.to_s
  keyword_types = [
    [ /\A(?:flowchart|graph)\b/i, "flowchart" ],
    [ /\AsequenceDiagram\b/i, "sequence" ],
    [ /\AclassDiagram\b/i, "class" ],
    [ /\AstateDiagram(?:-v2)?\b/i, "state" ],
    [ /\AerDiagram\b/i, "er" ],
    [ /\Agantt\b/i, "gantt" ],
    [ /\Apie\b/i, "pie" ],
    [ /\Ajourney\b/i, "journey" ],
    [ /\AgitGraph\b/i, "gitgraph" ],
    [ /\Amindmap\b/i, "mindmap" ],
    [ /\Atimeline\b/i, "timeline" ],
    [ /\AquadrantChart\b/i, "quadrant" ],
    [ /\Axychart-beta\b/i, "xy" ],
    [ /\Asankey-beta\b/i, "sankey" ]
  ]
  # First matching keyword wins, in the order listed.
  match = keyword_types.find { |pattern, _name| first.match?(pattern) }
  match ? match.last : "unknown"
end
315
+
316
# Working representation of a section while the tree is being assembled;
# body_nodes and child_sections are appended to in place.
MutableSection = Struct.new(:id, :source_key, :heading_node, :level, :title_text, :body_nodes, :child_sections, :source_position, :parent_section_id, :start_line, keyword_init: true) do
  # Converts this section into a ComposerSection.
  #
  # all_nodes holds the section's own body nodes followed, for each child,
  # by the child's heading node and the child's all_nodes. end_line is the
  # largest end_line among the heading and those nodes (start_line when
  # there are none), capped at +document_end+. child_sections is passed on
  # unchanged.
  def to_immutable(document_end)
    descendants = child_sections.flat_map do |child|
      [ child.heading_node ] + child.to_immutable(document_end).all_nodes
    end
    all = body_nodes + descendants
    known_ends = ([ heading_node&.end_line ] + all.map(&:end_line)).compact
    end_line = known_ends.max || start_line
    ComposerSection.new(
      id: id,
      source_key: source_key,
      heading_node: heading_node,
      level: level,
      title_text: title_text,
      body_nodes: body_nodes,
      child_sections: child_sections,
      all_nodes: all,
      source_position: source_position,
      parent_section_id: parent_section_id,
      start_line: start_line,
      end_line: [ end_line, document_end ].min
    )
  end
end
336
+ end
337
+ end
338
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MarkdownComposer
4
# A node of an indexed document, block-level or inline. Built through
# MarkdownComposer.value_object.
ComposerNode = MarkdownComposer.value_object(:id, :source_key, :type, :source_position, :level, :text, :attributes, :children, :raw, :start_line, :end_line) do
  # True when the node type begins with "heading_".
  def heading?
    type.start_with?("heading_")
  end

  # Hash form of the node; children are converted with their own to_h.
  def to_h
    child_hashes = children.map { |child| child.to_h }
    {
      id: id,
      source_key: source_key,
      type: type,
      source_position: source_position,
      level: level,
      text: text,
      attributes: attributes,
      children: child_hashes,
      raw: raw,
      start_line: start_line,
      end_line: end_line
    }
  end
end
25
+
26
# A heading-delimited section of an indexed document (or its root). Built
# through MarkdownComposer.value_object.
ComposerSection = MarkdownComposer.value_object(:id, :source_key, :heading_node, :level, :title_text, :body_nodes, :child_sections, :all_nodes, :source_position, :parent_section_id, :start_line, :end_line) do
  # The title followed by the text of every node, one per line; nil
  # entries are skipped.
  def text
    pieces = [ title_text ] + all_nodes.map(&:text)
    pieces.compact.join("\n")
  end

  # Source Markdown of the section: the heading plus every node not flagged
  # "derived", de-duplicated by id and ordered by source position.
  def raw
    source_nodes = all_nodes.reject { |node| node.attributes["derived"] }
    candidates = [ heading_node, *source_nodes ].compact
    ordered = candidates.uniq(&:id).sort_by(&:source_position)
    ordered.map(&:raw).join
  end

  # Hash form of the section; child sections and all_nodes are given by id.
  def to_h
    heading_hash = heading_node&.to_h
    {
      id: id,
      source_key: source_key,
      heading_node: heading_hash,
      level: level,
      title_text: title_text,
      body_nodes: body_nodes.map { |node| node.to_h },
      child_sections: child_sections.map { |child| child.id },
      all_nodes: all_nodes.map { |node| node.id },
      source_position: source_position,
      parent_section_id: parent_section_id,
      start_line: start_line,
      end_line: end_line
    }
  end
end
52
+
53
# Result of indexing one document: its nodes, its section tree and the
# original lines.
class DocumentIndex
  attr_reader :source_key, :root, :nodes, :sections, :diagnostics, :lines

  # Indexes +source+ (normalised through Source.build), choosing the HTML
  # or the Markdown path from the source's format.
  def self.build(source, diagnostics: Diagnostics.new)
    source = Source.build(source)
    if source.format == :html
      from_html(source, diagnostics: diagnostics)
    else
      from_markdown(source.markdown.to_s, source_key: source.key, diagnostics: diagnostics)
    end
  end

  # Indexes Markdown text with MarkdownParser.
  def self.from_markdown(markdown, source_key: "buffer", diagnostics: Diagnostics.new)
    MarkdownParser.new(markdown.to_s, source_key: source_key, diagnostics: diagnostics).index
  end

  # Indexes an HTML source with HtmlParser. When Nokogiri is not loaded, an
  # error diagnostic is recorded and the HTML text is indexed as Markdown.
  def self.from_html(source, diagnostics:)
    if defined?(Nokogiri)
      HtmlParser.new(source, diagnostics: diagnostics).index
    else
      diagnostics.error("source.html_parser_missing", "HTML input requires nokogiri", path: "sources.#{source.key}")
      from_markdown(source.html.to_s, source_key: source.key, diagnostics: diagnostics)
    end
  end

  def initialize(source_key:, root:, nodes:, sections:, diagnostics:, lines:)
    @source_key = source_key
    @root = root
    @nodes = nodes
    @sections = sections
    @diagnostics = diagnostics
    @lines = lines
  end

  # Source text for the inclusive, 1-based line range.
  #
  # The range is clamped to the document. A range that is empty after
  # clamping (start beyond the last line, or end before start) yields ""
  # instead of raising on a nil slice or wrapping around through negative
  # array indexes.
  #
  # @param start_line [Integer] first line, 1-based
  # @param end_line [Integer] last line, 1-based, inclusive
  # @return [String]
  def markdown_for_range(start_line, end_line)
    first = [ start_line, 1 ].max
    last = [ end_line, lines.length ].min
    return "" if first > last

    lines[(first - 1)..(last - 1)].join
  end
end
91
+ end
92
+
93
+ require_relative "document_index/markdown_parser"
94
+ require_relative "document_index/html_parser"