markdown_composer 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +23 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +278 -0
  5. data/ROADMAP.md +80 -0
  6. data/docs/_md_composer_architecture.md +50 -0
  7. data/docs/_md_composer_cheatsheet.md +72 -0
  8. data/docs/_md_composer_concepts.md +64 -0
  9. data/docs/_md_composer_dev_guide.md +55 -0
  10. data/docs/_md_composer_getting_started.md +114 -0
  11. data/docs/_md_composer_readme.md +93 -0
  12. data/docs/_md_composer_user_guide.md +65 -0
  13. data/docs/ai/md_composer_ai_audit.md +35 -0
  14. data/docs/ai/md_composer_ai_canonical_docs.md +44 -0
  15. data/docs/ai/md_composer_ai_source_map.md +39 -0
  16. data/docs/compose/md_composer_compose_actions.md +338 -0
  17. data/docs/compose/md_composer_compose_anatomy.md +156 -0
  18. data/docs/compose/md_composer_compose_buffer.md +81 -0
  19. data/docs/compose/md_composer_compose_examples.md +31 -0
  20. data/docs/compose/md_composer_compose_include.md +136 -0
  21. data/docs/compose/md_composer_compose_select.md +198 -0
  22. data/docs/compose/md_composer_compose_sources.md +161 -0
  23. data/docs/compose/md_composer_compose_targets.md +194 -0
  24. data/docs/examples/md_composer_example_basic_compose.md +57 -0
  25. data/docs/examples/md_composer_example_buffer_target_actions.md +83 -0
  26. data/docs/examples/md_composer_example_fixtures.md +62 -0
  27. data/docs/examples/md_composer_example_html_output.md +50 -0
  28. data/docs/examples/md_composer_example_modify.md +77 -0
  29. data/docs/examples/md_composer_example_multi_row_compose.md +67 -0
  30. data/docs/examples/md_composer_example_ruby_plans.md +62 -0
  31. data/docs/examples/md_composer_example_structured_data.md +68 -0
  32. data/docs/examples/md_composer_example_transforms.md +68 -0
  33. data/docs/examples/md_composer_example_yaml_json_rows.md +56 -0
  34. data/docs/examples/md_composer_examples_readme.md +45 -0
  35. data/docs/examples/md_composer_runnable_examples.md +374 -0
  36. data/docs/examples/md_composer_source_ruby_dsl.md +88 -0
  37. data/docs/reference/md_composer_nested.md +170 -0
  38. data/docs/reference/md_composer_reference_api.md +71 -0
  39. data/docs/reference/md_composer_reference_capabilities.md +63 -0
  40. data/docs/reference/md_composer_reference_diagnostics.md +54 -0
  41. data/docs/reference/md_composer_reference_plan_schema.md +75 -0
  42. data/docs/reference/md_composer_reference_registries.md +63 -0
  43. data/docs/reference/md_composer_take.md +221 -0
  44. data/docs/reference/md_composer_unit_tokens.md +228 -0
  45. data/docs/reference/md_composer_where.md +227 -0
  46. data/docs/transform/md_composer_transform_anatomy.md +112 -0
  47. data/docs/transform/md_composer_transform_examples.md +30 -0
  48. data/docs/transform/md_composer_transform_modes.md +83 -0
  49. data/docs/transform/md_composer_transform_options.md +142 -0
  50. data/docs/transform/md_composer_transform_scope.md +97 -0
  51. data/docs/transform/md_composer_transform_transforms.md +99 -0
  52. data/examples/README.md +20 -0
  53. data/examples/advanced_composer.rb +207 -0
  54. data/examples/basic_compose.rb +24 -0
  55. data/examples/complex_composer.rb +235 -0
  56. data/examples/example_support.rb +18 -0
  57. data/examples/fixtures/current.md +179 -0
  58. data/examples/fixtures/faq.md +58 -0
  59. data/examples/fixtures/guide.md +62 -0
  60. data/examples/fixtures/site_intro.md +29 -0
  61. data/examples/fixtures/source.html +22 -0
  62. data/examples/html_input.rb +26 -0
  63. data/examples/output/advanced_composer.md +76 -0
  64. data/examples/output/basic_compose.md +25 -0
  65. data/examples/output/complex_composer.md +85 -0
  66. data/examples/output/html_input.md +4 -0
  67. data/examples/output/source_list_dsl.md +126 -0
  68. data/examples/output/standard_composer.md +46 -0
  69. data/examples/output/standard_sources_buffer.md +31 -0
  70. data/examples/output/yaml_plan.md +43 -0
  71. data/examples/plans/basic.yml +20 -0
  72. data/examples/source_list_dsl.rb +41 -0
  73. data/examples/standard_composer.rb +42 -0
  74. data/examples/standard_sources_buffer.rb +62 -0
  75. data/examples/yaml_plan.rb +17 -0
  76. data/lib/markdown_composer/capabilities.rb +223 -0
  77. data/lib/markdown_composer/composition_buffer.rb +378 -0
  78. data/lib/markdown_composer/data_path.rb +313 -0
  79. data/lib/markdown_composer/diagnostics.rb +63 -0
  80. data/lib/markdown_composer/document_index/html_parser.rb +84 -0
  81. data/lib/markdown_composer/document_index/markdown_parser.rb +338 -0
  82. data/lib/markdown_composer/document_index.rb +94 -0
  83. data/lib/markdown_composer/executor.rb +284 -0
  84. data/lib/markdown_composer/markdown_renderer.rb +105 -0
  85. data/lib/markdown_composer/plan.rb +436 -0
  86. data/lib/markdown_composer/plan_builder.rb +111 -0
  87. data/lib/markdown_composer/registries/action_entries.rb +26 -0
  88. data/lib/markdown_composer/registries/condition_entries.rb +58 -0
  89. data/lib/markdown_composer/registries/registry.rb +69 -0
  90. data/lib/markdown_composer/registries/source_entries.rb +18 -0
  91. data/lib/markdown_composer/registries/support_values.rb +23 -0
  92. data/lib/markdown_composer/registries/take_entries.rb +31 -0
  93. data/lib/markdown_composer/registries/take_registry.rb +18 -0
  94. data/lib/markdown_composer/registries/target_entries.rb +40 -0
  95. data/lib/markdown_composer/registries/unit_token_entries.rb +62 -0
  96. data/lib/markdown_composer/registries/where_registry.rb +84 -0
  97. data/lib/markdown_composer/registries.rb +46 -0
  98. data/lib/markdown_composer/result.rb +34 -0
  99. data/lib/markdown_composer/selection_resolver.rb +181 -0
  100. data/lib/markdown_composer/source.rb +57 -0
  101. data/lib/markdown_composer/source_list_builder.rb +47 -0
  102. data/lib/markdown_composer/take.rb +129 -0
  103. data/lib/markdown_composer/transform_options.rb +66 -0
  104. data/lib/markdown_composer/transform_runner/content_placement.rb +63 -0
  105. data/lib/markdown_composer/transform_runner/field_interpolator.rb +213 -0
  106. data/lib/markdown_composer/transform_runner/heading_numbering.rb +106 -0
  107. data/lib/markdown_composer/transform_runner/scope_resolver.rb +87 -0
  108. data/lib/markdown_composer/transform_runner.rb +264 -0
  109. data/lib/markdown_composer/transforms/default_entries.rb +31 -0
  110. data/lib/markdown_composer/transforms/registry.rb +11 -0
  111. data/lib/markdown_composer/validator.rb +378 -0
  112. data/lib/markdown_composer/value_object.rb +15 -0
  113. data/lib/markdown_composer/version.rb +5 -0
  114. data/lib/markdown_composer/where.rb +313 -0
  115. data/lib/markdown_composer.rb +114 -0
  116. metadata +260 -0
@@ -0,0 +1,378 @@
1
+ # frozen_string_literal: true
2
+
module MarkdownComposer
  # Mutable Markdown buffer that composition actions write into.
  #
  # Besides the Markdown text, the buffer tracks +origin_nodes+: clones of the
  # units that were written into it, tagged with the id and source key they
  # came from. Line ranges handled internally are 1-based and inclusive; a
  # range whose end precedes its begin (such as +1..0+) is an insertion point
  # and covers no lines.
  class CompositionBuffer
    attr_reader :diagnostics, :origin_nodes

    # @param markdown [String] initial content (copied, never aliased)
    # @param diagnostics [Diagnostics] collector receiving warnings and errors
    def initialize(markdown = +"", diagnostics: Diagnostics.new)
      @markdown = markdown.dup
      @diagnostics = diagnostics
      @origin_nodes = []
    end

    # @return [String] a copy of the current buffer text
    def markdown
      @markdown.dup
    end

    # @return [Boolean] true when the buffer holds only whitespace
    def empty?
      @markdown.strip.empty?
    end

    # Replaces the whole buffer (text and origins) with the given units.
    def set(units)
      @markdown = units_to_markdown(units)
      @origin_nodes = origin_units(units)
      self
    end

    # Adds the units after the current content.
    def append(units)
      @origin_nodes.concat(origin_units(units))
      insert_at_end(units_to_markdown(units))
    end

    # Adds the units before the current content.
    def prepend(units)
      @origin_nodes = origin_units(units) + @origin_nodes
      insert_at_start(units_to_markdown(units))
    end

    # Inserts the units before the first range matched by +target+.
    def insert_before(target, units, options: {})
      insert_relative(target, units, :before, options: options)
    end

    # Inserts the units after the first range matched by +target+.
    def insert_after(target, units, options: {})
      insert_relative(target, units, :after, options: options)
    end

    # Replaces every range matched by +target+ with the units. A target whose
    # position is "output" replaces the whole buffer via #set. When nothing
    # matches, a warning is recorded and the buffer is left untouched.
    def replace(target, units, options: {})
      return set(units) if target && target["position"] == "output"

      ranges = target_ranges(target, options: options)
      return warn_empty_destructive_target("replace", path: "target") if ranges.empty?

      @origin_nodes.concat(origin_units(units))
      replace_ranges(ranges, units_to_markdown(units))
    end

    # Deletes every range matched by +target+; warns when nothing matches.
    def remove(target, options: {})
      ranges = target_ranges(target, options: options)
      return warn_empty_destructive_target("remove_buffer_target", path: "target") if ranges.empty?

      replace_ranges(ranges, "")
    end

    # Inserts the units relative to +target+, honouring its "placement" key
    # ("before" inserts before; anything else, including absent, inserts after).
    def copy(target, units, options: {})
      placement = target&.fetch("placement", nil) || "after"
      if placement == "before"
        insert_before(target, units, options: options)
      else
        insert_after(target, units, options: options)
      end
    end

    # Places raw Markdown (rather than units) into the buffer.
    #
    # @param target [Hash, nil] nil means +{ "position" => "end" }+; a target
    #   without a "position" key is resolved relative to matched content
    # @param markdown [#to_s] text to place
    # @param origin_nodes [Array] nodes to record as the origin of the text
    # @return [self]
    def place_markdown(target, markdown, origin_nodes: [], options: {})
      target ||= { "position" => "end" }
      content = normalize_markdown_fragment(markdown)
      origins = Array(origin_nodes).compact

      case target["position"]
      when "output"
        @origin_nodes = origins
        @markdown = trim_outer_blank_lines(content)
        @markdown = normalize_markdown_fragment(@markdown) unless @markdown.empty?
      when "start"
        @origin_nodes = origins + @origin_nodes
        insert_at_start(content)
      when "end"
        @origin_nodes.concat(origins)
        insert_at_end(content)
      when nil
        placement = target["placement"] == "before" ? :before : :after
        place_markdown_relative(target, content, origins, placement, options: options)
      end

      self
    end

    # Inserts the units directly after the first match of +start_target+,
    # provided that match ends before the first match of +end_target+ begins.
    def insert_between(start_target, end_target, units, options: {})
      splice_between(start_target, end_target, options: options) do
        [ units_to_markdown(units), origin_units(units) ]
      end
    end

    # Moves content to +target+. The moved content is either the given units
    # or, when +units+ is nil, the buffer lines matched by +selector+.
    def move(selector, target, units = nil, options: {})
      ranges = units ? unit_ranges(units) : target_ranges(selector, options: options)
      return warn_empty_destructive_target("move", path: "select") if ranges.empty?
      return warn_empty_destructive_target("move", path: "target") if target_ranges(target, options: options).empty?

      moved = units ? units_to_markdown(units) : extract_ranges(ranges)
      replace_ranges(ranges, "")
      unit = ComposerNode.new(id: "buffer:moved", source_key: "buffer", type: "paragraph", source_position: 0, level: nil, text: moved, attributes: {}, children: [], raw: moved, start_line: 1, end_line: moved.lines.length)
      copy(target, [ unit ], options: options)
    end

    # Builds a fresh DocumentIndex over the current buffer text on every call.
    def index
      DocumentIndex.from_markdown(@markdown, source_key: "buffer", diagnostics: diagnostics)
    end

    # Overwrites the buffer text verbatim; origins are left as they are.
    def replace_markdown(markdown)
      @markdown = markdown.to_s
      self
    end

    # Applies several line-range replacements at once, then re-derives the
    # origin nodes from the resulting text.
    #
    # @param replacements [Array<Hash>] each with +:range+ (1-based inclusive
    #   line range) and +:markdown+ (replacement text; empty deletes the range)
    def replace_markdown_ranges(replacements)
      buffer_lines = lines
      # Work bottom-up so earlier line numbers stay valid while splicing.
      Array(replacements).sort_by { |replacement| replacement.fetch(:range).begin }.reverse_each do |replacement|
        content = replacement.fetch(:markdown).to_s
        start, count = slice_bounds(replacement.fetch(:range), buffer_lines.length)
        buffer_lines[start, count] = content.empty? ? [] : [ normalize_markdown_fragment(content) ]
      end
      @markdown = buffer_lines.join
      sync_origins_to_current!
      self
    end

    # Keeps only the first origin node per origin id and rebuilds the buffer
    # from the surviving nodes.
    def dedupe_by_origin!
      seen = {}
      deduped = origin_nodes.select do |node|
        key = node.attributes["origin_id"] || node.id
        next false if seen[key]

        seen[key] = true
      end
      set(deduped)
    end

    # Replaces the origin list with the nodes parsed from the current text.
    def sync_origins_to_current!
      @origin_nodes = index.nodes
      self
    end

    # @return [Hash] serialisable snapshot of text, origins, nodes and sections
    def to_h
      current_index = index # index once; each call re-parses the buffer
      {
        type: "composition_buffer",
        markdown: markdown,
        origins: origin_nodes.map { |node| { id: node.id, origin_id: node.attributes["origin_id"], source_key: node.source_key, source_position: node.source_position } },
        nodes: current_index.nodes.map(&:to_h),
        sections: current_index.sections.map(&:to_h)
      }
    end

    private

    def insert_relative(target, units, placement, options:)
      splice_relative(target, units_to_markdown(units), origin_units(units), placement, options: options)
    end

    def place_markdown_relative(target, content, origins, placement, options:)
      if target["placement"] == "between"
        return place_markdown_between(target["start"], target["end"], content, origins, options: options)
      end

      splice_relative(target, content, origins, placement, options: options)
    end

    def place_markdown_between(start_target, end_target, content, origins, options:)
      splice_between(start_target, end_target, options: options) { [ content, origins ] }
    end

    # Inserts +content+ before or after the first range matched by +target+
    # and records +origins+. Warns (and changes nothing) when nothing matches.
    def splice_relative(target, content, origins, placement, options:)
      ranges = target_ranges(target, options: options)
      return diagnostics.warn("target.empty", "Target matched no buffer content", path: "target") if ranges.empty?

      range = ranges.first
      splice_at(placement == :before ? range.begin - 1 : range.end, content)
      @origin_nodes.concat(origins)
      self
    end

    # Shared implementation of the "between" placement. The block is only
    # called once both anchors resolved in the right order and must return
    # +[content, origins]+.
    def splice_between(start_target, end_target, options:)
      start_ranges = target_ranges(start_target, options: options)
      end_ranges = target_ranges(end_target, options: options)
      return diagnostics.warn("target.empty", "Between target matched no buffer content", path: "target") if start_ranges.empty? || end_ranges.empty?

      start_range = start_ranges.first
      if start_range.end >= end_ranges.first.begin
        diagnostics.error("target.order_invalid", "Between start anchor must resolve before end anchor", path: "target")
        return self
      end

      content, origins = yield
      splice_at(start_range.end, content)
      @origin_nodes.concat(origins)
      self
    end

    # Rebuilds the buffer as: lines before +insertion_index+ (0-based),
    # +content+, remaining lines — separated by single blank lines.
    def splice_at(insertion_index, content)
      buffer_lines = lines
      @markdown = join_markdown_fragments(
        buffer_lines[0...insertion_index].to_a.join,
        content,
        buffer_lines[insertion_index..].to_a.join
      )
    end

    def insert_at_end(content)
      @markdown = join_markdown_fragments(@markdown, content)
      self
    end

    def insert_at_start(content)
      @markdown = join_markdown_fragments(content, @markdown)
      self
    end

    # Resolves a target hash to 1-based inclusive line ranges. Position
    # targets map to fixed ranges ("start" and "end" are empty insertion
    # points); everything else is resolved through SelectionResolver.
    def target_ranges(target, options:)
      target ||= { "position" => "output" }
      return [ 1..[ lines.length, 1 ].max ] if target["position"] == "output"
      return [ 1..0 ] if target["position"] == "start"
      return [ (lines.length + 1)..lines.length ] if target["position"] == "end"

      resolver = SelectionResolver.new(index: index, options: options, diagnostics: diagnostics, path: "target")
      selector = target.reject { |key, _| %w[placement start end position].include?(key) }
      resolver.resolve(selector).map { |unit| unit.start_line..unit.end_line }
    end

    # Line ranges of the given units (skipping units flagged "derived"),
    # sorted and with overlapping or adjacent ranges merged.
    def unit_ranges(units)
      Array(units).compact
        .reject { |unit| unit.respond_to?(:attributes) && unit.attributes["derived"] }
        .map { |unit| unit.start_line..unit.end_line }
        .sort_by(&:begin)
        .each_with_object([]) do |range, merged|
          if merged.any? && range.begin <= merged.last.end + 1
            merged[-1] = merged.last.begin..[ merged.last.end, range.end ].max
          else
            merged << range
          end
        end
    end

    def warn_empty_destructive_target(action, path:)
      diagnostics.warn(
        "target.empty_destructive",
        "#{action} matched no buffer content and made no destructive change",
        path: path,
        details: { action: action }
      )
      self
    end

    # Replaces each range with +content+ (or deletes it when +content+ is
    # empty), working bottom-up so earlier line numbers stay valid.
    def replace_ranges(ranges, content)
      buffer_lines = lines
      ranges.sort_by(&:begin).reverse_each do |range|
        start, count = slice_bounds(range, buffer_lines.length)
        buffer_lines[start, count] = content.empty? ? [] : [ normalize_markdown_fragment(content) ]
      end
      @markdown = buffer_lines.join
      self
    end

    # Text of the buffer lines covered by +ranges+, one chunk per range.
    def extract_ranges(ranges)
      buffer_lines = lines
      ranges.map do |range|
        start, count = slice_bounds(range, buffer_lines.length)
        buffer_lines[start, count].to_a.join
      end.join("\n")
    end

    # Converts a 1-based inclusive line range into a 0-based
    # +[start, count]+ pair clamped to the buffer. Using a count instead of
    # an end index matters: an empty range such as +1..0+ would otherwise
    # become the slice +0..-1+, which Ruby reads as "the whole array".
    def slice_bounds(range, length)
      start = [ range.begin - 1, 0 ].max
      finish = [ range.end - 1, length - 1 ].min
      [ start, [ finish - start + 1, 0 ].max ]
    end

    def units_to_markdown(units)
      serialize_units(origin_units(units))
    end

    # Unique (by id) units ordered by source position, cloned as buffer nodes.
    def origin_units(units)
      Array(units).compact.uniq(&:id).sort_by(&:source_position).map { |unit| clone_origin(unit) }
    end

    # Clones a unit into the buffer namespace while remembering where it
    # originally came from. +object_id+ here is this buffer's own object id.
    def clone_origin(unit)
      attrs = unit.attributes.merge("origin_id" => unit.attributes["origin_id"] || unit.id, "origin_source_key" => unit.attributes["origin_source_key"] || unit.source_key)
      unit.with(id: "buffer:#{unit.id}:#{object_id}:#{unit.source_position}", source_key: "buffer", attributes: attrs)
    end

    # Joins the raw text of the units. Contiguous list items and table
    # fragments stay on adjacent lines; everything else is separated by one
    # blank line. Units with blank text are skipped.
    def serialize_units(units)
      result = +""
      previous = nil
      Array(units).each do |unit|
        text = normalize_markdown_fragment(unit.raw)
        next if text.strip.empty?

        if result.empty?
          result << text
        else
          separator = tight_continuation?(previous, unit) ? "\n" : "\n\n"
          result = result.sub(/\n+\z/, separator)
          result << text.sub(/\A\n+/, "")
        end
        previous = unit
      end
      result
    end

    # Joins non-empty fragments with exactly one blank line between them and
    # a single trailing newline; returns an empty string when all are blank.
    def join_markdown_fragments(*fragments)
      text = fragments.map { |fragment| trim_outer_blank_lines(fragment) }
        .reject(&:empty?)
        .join("\n\n")
      text.empty? ? +"" : "#{text}\n"
    end

    # Ensures the text ends with a newline.
    def normalize_markdown_fragment(content)
      text = content.to_s
      text = "#{text}\n" unless text.end_with?("\n")
      text
    end

    # Drops leading and trailing blank lines and the final newline.
    def trim_outer_blank_lines(content)
      fragment_lines = normalize_markdown_fragment(content).lines
      fragment_lines.shift while fragment_lines.first&.strip&.empty?
      fragment_lines.pop while fragment_lines.last&.strip&.empty?
      fragment_lines.join.sub(/\n\z/, "")
    end

    def tight_continuation?(previous, current)
      return false unless previous && current
      return false unless previous.source_key == current.source_key

      contiguous_list_items?(previous, current) || contiguous_table_fragments?(previous, current)
    end

    def contiguous_list_items?(previous, current)
      return false unless previous.type == "list_item" && current.type == "list_item"
      return false unless previous.attributes["ordered"] == current.attributes["ordered"]

      current.start_line == previous.end_line + 1
    end

    def contiguous_table_fragments?(previous, current)
      return false unless table_fragment?(previous) && table_fragment?(current)

      current.start_line <= previous.end_line + 1
    end

    def table_fragment?(unit)
      %w[table_head table_body table_row table_header table_cell].include?(unit.type)
    end

    def lines
      @markdown.lines
    end
  end
end
@@ -0,0 +1,313 @@
1
+ # frozen_string_literal: true
2
+
module MarkdownComposer
  # Evaluates small path expressions against the JSON or YAML payload of a
  # data_block node and wraps each resulting value in a ComposerNode.
  #
  # A path is a dot-separated list of segments. A segment is a key (or a
  # comma-separated key list), optionally followed by a bracketed filter:
  # a quoted key, a take expression (+first:2+, +odd+, ...), or a predicate
  # built from atoms joined with +;+ (and) and +|+ (or). The segment +**+
  # collects the following key at any depth.
  module DataPath
    module_function

    # Resolves +path+ inside +data_block+.
    #
    # @return [Array<ComposerNode>] one node per matched value; empty when the
    #   node is not a data_block or its payload cannot be parsed
    def resolve(data_block, path, diagnostics:, diagnostic_path:)
      unless data_block.type == "data_block"
        diagnostics.error("data_path.scope_invalid", "data_path is only valid inside data_block include scope", path: diagnostic_path)
        return []
      end

      data = parse_data(data_block, diagnostics: diagnostics, path: diagnostic_path)
      return [] if data.nil?

      evaluated = evaluate(data, path.to_s, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
      values = evaluated.is_a?(Array) ? evaluated : [ evaluated ].compact
      diagnostics.warn("data_path.empty", "Data path matched no values", path: diagnostic_path) if values.empty?
      values.each_with_index.map { |value, index| node_for(data_block, path, value, index) }
    end

    # Parses the block text according to its "format" attribute ("json",
    # "yaml"/"yml", anything else means auto-detect). Records an error and
    # returns nil when parsing fails.
    def parse_data(data_block, diagnostics:, path:)
      format = data_block.attributes.fetch("format", "auto").to_s
      source = data_block.text.to_s
      case format
      when "json"
        JSON.parse(source)
      when "yaml", "yml"
        YAML.safe_load(source, permitted_classes: [ Symbol ], aliases: false)
      else
        parse_auto(source)
      end
    rescue JSON::ParserError, Psych::Exception => e
      diagnostics.error("data_path.invalid_data", "Invalid #{format} data: #{e.message}", path: path)
      nil
    end

    # Tries JSON first and falls back to YAML when JSON parsing fails.
    def parse_auto(source)
      JSON.parse(source)
    rescue JSON::ParserError
      YAML.safe_load(source, permitted_classes: [ Symbol ], aliases: false)
    end

    # Walks +expression+ segment by segment, starting from +data+.
    def evaluate(data, expression, diagnostics:, diagnostic_path:)
      current = data
      segments = split_path(expression)
      index = 0
      while index < segments.length
        segment = segments[index]
        # "**" consumes the following segment as the key to search for.
        if segment == "**" && segments[index + 1]
          current = recursive_project(current, segments[index + 1])
          index += 2
          next
        end

        current = apply_segment(current, segment, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
        index += 1
      end
      current
    end

    def split_path(expression)
      split_top_level(expression.to_s, ".").map(&:strip).reject(&:empty?)
    end

    # Applies one segment: key projection first, then the bracket part, which
    # is treated as a literal key when quoted and as a filter otherwise.
    def apply_segment(current, segment, diagnostics:, diagnostic_path:)
      key, filter = parse_segment(segment)
      current = project(current, key, diagnostics: diagnostics, diagnostic_path: diagnostic_path) unless key.empty?
      return current unless filter

      if quoted?(filter)
        project(current, unquote(filter), diagnostics: diagnostics, diagnostic_path: diagnostic_path)
      else
        apply_filter(current, filter, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
      end
    end

    # @return [Array(String, String|nil)] the key and the bracket content
    def parse_segment(segment)
      if segment =~ /\A([^\[]*)\[(.+)\]\z/
        [ normalize_key(Regexp.last_match(1).strip), Regexp.last_match(2).strip ]
      else
        [ normalize_key(segment.strip), nil ]
      end
    end

    # Reads +key+ (or a comma-separated key list) from a hash, or from every
    # element of an array. Warns and returns nil for other value types.
    def project(current, key, diagnostics:, diagnostic_path:)
      keys = split_key_list(key)
      if keys.length > 1
        return Array(current).map { |item| project_keys(item, keys, diagnostics: diagnostics, diagnostic_path: diagnostic_path) }.compact if current.is_a?(Array)
        return project_keys(current, keys, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
      end

      key = keys.first
      case current
      when Hash
        fetch_key(current, key, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
      when Array
        current.map { |item| project(item, key, diagnostics: diagnostics, diagnostic_path: diagnostic_path) }.flatten.compact
      else
        diagnostics.warn("data_path.type_mismatch", "Cannot read #{key.inspect} from #{current.class}", path: diagnostic_path)
        nil
      end
    end

    # Builds a hash holding only +keys+ (those with non-nil values).
    def project_keys(item, keys, diagnostics:, diagnostic_path:)
      unless item.is_a?(Hash)
        diagnostics.warn("data_path.type_mismatch", "Cannot project keys from #{item.class}", path: diagnostic_path)
        return nil
      end

      keys.each_with_object({}) do |key, hash|
        value = fetch_key(item, key, diagnostics: diagnostics, diagnostic_path: diagnostic_path)
        hash[key] = value unless value.nil?
      end
    end

    # Looks the key up as a string, then as a symbol; warns when absent.
    def fetch_key(hash, key, diagnostics:, diagnostic_path:)
      return hash[key] if hash.key?(key)
      return hash[key.to_sym] if hash.key?(key.to_sym)

      diagnostics.warn("data_path.missing_key", "Missing data key #{key.inspect}", path: diagnostic_path)
      nil
    end

    # Applies a bracket filter: take expressions are delegated to Take, any
    # other filter selects the hash items matching the predicate.
    def apply_filter(current, filter, diagnostics:, diagnostic_path:)
      if take_filter?(filter)
        take = Take.parse(filter) # parsed once, used for validation and application
        errors = Take.validate(take)
        errors.each { |message| diagnostics.error("data_path.take_invalid", message, path: diagnostic_path) }
        return [] if errors.any?

        return Take.apply(Array(current), take, diagnostics: diagnostics, path: diagnostic_path)
      end

      Array(current).select { |item| filter_match?(item, filter) }
    end

    def take_filter?(filter)
      filter.match?(/\A(?:all|odd|even)\z/) ||
        filter.match?(/\A(?:first|last|position|range|ranges|skip|skip_last|every|except|top_percent|bottom_percent|middle|middle_percent|alternate|random):/)
    end

    # True when +item+ is a hash satisfying the predicate: "|" separates
    # alternatives, ";" separates atoms that must all hold.
    def filter_match?(item, filter)
      return false unless item.is_a?(Hash)

      split_top_level(filter, "|").any? do |or_group|
        split_top_level(or_group, ";").all? { |atom| filter_atom_match?(item, atom.strip) }
      end
    end

    # Evaluates a single atom: "key OP value" (OP is one of = != > >= < <= ~=)
    # or a bare key tested for truthiness. A leading "!" negates the atom.
    def filter_atom_match?(item, atom)
      negated = atom.start_with?("!") && !atom.start_with?("!=")
      atom = atom[1..-1].to_s.strip if negated

      matched = if atom =~ /\A([^!<>=~]+)(!?=|>=|<=|>|<|~=)(.+)\z/
        key = Regexp.last_match(1).strip
        operator = Regexp.last_match(2)
        expected = cast(unquote(Regexp.last_match(3).strip))
        compare(cast(filter_value(item, key)), operator, expected)
      else
        !!(item[atom] || item[atom.to_sym])
      end
      negated ? !matched : matched
    end

    # Value stored under +key+ (string key first, then symbol key). Only a
    # nil string-key value falls through to the symbol key, so a stored
    # +false+ is compared as "false" instead of being mistaken for missing.
    def filter_value(item, key)
      value = item[key]
      value.nil? ? item[key.to_sym] : value
    end

    # Equality and containment compare string forms; ordering operators
    # compare numerically (non-numeric text counts as 0.0).
    def compare(actual, operator, expected)
      case operator
      when "=" then actual.to_s == expected.to_s
      when "!=" then actual.to_s != expected.to_s
      when ">" then numeric(actual) > numeric(expected)
      when ">=" then numeric(actual) >= numeric(expected)
      when "<" then numeric(actual) < numeric(expected)
      when "<=" then numeric(actual) <= numeric(expected)
      when "~=" then actual.to_s.include?(expected.to_s)
      else false
      end
    end

    # Strips one leading and one trailing quote character, if present.
    def unquote(value)
      value.sub(/\A["']/, "").sub(/["']\z/, "")
    end

    # True when the value is wrapped in a matching pair of quotes.
    def quoted?(value)
      value.to_s.match?(/\A(["']).*\1\z/)
    end

    def normalize_key(key)
      unquote(key.to_s).gsub("\\.", ".").gsub("\\,", ",")
    end

    def split_key_list(key)
      split_top_level(key.to_s, ",").map { |part| normalize_key(part.strip) }.reject(&:empty?)
    end

    # Splits +text+ on +delimiter+, ignoring delimiters inside quotes or
    # square brackets. A backslash escapes the next character and is itself
    # dropped from the output.
    def split_top_level(text, delimiter)
      parts = []
      current = +""
      quote = nil
      bracket_depth = 0
      escaped = false
      text.to_s.each_char do |char|
        if escaped
          current << char
          escaped = false
          next
        end

        if char == "\\"
          escaped = true
          next
        end

        if quote
          quote = nil if char == quote
          current << char
          next
        end

        if char == '"' || char == "'"
          quote = char
          current << char
          next
        end

        bracket_depth += 1 if char == "["
        bracket_depth -= 1 if char == "]" && bracket_depth.positive?

        if char == delimiter && bracket_depth.zero?
          parts << current
          current = +""
        else
          current << char
        end
      end
      parts << current
      parts
    end

    # Collects every value stored under +key+ at any depth of +current+.
    def recursive_project(current, key)
      key = normalize_key(key)
      values = []
      collect_recursive_values(current, key, values)
      values
    end

    def collect_recursive_values(current, key, values)
      case current
      when Hash
        values << current[key] if current.key?(key)
        values << current[key.to_sym] if current.key?(key.to_sym)
        current.each_value { |value| collect_recursive_values(value, key, values) }
      when Array
        current.each { |value| collect_recursive_values(value, key, values) }
      end
    end

    # Converts integer- and float-looking text to numbers; numbers and
    # booleans pass through; everything else becomes a string.
    def cast(value)
      return value if value.is_a?(Numeric) || value == true || value == false
      return value.to_i if value.to_s.match?(/\A-?\d+\z/)
      return value.to_f if value.to_s.match?(/\A-?\d+\.\d+\z/)

      value.to_s
    end

    def numeric(value)
      value.is_a?(Numeric) ? value : value.to_s.to_f
    end

    # Wraps a matched value in a ComposerNode that inherits the data block's
    # source key, attributes and line span. +index+ is 0-based; ids and
    # source positions use index + 1.
    def node_for(data_block, path, value, index)
      type = value.is_a?(Hash) ? "data_record" : "data_value"
      value_text = render_value(value)
      ComposerNode.new(
        id: "#{data_block.id}:data:#{index + 1}",
        source_key: data_block.source_key,
        type: type,
        source_position: data_block.source_position * 1000 + index + 1,
        level: nil,
        text: value_text.strip,
        attributes: data_block.attributes.merge(
          "path" => path.to_s,
          "source_type" => "data_block",
          "value" => value,
          "value_type" => value_type(value)
        ),
        children: [],
        raw: value_text.end_with?("\n") ? value_text : "#{value_text}\n",
        start_line: data_block.start_line,
        end_line: data_block.end_line
      )
    end

    # Hashes render as YAML without the document marker, arrays as one
    # rendered item per line, scalars via +to_s+.
    def render_value(value)
      case value
      when Hash
        YAML.dump(value).sub(/\A---\s*\n/, "")
      when Array
        value.map { |item| render_value(item).strip }.join("\n")
      else
        value.to_s
      end
    end

    def value_type(value)
      case value
      when Hash then "object"
      when Array then "array"
      when String then "string"
      when Integer then "integer"
      when Float then "float"
      when TrueClass, FalseClass then "boolean"
      when NilClass then "null"
      else value.class.name
      end
    end
  end
end