expressir 2.1.31 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +3 -2
  3. data/.github/workflows/release.yml +6 -0
  4. data/.rubocop_todo.yml +106 -92
  5. data/Gemfile +1 -1
  6. data/README.adoc +372 -1
  7. data/docs/_guides/formatter/formatter-architecture.adoc +401 -0
  8. data/docs/_guides/ruby-api/parsing-files.adoc +1 -1
  9. data/docs/_pages/parsers.adoc +31 -5
  10. data/docs/lychee.toml +3 -0
  11. data/expressir.gemspec +3 -2
  12. data/lib/expressir/benchmark.rb +6 -6
  13. data/lib/expressir/cli.rb +9 -0
  14. data/lib/expressir/commands/base.rb +2 -9
  15. data/lib/expressir/commands/format.rb +30 -0
  16. data/lib/expressir/commands/package.rb +92 -87
  17. data/lib/expressir/commands/validate_ascii.rb +2 -4
  18. data/lib/expressir/commands/validate_load.rb +8 -5
  19. data/lib/expressir/coverage.rb +15 -11
  20. data/lib/expressir/errors.rb +115 -0
  21. data/lib/expressir/express/builder.rb +350 -0
  22. data/lib/expressir/express/builders/attribute_decl_builder.rb +38 -0
  23. data/lib/expressir/express/builders/built_in_builder.rb +88 -0
  24. data/lib/expressir/express/builders/constant_builder.rb +115 -0
  25. data/lib/expressir/express/builders/declaration_builder.rb +24 -0
  26. data/lib/expressir/express/builders/derive_clause_builder.rb +16 -0
  27. data/lib/expressir/express/builders/derived_attr_builder.rb +28 -0
  28. data/lib/expressir/express/builders/domain_rule_builder.rb +21 -0
  29. data/lib/expressir/express/builders/entity_decl_builder.rb +108 -0
  30. data/lib/expressir/express/builders/explicit_attr_builder.rb +52 -0
  31. data/lib/expressir/express/builders/expression_builder.rb +453 -0
  32. data/lib/expressir/express/builders/function_decl_builder.rb +84 -0
  33. data/lib/expressir/express/builders/helpers.rb +148 -0
  34. data/lib/expressir/express/builders/interface_builder.rb +171 -0
  35. data/lib/expressir/express/builders/inverse_attr_builder.rb +45 -0
  36. data/lib/expressir/express/builders/inverse_attr_type_builder.rb +36 -0
  37. data/lib/expressir/express/builders/inverse_clause_builder.rb +16 -0
  38. data/lib/expressir/express/builders/literal_builder.rb +107 -0
  39. data/lib/expressir/express/builders/procedure_decl_builder.rb +80 -0
  40. data/lib/expressir/express/builders/qualifier_builder.rb +128 -0
  41. data/lib/expressir/express/builders/reference_builder.rb +27 -0
  42. data/lib/expressir/express/builders/rule_decl_builder.rb +95 -0
  43. data/lib/expressir/express/builders/schema_body_decl_builder.rb +22 -0
  44. data/lib/expressir/express/builders/schema_decl_builder.rb +62 -0
  45. data/lib/expressir/express/builders/schema_version_builder.rb +40 -0
  46. data/lib/expressir/express/builders/simple_id_builder.rb +26 -0
  47. data/lib/expressir/express/builders/statement_builder.rb +250 -0
  48. data/lib/expressir/express/builders/subtype_constraint_builder.rb +188 -0
  49. data/lib/expressir/express/builders/syntax_builder.rb +19 -0
  50. data/lib/expressir/express/builders/token_builder.rb +15 -0
  51. data/lib/expressir/express/builders/type_builder.rb +264 -0
  52. data/lib/expressir/express/builders/type_decl_builder.rb +32 -0
  53. data/lib/expressir/express/builders/unique_clause_builder.rb +22 -0
  54. data/lib/expressir/express/builders/unique_rule_builder.rb +36 -0
  55. data/lib/expressir/express/builders/where_clause_builder.rb +22 -0
  56. data/lib/expressir/express/builders.rb +43 -0
  57. data/lib/expressir/express/error.rb +18 -2
  58. data/lib/expressir/express/formatter.rb +18 -1508
  59. data/lib/expressir/express/formatters/data_types_formatter.rb +317 -0
  60. data/lib/expressir/express/formatters/declarations_formatter.rb +689 -0
  61. data/lib/expressir/express/formatters/expressions_formatter.rb +160 -0
  62. data/lib/expressir/express/formatters/literals_formatter.rb +46 -0
  63. data/lib/expressir/express/formatters/references_formatter.rb +42 -0
  64. data/lib/expressir/express/formatters/remark_formatter.rb +296 -0
  65. data/lib/expressir/express/formatters/statements_formatter.rb +224 -0
  66. data/lib/expressir/express/formatters/supertype_expressions_formatter.rb +48 -0
  67. data/lib/expressir/express/parser.rb +129 -14
  68. data/lib/expressir/express/pretty_formatter.rb +624 -0
  69. data/lib/expressir/express/remark_attacher.rb +1155 -0
  70. data/lib/expressir/express/resolve_references_model_visitor.rb +1 -0
  71. data/lib/expressir/express/streaming_builder.rb +467 -0
  72. data/lib/expressir/express/transformer/remark_handling.rb +196 -0
  73. data/lib/expressir/model/identifier.rb +1 -1
  74. data/lib/expressir/model/model_element.rb +30 -2
  75. data/lib/expressir/model/remark_info.rb +51 -0
  76. data/lib/expressir/model/search_engine.rb +58 -9
  77. data/lib/expressir/version.rb +1 -1
  78. data/lib/expressir.rb +6 -4
  79. metadata +71 -7
  80. data/lib/expressir/express/visitor.rb +0 -2815
@@ -0,0 +1,1155 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Expressir
6
+ module Express
7
+ # Handles attaching remarks (comments) to model elements after parsing.
8
+ #
9
+ # NOTE: Post-processing remark attachment has inherent limitations for scope-based
10
+ # matching. Remarks with simple tags (like "WR1") inside scopes (TYPE, ENTITY, etc.)
11
+ # cannot be perfectly matched without parsing context. This implementation prioritizes:
12
+ # 1. Exact path matches (e.g., "schema.entity.WR1")
13
+ # 2. Proximity-based matching for simple tags
14
+ # 3. NOT creating spurious schema-level items for ambiguous tags
15
+ class RemarkAttacher
16
# Model classes that can own informal propositions.
INFORMAL_PROPOSITION_TYPES = [
  Model::Declarations::Entity,
  Model::Declarations::Rule,
  Model::Declarations::Type,
  Model::Declarations::InformalPropositionRule,
].freeze

# Model classes that expose a remark_items attribute, i.e. the ones under
# which a RemarkItem child may be created.
REMARK_ITEM_TYPES = [
  Model::Declarations::Schema,
  Model::Declarations::Entity,
  Model::Declarations::Type,
  Model::Declarations::Rule,
  Model::Declarations::Function,
  Model::Declarations::Procedure,
  Model::Declarations::InformalPropositionRule,
  Model::Declarations::WhereRule,
  Model::Declarations::UniqueRule,
  # Attribute flavours (all include Identifier, which provides remark_items)
  Model::Declarations::Attribute,
  Model::Declarations::DerivedAttribute,
  Model::Declarations::InverseAttribute,
].freeze

# Model classes that can carry where rules.
WHERE_RULE_TYPES = [
  Model::Declarations::Entity,
  Model::Declarations::Type,
  Model::Declarations::Rule,
  Model::Declarations::Function,
  Model::Declarations::Procedure,
].freeze

# Model classes treated as scopes, i.e. containers of other declarations.
SCOPE_CONTAINER_TYPES = [
  Model::Declarations::Schema,
  Model::Declarations::Function,
  Model::Declarations::Procedure,
  Model::Declarations::Rule,
  Model::Declarations::Entity,
  Model::Declarations::Type,
].freeze

# Model classes that accept remarks, either through the Identifier module or
# through a remarks attribute declared on the class itself.
REMARKS_SUPPORT_TYPES = [
  # Declarations (via Identifier)
  Model::Declarations::Schema,
  Model::Declarations::Entity,
  Model::Declarations::Type,
  Model::Declarations::Function,
  Model::Declarations::Procedure,
  Model::Declarations::Rule,
  Model::Declarations::Constant,
  Model::Declarations::Attribute,
  Model::Declarations::InverseAttribute,
  Model::Declarations::DerivedAttribute,
  Model::Declarations::WhereRule,
  Model::Declarations::UniqueRule,
  Model::Declarations::InformalPropositionRule,
  Model::Declarations::SubtypeConstraint,
  Model::Declarations::Parameter,
  Model::Declarations::Variable,
  # Statements (via Identifier)
  Model::Statements::Alias,
  Model::Statements::Repeat,
  # Expressions (via Identifier)
  Model::Expressions::QueryExpression,
  # Data types (via Identifier)
  Model::DataTypes::Aggregate,
  Model::DataTypes::EnumerationItem,
  Model::DataTypes::Generic,
  Model::DataTypes::GenericEntity,
  # Remarks attribute declared directly on the class (no Identifier)
  Model::Declarations::RemarkItem,
].freeze
93
+
94
# Prepares an attacher for one source text.
#
# @param source [String] the text that is scanned for remarks
def initialize(source)
  @model = nil                # set by #attach
  @line_cache = {}            # byte position => line number memo
  @attached_spans = Set.new   # positions of remarks that already have a target
  @source = source
end
100
+
101
# Extracts every remark from the source and attaches it to +model+:
# tagged remarks first, then the untagged ones.
#
# @param model the parsed model to decorate (remembered in @model)
# @return the same +model+ object
def attach(model)
  @model = model
  extract_all_remarks.then do |found|
    attach_tagged_remarks(model, found)
    attach_untagged_remarks(model, found)
  end
  model
end
108
+
109
+ private
110
+
111
# Scans the source for tail remarks (`-- ...`) line by line, then lets
# extract_embedded_remarks append the `(* ... *)` ones.
#
# Each remark is a Hash with :position (byte offset of the remark start),
# :line (1-based), :text, :tag (nil when untagged) and :format.
#
# NOTE(review): only the first "--" of a line is considered and no attempt is
# made to tell whether it sits inside a string literal or an embedded remark.
#
# @return [Array<Hash>] all remarks, ordered by byte position
def extract_all_remarks
  collected = []
  offset = 0

  @source.each_line.with_index(1) do |raw_line, line_no|
    dash_at = raw_line.b.index("--") # byte index, so it can be added to offset
    if dash_at
      body = raw_line.byteslice((dash_at + 2)..).strip

      # "--IP1: text" is shorthand for an informal proposition tagged IP1
      ip = /^(IP\d+):\s*(.*)$/.match(body)
      tag, text =
        if ip
          [ip[1], ip[2]]
        else
          quoted_tag, rest = parse_tagged_remark(body)
          [quoted_tag, rest || body]
        end

      collected << {
        position: offset + dash_at,
        line: line_no,
        text: text,
        tag: tag,
        format: "tail",
      }
    end
    offset += raw_line.bytesize
  end

  extract_embedded_remarks(collected)
  # Tail and embedded remarks were gathered separately; restore source order
  collected.sort_by! { |entry| entry[:position] }
  collected
end
151
+
152
# Appends one Hash per `(* ... *)` remark found in the source to +remarks+.
#
# Scanning works on bytes and stops at the first opener that has no closer.
# NOTE(review): openers are paired with the next "*)", so nested embedded
# remarks are not recognised as such.
#
# @param remarks [Array<Hash>] accumulator, mutated in place
def extract_embedded_remarks(remarks)
  raw = @source.b
  cursor = 0

  loop do
    opener = raw.index("(*", cursor)
    break unless opener

    closer = raw.index("*)", opener + 2)
    break unless closer

    inner = @source.byteslice((opener + 2)...closer)
    line_no = get_line_number(opener)
    tag, text = parse_tagged_embedded_remark(inner)

    remarks << {
      position: opener,
      line: line_no,
      text: text,
      tag: tag,
      format: "embedded",
    }

    cursor = closer + 2
  end
end
175
+
176
# Splits a tail-remark body into its leading "quoted tag" and the rest.
#
# @param text [String] remark text, already stripped
# @return [Array] [tag, stripped remainder], or [nil, text] when the text
#   does not begin with a closed double-quoted tag
def parse_tagged_remark(text)
  return [nil, text] unless text.start_with?('"')

  closing = text.index('"', 1)
  return [nil, text] unless closing

  [text[1...closing], text[(closing + 1)..].strip]
end
183
+
184
# Splits the inside of an embedded remark into a leading "quoted tag" and the
# remaining text. Surrounding whitespace is stripped first.
#
# @param content [String] text between "(*" and "*)"
# @return [Array] [tag, stripped remainder], or [nil, stripped content] when
#   there is no closed double-quoted tag at the start
def parse_tagged_embedded_remark(content)
  body = content.strip
  closing = body.start_with?('"') ? body.index('"', 1) : nil

  closing ? [body[1...closing], body[(closing + 1)..].strip] : [nil, body]
end
192
+
193
# Converts a byte offset into a 1-based line number, memoised per offset.
#
# @param position [Integer, nil] byte offset into the source
# @return [Integer] 1 for nil or 0, otherwise one more than the number of
#   newlines that precede the offset
def get_line_number(position)
  if position.nil? || position.zero?
    1
  else
    @line_cache.fetch(position) do
      newlines_before = @source.byteslice(0...position).count("\n")
      @line_cache[position] = newlines_before + 1
    end
  end
end
198
+
199
# Attaches every tagged remark to the model element its tag designates.
#
# Remarks are handled in source order. A remark whose position is already in
# @attached_spans is skipped; a remark for which no target can be resolved is
# left unattached.
#
# @param model the model passed to #attach
# @param remarks [Array<Hash>] output of extract_all_remarks
def attach_tagged_remarks(model, remarks)
  schema_ids = repository?(model) ? model.schemas.filter_map(&:id) : []

  # Nodes with positions are needed to find the scope containing a remark
  nodes_with_positions = []
  collect_nodes_with_positions(model, nodes_with_positions)
  # The index is the tie-breaker, so equal positions keep their original
  # order whatever sort algorithm the Ruby version uses
  nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] }

  remarks.select { |r| r[:tag] }.sort_by { |r| r[:position] }.each do |remark|
    next if @attached_spans.include?(remark[:position])

    target = resolve_tagged_remark_target(model, remark, nodes_with_positions, schema_ids)
    next unless target

    add_remark(target, remark[:text], format: remark[:format], tag: remark[:tag])
    @attached_spans << remark[:position]
  end
end

# Resolves the element a tagged remark belongs to, trying in order:
# informal-proposition tags, prefixed tags (wr:/ip:/ur:), the exact path,
# scope- or schema-relative lookups, and finally implicit remark items.
# Returns nil when nothing applies.
def resolve_tagged_remark_target(model, remark, nodes_with_positions, schema_ids)
  tag = remark[:tag]
  line = remark[:line]

  # IPn tags belong to the enclosing scope that supports informal propositions
  target = nil
  if tag.match?(/^IP\d+$/)
    ip_scope = find_containing_scope_for_ip(line, nodes_with_positions)
    target = create_or_find_informal_proposition(ip_scope, tag) if ip_scope
  end
  return target unless target.nil?

  scope = find_containing_scope(line, nodes_with_positions)
  dotted = tag.include?(".")

  # Prefixed tags like wr:WR1, ip:IP1, ur:UR1
  target = handle_prefixed_tag(tag, scope, model, schema_ids) if tag.include?(":") && !dotted
  # Strategy 1: exact path
  target = find_by_exact_path(model, tag) if target.nil?

  if dotted
    # Strategy 1b: path relative to the enclosing scope or to a schema
    target = lookup_dotted_tag(model, tag, scope, schema_ids) if target.nil?
    # Strategy 3: implicit item, for qualified paths only
    target = implicit_item_for_dotted_tag(model, tag, scope, schema_ids) if target.nil?
  else
    # Strategy 2: simple tag, searched in the containing scope first
    target = lookup_simple_tag(model, tag, line, scope, schema_ids) if target.nil?
    # Strategy 4: implicit item directly under the first schema
    if target.nil? && schema_ids.any?
      target = create_implicit_remark_item_at_schema(model, tag, schema_ids.first)
    end
  end

  target
end

# Returns the path of +scope+ when it is a function, rule or procedure, nil
# otherwise.
def enclosing_callable_path(scope)
  return nil unless scope && function_rule_procedure?(scope)

  build_scope_path(scope)
end

# Looks +tag+ up below each schema id in turn; the result of the first
# successful lookup is returned.
def lookup_under_schemas(model, tag, schema_ids)
  found = nil
  schema_ids.each do |schema_id|
    found = find_by_exact_path(model, "#{schema_id}.#{tag}")
    break if found
  end
  found
end

# Existing element for a dotted tag: relative to the enclosing
# function/rule/procedure first, then relative to each schema.
def lookup_dotted_tag(model, tag, scope, schema_ids)
  target = nil
  scope_path = enclosing_callable_path(scope)
  target = find_by_exact_path(model, "#{scope_path}.#{tag}") if scope_path
  target = lookup_under_schemas(model, tag, schema_ids) if target.nil?
  target
end

# Existing element for a simple tag. Inside a scope the scope itself is
# searched, then its WHERE clauses; the schema-prefix fallback is skipped
# inside a function/rule/procedure so that such remarks never attach to
# schema-level items.
def lookup_simple_tag(model, tag, remark_line, scope, schema_ids)
  return lookup_under_schemas(model, tag, schema_ids) unless scope

  target = find_node_in_scope(scope, tag)
  if target.nil? && supports_where_rules?(scope)
    target = find_target_in_where_clause(scope, tag, remark_line)
  end
  if target.nil? && !function_rule_procedure?(scope)
    target = lookup_under_schemas(model, tag, schema_ids)
  end
  target
end

# Implicit remark item for a dotted tag that matched no existing element:
# the scope-qualified path is tried before the tag as written.
def implicit_item_for_dotted_tag(model, tag, scope, schema_ids)
  target = nil
  scope_path = enclosing_callable_path(scope)
  if scope_path
    target = create_implicit_remark_item(model, "#{scope_path}.#{tag}", schema_ids)
  end
  target = create_implicit_remark_item(model, tag, schema_ids) if target.nil?
  target
end
325
+
326
# Searches +scope+ for a node whose id equals +tag+.
#
# Lookup order: the direct child collections, then enumeration items of the
# scope's types, then each statement (the statement itself, followed by
# query expressions nested in it).
#
# Collection members that do not expose an id are skipped via respond_to?
# rather than by rescuing NoMethodError, so an error raised inside an id
# implementation is no longer silently swallowed.
#
# @param scope the model node to search (nil yields nil)
# @param tag [String] the id to look for
# @return the matching node, or nil
def find_node_in_scope(scope, tag)
  return nil unless scope

  %i[constants types variables parameters statements
     attributes derived_attributes inverse_attributes
     where_rules unique_rules informal_propositions].each do |attr|
    collection = safe_get_collection(scope, attr)
    next unless collection

    collection.each do |item|
      return item if item.respond_to?(:id) && item.id == tag
    end
  end

  # Enumeration items live inside the types
  safe_get_collection(scope, :types)&.each do |type|
    result = find_enumeration_item_in_type(type, tag)
    return result if result
  end

  # Nested items inside statements (alias, repeat, query)
  safe_get_collection(scope, :statements)&.each do |stmt|
    result = find_node_in_statement(stmt, tag)
    return result if result

    # Query expressions can be nested in assignments and similar statements
    result = find_query_in_expression(stmt, tag)
    return result if result
  end

  nil
end
364
+
365
# Finds the enumeration item with id +tag+ in a Type declaration.
#
# The type's own enumeration_items are checked first, then the items of its
# underlying type when that is an Enumeration.
#
# @param type candidate node; anything that is not a Type yields nil
# @param tag [String] the id to look for
# @return the matching item, or nil
def find_enumeration_item_in_type(type, tag)
  return nil unless type.is_a?(Model::Declarations::Type)

  direct = type.enumeration_items&.find { |item| item.id == tag }
  return direct if direct

  underlying = type.underlying_type
  return nil unless underlying.is_a?(Model::DataTypes::Enumeration)

  items = underlying.items
  items ? items.find { |item| item.id == tag } : nil
end
386
+
387
# Depth-first search for a QueryExpression with id +tag+ at or below +node+.
#
# Single-valued child attributes are followed first, then array-valued ones.
# +visited+ holds the object ids already seen, so shared or cyclic
# references are only walked once.
#
# @param node the expression or statement to search
# @param tag [String] the id to look for
# @param visited [Set<Integer>] object ids already inspected
# @return the matching QueryExpression, or nil
def find_query_in_expression(node, tag, visited = Set.new)
  return nil unless node
  return nil unless visited.add?(node.object_id) # nil when already seen

  return node if node.is_a?(Model::Expressions::QueryExpression) && node.id == tag

  descend = ->(child) { find_query_in_expression(child, tag, visited) }

  %i[expression operand left right condition aggregate
     query_expression repeat_control].each do |attr|
    child = safe_send(node, attr)
    hit = child && descend.call(child)
    return hit if hit
  end

  %i[expressions operands parameters arguments].each do |attr|
    children = safe_send(node, attr)
    next unless children.is_a?(Array)

    children.each do |child|
      hit = descend.call(child)
      return hit if hit
    end
  end

  nil
end
421
+
422
# Tells whether +node+ is a Function, Rule or Procedure declaration.
#
# @param node any object, nil included
# @return [Boolean]
def function_rule_procedure?(node)
  case node
  when Model::Declarations::Function,
       Model::Declarations::Rule,
       Model::Declarations::Procedure
    true
  else
    false
  end
end
429
+
430
+ # Handle prefixed tags like wr:WR1, ip:IP1, ur:UR1
431
# Resolves a prefixed tag such as wr:WR1, ip:IP1 or ur:UR1.
#
# The prefix (case-insensitive) selects the collection to search in the
# containing scope; failing that, "<schema_id>.<tag with ':' as '.'>" is
# looked up for each schema id.
#
# @param tag [String] "prefix:id"
# @param containing_scope the scope enclosing the remark, possibly nil
# @param model the model passed to #attach
# @param schema_ids [Array<String>]
# @return the matching element, or nil (also for unknown prefixes and tags
#   with nothing after the colon)
def handle_prefixed_tag(tag, containing_scope, model, schema_ids)
  prefix, id = tag.split(":")
  return nil unless id

  collection_attr = {
    "wr" => :where_rules,
    "ip" => :informal_propositions,
    "ur" => :unique_rules,
  }[prefix.downcase]
  return nil unless collection_attr

  # Preferred: an item of that kind in the containing scope
  candidates = safe_get_collection(containing_scope, collection_attr)
  if candidates
    local = candidates.find { |item| item.is_a?(Model::ModelElement) && item.id == id }
    return local if local
  end

  # Otherwise: full path below each schema
  dotted_tag = tag.tr(":", ".")
  schema_ids.each do |schema_id|
    by_path = safe_find(model, "#{schema_id}.#{dotted_tag}")
    return by_path if by_path
  end

  nil
end
459
+
460
+ # Find target for remarks inside WHERE clauses
461
# Handles a tagged remark written inside a WHERE clause.
#
# For each where rule of +scope+ that has an id, the source is searched for a
# line of the form "WHERE <id>:" located before the remark and at most five
# lines above it. On the first such line a remark item with id +tag+ is
# created inside that where rule.
#
# @param scope the scope enclosing the remark
# @param tag [String] the remark tag
# @param remark_line [Integer] 1-based line of the remark
# @return the created remark item, or nil
def find_target_in_where_clause(scope, tag, remark_line)
  return nil unless supports_where_rules?(scope)

  rules = scope.where_rules
  return nil unless rules&.any?

  source_lines = @source.lines

  rules.each do |rule|
    next unless rule.id

    header = nil # built on first use, once per rule
    source_lines.each.with_index(1) do |text, number|
      next unless number < remark_line

      header ||= /^\s*WHERE\s+#{Regexp.escape(rule.id)}\s*:/i
      if text.match?(header) && remark_line.between?(number, number + 5)
        return create_remark_item(rule, tag)
      end
    end
  end

  nil
end
491
+
492
# Returns +stmt+ itself when it is an Alias, a Repeat or a QueryExpression
# whose id equals +tag+; nil in every other case.
def find_node_in_statement(stmt, tag)
  case stmt
  when Model::Statements::Alias,
       Model::Statements::Repeat,
       Model::Expressions::QueryExpression
    stmt.id == tag ? stmt : nil
  end
end
503
+
504
# Finds the scope container that encloses a remark line.
#
# Text-based detection is preferred. When it finds nothing, the recorded
# node positions are used: walking the list backwards, the first
# SCOPE_CONTAINER_TYPES node whose line range includes the remark is
# returned. Repository and cache nodes are never considered.
#
# @param remark_line [Integer] 1-based line of the remark
# @param nodes_with_positions [Array<Hash>] entries with :node, :line and
#   :end_line
# @return the enclosing scope node, or nil
def find_containing_scope(remark_line, nodes_with_positions)
  from_text = find_scope_by_text_search(remark_line)
  return from_text if from_text

  nodes_with_positions.reverse_each do |entry|
    first_line = entry[:line]
    last_line = entry[:end_line]
    next unless first_line && last_line
    next unless remark_line >= first_line && remark_line <= last_line

    node = entry[:node]
    next if repository?(node) || cache?(node)

    return node if SCOPE_CONTAINER_TYPES.any? { |scope_class| node.is_a?(scope_class) }
  end

  nil
end
526
+
527
# Determines the scope enclosing +remark_line+ from the source text alone.
#
# Lines 1..remark_line are replayed against a stack: a line starting with
# SCHEMA/FUNCTION/PROCEDURE/RULE/ENTITY/TYPE followed by a name pushes a
# scope, and an END_* keyword anywhere on a line pops the stack when the top
# entry has the matching kind. Openers are handled before closers so a
# declaration opened and closed on one line nets out. Whatever is on top at
# the end is looked up in the model through find_scope_node.
#
# @param remark_line [Integer] 1-based line of the remark
# @return the model node of the innermost open scope, or nil
def find_scope_by_text_search(remark_line)
  source_lines = @source.lines
  return nil if remark_line < 1 || remark_line > source_lines.length

  openers = {
    schema: /^\s*SCHEMA\s+(\w+)/i,
    function: /^\s*FUNCTION\s+(\w+)/i,
    procedure: /^\s*PROCEDURE\s+(\w+)/i,
    rule: /^\s*RULE\s+(\w+)/i,
    entity: /^\s*ENTITY\s+(\w+)/i,
    type: /^\s*TYPE\s+(\w+)/i,
  }
  closers = {
    type: /END_TYPE/i,
    function: /END_FUNCTION/i,
    procedure: /END_PROCEDURE/i,
    rule: /END_RULE/i,
    entity: /END_ENTITY/i,
    schema: /END_SCHEMA/i,
  }

  open_scopes = []
  source_lines.first(remark_line).each.with_index(1) do |text, number|
    openers.each do |kind, pattern|
      found = pattern.match(text)
      open_scopes << { type: kind, name: found[1], line: number } if found
    end

    closers.each do |kind, pattern|
      open_scopes.pop if text.match?(pattern) && open_scopes.last&.dig(:type) == kind
    end
  end

  innermost = open_scopes.last
  innermost && find_scope_node(innermost[:type], innermost[:name])
end
590
+
591
# Maps a scope kind and name found in the source text to a model node.
#
# Only the schemas themselves and their direct functions, procedures, rules,
# entities and types are searched.
#
# @param type [Symbol] :schema, :function, :procedure, :rule, :entity or :type
# @param name [String, nil] declared name
# @return the matching node, or nil (also when no model is set)
def find_scope_node(type, name)
  return nil unless @model && name

  accessor = {
    function: :functions,
    procedure: :procedures,
    rule: :rules,
    entity: :entities,
    type: :types,
  }[type]

  @model.schemas.each do |schema|
    return schema if type == :schema && schema.id == name
    next unless accessor

    hit = schema.public_send(accessor)&.find { |decl| decl.id == name }
    return hit if hit
  end

  nil
end
622
+
623
# Builds the dotted id path of +node+ by following parent links upwards.
#
# Ids of model elements are collected from +node+ up to and including the
# first Schema met; elements without an id are left out.
#
# @param node the starting node (nil yields nil)
# @return [String, nil] e.g. "schema.function", or nil when no id was found
def build_scope_path(node)
  return nil unless node

  segments = []
  cursor = node

  loop do
    segments << cursor.id if cursor.is_a?(Model::ModelElement) && cursor.id
    break if cursor.is_a?(Model::Declarations::Schema) # never climb above a schema

    cursor = cursor.parent
    break unless cursor
  end

  segments.empty? ? nil : segments.reverse.join(".")
end
642
+
643
# Finds the scope that should own an informal proposition remark.
#
# 1. The text-based scope, when it supports informal propositions.
# 2. Otherwise, walking the positioned nodes backwards (repository and cache
#    nodes excluded), the first node containing the line that either supports
#    informal propositions or is a Schema.
# 3. Otherwise whatever find_scope_by_source_text yields.
#
# @param remark_line [Integer] 1-based line of the remark
# @param nodes_with_positions [Array<Hash>] entries with :node, :line and
#   :end_line
# @return the owning node, or nil
def find_containing_scope_for_ip(remark_line, nodes_with_positions)
  from_text = find_scope_by_text_search(remark_line)
  return from_text if from_text && supports_informal_propositions?(from_text)

  nodes_with_positions.reverse_each do |entry|
    first_line = entry[:line]
    last_line = entry[:end_line]
    next unless first_line && last_line
    next unless remark_line >= first_line && remark_line <= last_line

    node = entry[:node]
    next if repository?(node) || cache?(node)

    return node if supports_informal_propositions?(node)
    return node if node.is_a?(Model::Declarations::Schema) # schema as last resort
  end

  find_scope_by_source_text(remark_line)
end
676
+
677
# Last-resort lookup of the entity, type or rule whose source lines enclose
# +remark_line+.
#
# The whole source is scanned. For each kind the most recent opening line and
# name are remembered; when the matching END_* line is reached and the remark
# line lies between the two (inclusive), the declaration is fetched from the
# model by class and name.
#
# @param remark_line [Integer] 1-based line of the remark
# @return the enclosing declaration, or nil
def find_scope_by_source_text(remark_line)
  kinds = [
    [Model::Declarations::Entity, /^\s*ENTITY\s+(\w+)/i, /^\s*END_ENTITY/i],
    [Model::Declarations::Type, /^\s*TYPE\s+(\w+)/i, /^\s*END_TYPE/i],
    [Model::Declarations::Rule, /^\s*RULE\s+(\w+)/i, /^\s*END_RULE/i],
  ]
  open_scopes = {} # node class => [start line, declared name]

  @source.lines.each.with_index(1) do |text, number|
    kinds.each do |node_class, opener, closer|
      if (found = opener.match(text))
        open_scopes[node_class] = [number, found[1]]
        break
      elsif closer.match?(text)
        started_at, name = open_scopes.delete(node_class)
        if started_at && remark_line >= started_at && remark_line <= number
          return find_node_by_type_and_name(node_class, name)
        end

        break
      end
    end
  end

  nil
end
734
+
735
# Looks a declaration up by class and id across all schemas of the model.
#
# Only Entity, Type and Rule are supported; the class is identified by its
# fully qualified name.
#
# @param node_class [Class] the declaration class wanted
# @param name [String, nil] the declaration id
# @return the first matching declaration, or nil
def find_node_by_type_and_name(node_class, name)
  return nil unless @model && name

  accessors = {
    "Expressir::Model::Declarations::Entity" => :entities,
    "Expressir::Model::Declarations::Type" => :types,
    "Expressir::Model::Declarations::Rule" => :rules,
  }

  @model.schemas.each do |schema|
    accessor = accessors[node_class.name]
    declarations = accessor && schema.public_send(accessor)
    next unless declarations

    hit = declarations.find { |decl| decl.id == name }
    return hit if hit
  end

  nil
end
757
+
758
# Looks +path+ up in a repository model, first as given and then, if it
# contains colons, with every ":" replaced by ".".
#
# @param model the model; anything that is not a repository yields nil
# @param path [String, nil]
# @return the element found, or nil
def find_by_exact_path(model, path)
  return nil unless path && repository?(model)

  as_given = safe_find(model, path)
  return as_given if as_given

  dotted = path.tr(":", ".")
  return nil if dotted == path # nothing to normalise

  safe_find(model, dotted)
end
769
+
770
# Finds or creates the item with id +item_id+ directly under a schema.
#
# Once the schema has been resolved, the work is the same as for any other
# parent and is handed to create_item_at_parent: an IPn id becomes an
# informal proposition when the parent supports them, otherwise an existing
# remark item with that id is reused or a new one is created.
#
# @param model the model; anything that is not a repository yields nil
# @param item_id [String]
# @param schema_id [String]
# @return the item, or nil when the schema cannot be found or cannot hold
#   remark items
def create_implicit_remark_item_at_schema(model, item_id, schema_id)
  return nil unless repository?(model)

  schema = safe_find(model, schema_id)
  return nil unless schema.is_a?(Model::Declarations::Schema)

  create_item_at_parent(schema, item_id)
end
790
+
791
# Creates (or reuses) an item for a qualified path that matched no existing
# element.
#
# The normalised path is cut at successively earlier dots; the first prefix
# that resolves to an existing element, directly or below one of the
# schemas, becomes the parent, and the segment right after it becomes the
# item id.
#
# @param model the model; anything that is not a repository yields nil
# @param path [String] qualified path, optionally ending in ":<id>"
# @param schema_ids [Array<String>] schema ids tried as path prefixes
# @return the item created by create_item_at_parent, or nil
def create_implicit_remark_item(model, path, schema_ids = [])
  return nil unless repository?(model)

  segments = normalize_path(path).split(".")
  return nil if segments.length < 2

  # Longest parent prefix first
  (segments.length - 1).downto(1) do |cut|
    parent_path = segments.take(cut).join(".")
    parent = safe_find(model, parent_path)

    if parent.nil? && schema_ids.any?
      schema_ids.each do |schema_id|
        parent = safe_find(model, "#{schema_id}.#{parent_path}")
        break if parent
      end
    end

    return create_item_at_parent(parent, segments[cut]) if parent
  end

  nil
end
821
+
822
# Rewrites the last ":" of a path as "." (e.g. "ip:IP1" becomes "ip.IP1").
# Paths without a colon are returned unchanged.
#
# @param path [String]
# @return [String]
def normalize_path(path)
  head, colon, tail = path.rpartition(":")
  colon.empty? ? path : "#{head}.#{tail}"
end
828
+
829
# Finds or creates the child item +item_id+ under +parent+.
#
# An IPn id becomes an informal proposition when the parent supports them.
# Otherwise an existing remark item with that id is reused, or a new one is
# created, provided the parent supports remark items.
#
# @param parent the model element to hold the item
# @param item_id [String]
# @return the item, or nil when the parent cannot hold remark items
def create_item_at_parent(parent, item_id)
  informal = item_id.match?(/^IP\d+$/) && supports_informal_propositions?(parent)
  return create_or_find_informal_proposition(parent, item_id) if informal
  return nil unless supports_remark_items?(parent)

  parent.remark_items&.find { |candidate| candidate.id == item_id } ||
    create_remark_item(parent, item_id)
end
843
+
844
# Looks up the informal proposition +id+ on +parent+, creating it (together
# with a nested remark item of the same id) when it does not exist yet.
#
# NOTE(review): when the proposition already exists it is returned as-is,
# whereas a freshly created one yields its nested RemarkItem. Confirm callers
# expect both shapes.
#
# @param parent node that should own the informal proposition
# @param id [String] proposition id
# @return the existing proposition, the RemarkItem nested in a newly created
#   proposition, or nil when +parent+ does not support informal propositions
def create_or_find_informal_proposition(parent, id)
  return nil unless supports_informal_propositions?(parent)

  already_there = parent.informal_propositions&.find { |ip| ip.id == id }
  return already_there if already_there

  proposition = Model::Declarations::InformalPropositionRule.new(id: id)
  proposition.parent = parent
  parent.informal_propositions ||= []
  parent.informal_propositions << proposition
  safe_reset_children_by_id(parent)

  # The remark text is attached to a RemarkItem inside the proposition, so
  # that nested item is what gets handed back.
  Model::Declarations::RemarkItem.new(id: id).tap do |remark_item|
    remark_item.parent = proposition
    proposition.remark_items ||= []
    proposition.remark_items << remark_item
    safe_reset_children_by_id(proposition)
  end
end
868
+
869
# Builds a RemarkItem with the given +id+, links it to +parent+, appends it to
# the parent's remark items (initializing the list if needed) and asks the
# parent to reset its children-by-id index.
#
# @param parent node that will own the remark item
# @param id [String] id for the new remark item
# @return the newly created RemarkItem
def create_remark_item(parent, id)
  Model::Declarations::RemarkItem.new(id: id).tap do |remark_item|
    remark_item.parent = parent
    parent.remark_items ||= []
    parent.remark_items << remark_item
    safe_reset_children_by_id(parent)
  end
end
877
+
878
# Attaches every remark without a :tag to a node of +model+.
#
# Nodes are collected with their positions and ordered by source position
# (nodes without a position last, original order kept for ties). Remarks whose
# :position is already recorded in +@attached_spans+ are skipped. A remark on
# an END_* line is first matched against the scope it closes; otherwise, or
# when that yields nothing, the nearest node is used.
#
# @param model root object whose nodes are candidates
# @param remarks [Array<Hash>] remark hashes read via :tag, :position, :line,
#   :text and :format
# @return [Array<Hash>, nil] the untagged remarks, or nil when there are none
def attach_untagged_remarks(model, remarks)
  pending = remarks.reject { |remark| remark[:tag] }
  return if pending.none?

  candidates = []
  collect_nodes_with_positions(model, candidates)
  # The index is part of the sort key so equal positions keep their order.
  candidates.sort_by!.with_index { |entry, idx| [entry[:position] || Float::INFINITY, idx] }

  pending.each do |remark|
    span = remark[:position]
    next if @attached_spans.include?(span)

    target = find_node_for_end_scope_remark(remark, candidates) if end_scope_line?(remark[:line])
    target ||= find_nearest_node(remark, candidates)
    next unless target

    add_remark(target, remark[:text], format: remark[:format], tag: nil)
    @attached_spans << span
  end
end
909
+
910
# Tells whether the given source line contains an END_SCHEMA, END_ENTITY,
# END_TYPE, END_FUNCTION, END_PROCEDURE or END_RULE keyword (case-insensitive).
#
# Uses Regexp#match? so the predicate returns a strict boolean and does not
# build MatchData or touch the match globals.
#
# @param line_num 1-based line number handed to +get_line_content+
# @return [Boolean] true when the line contains one of the END_* keywords
def end_scope_line?(line_num)
  /END_(SCHEMA|ENTITY|TYPE|FUNCTION|PROCEDURE|RULE)/i.match?(get_line_content(line_num))
end
914
+
915
# Returns the raw text of a 1-based line of +@source+, including its line
# terminator when the line has one.
#
# The split lines are cached and reused for as long as +@source+ refers to the
# same String object, so repeated lookups no longer re-split the whole source.
# (Assumes the source string is not mutated in place after the first lookup.)
#
# @param line_num [Integer, nil] 1-based line number
# @return [String] the line's content, or "" when +line_num+ is nil or outside
#   1..line count, or when +@source+ is nil
def get_line_content(line_num)
  return "" if line_num.nil? || @source.nil?

  # Rebuild the cache only when a different source object is being read.
  unless @line_content_source.equal?(@source)
    @line_content_source = @source
    @line_content_cache = @source.lines
  end

  return "" if line_num < 1 || line_num > @line_content_cache.length

  @line_content_cache[line_num - 1]
end
921
+
922
# Picks the node that a remark written on an END_* line belongs to.
#
# The declaration class is chosen from the keyword on the remark's line
# (checked in the order SCHEMA, ENTITY, TYPE, FUNCTION, PROCEDURE, RULE). The
# first node of that class whose :end_line equals the remark line, or lies at
# most two lines before it, wins; failing that, the first node of that class
# anywhere in +nodes+ is used.
#
# @param remark [Hash] remark hash; only :line is read
# @param nodes [Array<Hash>] entries with :node and :end_line
# @return the matched node, or nil when the line has no recognized END_*
#   keyword or no node of the matching class exists
def find_node_for_end_scope_remark(remark, nodes)
  remark_line = remark[:line]
  text = get_line_content(remark_line)

  node_type =
    if /END_SCHEMA/i.match?(text) then Model::Declarations::Schema
    elsif /END_ENTITY/i.match?(text) then Model::Declarations::Entity
    elsif /END_TYPE/i.match?(text) then Model::Declarations::Type
    elsif /END_FUNCTION/i.match?(text) then Model::Declarations::Function
    elsif /END_PROCEDURE/i.match?(text) then Model::Declarations::Procedure
    elsif /END_RULE/i.match?(text) then Model::Declarations::Rule
    end

  return nil unless node_type

  closest = nodes.find do |entry|
    next false unless entry[:node].is_a?(node_type)

    ends_at = entry[:end_line]
    ends_at == remark_line ||
      (ends_at && ends_at <= remark_line && ends_at >= remark_line - 2)
  end

  closest ? closest[:node] : find_node_by_type(nodes, node_type)
end
944
+
945
# Returns the :node of the first entry whose node is an instance of
# +node_type+.
#
# @param nodes [Array<Hash>] entries carrying a :node key
# @param node_type [Class, Module] type to look for
# @return the first matching node, or nil when none matches
def find_node_by_type(nodes, node_type)
  nodes.each do |entry|
    candidate = entry[:node]
    return candidate if candidate.is_a?(node_type)
  end

  nil
end
948
+
949
# Appends +text+ to +node+'s remarks when the node is a ModelElement that
# supports remarks; anything else is silently ignored.
#
# When +tag+ is nil, a RemarkInfo carrying the text and format is also
# appended to the node's untagged remarks.
#
# @param node target node (may be nil)
# @param text [String] remark text
# @param format [String] remark format stored on the RemarkInfo
# @param tag remark tag; untagged bookkeeping only happens when this is nil
# @return the node's untagged remarks after appending, or nil when nothing
#   was attached or +tag+ is not nil
def add_remark(node, text, format: "tail", tag: nil)
  return unless node.is_a?(Model::ModelElement)
  return unless supports_remarks?(node)

  node.remarks ||= []
  node.remarks << text
  return unless tag.nil?

  info = Model::RemarkInfo.new(text: text, format: format, tag: tag)
  node.untagged_remarks ||= []
  node.untagged_remarks << info
end
966
+
967
# Tells whether +obj+ is an instance of any type in REMARKS_SUPPORT_TYPES.
#
# @param obj object to test
# @return [Boolean]
def supports_remarks?(obj)
  REMARKS_SUPPORT_TYPES.any? { |type| type === obj }
end
970
+
971
# Recursively records +node+ and its descendants in +result+, one hash per
# node with :node, :position, :line and :end_line.
#
# Position data is filled in only for ModelElements that have both a source
# and a source offset; every other node is recorded with nil positions. The
# end line is the larger of the node's own source end line and the end line
# computed from its children. Each object is visited once, tracked by
# object_id in +visited+.
#
# @param node node to record (nil is ignored)
# @param result [Array<Hash>] accumulator, appended to in place
# @param visited [Set] object ids already recorded
# @return whatever +collect_children+ returns, or nil when the node is nil or
#   was already visited
def collect_nodes_with_positions(node, result, visited = Set.new)
  return unless node
  # Set#add? returns nil when the id was already present.
  return unless visited.add?(node.object_id)

  entry = { node: node, position: nil, line: nil, end_line: nil }

  if node.is_a?(Model::ModelElement) && node.source && (offset = node.source_offset)
    own_end_line = get_line_number(offset + node.source.length)

    entry[:position] = offset
    entry[:line] = get_line_number(offset)
    entry[:end_line] =
      [own_end_line, calculate_children_end_line(node)].compact.max || own_end_line
  end

  result << entry
  collect_children(node, result, visited)
end
1006
+
1007
# Computes the greatest end line among the direct children of +node+.
#
# Children are read from the generic :children collection and from each named
# child collection below. Only ModelElements that have both a source offset
# and a source contribute. Per the original author's note, this exists because
# a container's own source length covers only its declaration, not its body.
#
# @param node node whose child collections are inspected
# @return the largest child end line, or nil when no child contributes one
def calculate_children_end_line(node)
  collection_names = %i[children
                        schemas types entities functions procedures rules constants
                        attributes derived_attributes inverse_attributes
                        where_rules unique_rules informal_propositions
                        parameters variables statements items remark_items]

  end_lines = collection_names.flat_map do |name|
    members = safe_get_collection(node, name) || []
    located = members.select do |member|
      member.is_a?(Model::ModelElement) && member.source_offset && member.source
    end
    located.map { |member| get_line_number(member.source_offset + member.source.length) }
  end

  end_lines.max
end
1038
+
1039
# Feeds every child of +node+ into +collect_nodes_with_positions+: first the
# generic :children collection, then each named child collection in order.
#
# @param node node whose child collections are walked
# @param result [Array<Hash>] accumulator passed through to the collector
# @param visited [Set] object ids already recorded, passed through as well
# @return [Array<Symbol>] the list of named collections that was iterated
def collect_children(node, result, visited)
  walk = lambda do |name|
    safe_get_collection(node, name)&.each do |member|
      collect_nodes_with_positions(member, result, visited)
    end
  end

  walk.call(:children)

  %i[schemas types entities functions procedures rules constants
     attributes derived_attributes inverse_attributes
     where_rules unique_rules informal_propositions
     parameters variables statements items remark_items].each(&walk)
end
1055
+
1056
# Chooses the node an untagged remark should be attached to, based on lines.
#
# Repository and Cache nodes are never candidates. Preference order:
# 1. the last node that starts on the remark's line (e.g. a tail remark after
#    "attr : STRING;"),
# 2. the last node that ends on the remark's line,
# 3. the node with the smallest line span that contains the remark's line,
# 4. the node with the greatest end line before the remark's line.
#
# @param remark [Hash] remark hash; only :line is read
# @param nodes [Array<Hash>] entries with :node, :line and :end_line
# @return the chosen node, or nil when no candidate qualifies
def find_nearest_node(remark, nodes)
  remark_line = remark[:line]
  scoped = nodes.reject { |n| repository?(n[:node]) || cache?(n[:node]) }

  starts_here = scoped.reverse_each.find { |n| n[:line] == remark_line }
  return starts_here[:node] if starts_here

  ends_here = scoped.reverse_each.find { |n| n[:end_line] == remark_line }
  return ends_here[:node] if ends_here

  enclosing = scoped.select do |n|
    n[:line] && n[:end_line] && n[:line] <= remark_line && n[:end_line] >= remark_line
  end
  # The tightest enclosing span is the most specific scope.
  return enclosing.min_by { |n| n[:end_line] - n[:line] }[:node] unless enclosing.empty?

  preceding = scoped.select { |n| n[:end_line] && n[:end_line] < remark_line }
  latest = preceding.max_by { |n| n[:end_line] }
  latest && latest[:node]
end
1096
+
1097
+ # Type checking helper methods
1098
+
1099
# Tells whether +obj+ is a Model::Repository.
#
# @param obj object to test
# @return [Boolean]
def repository?(obj)
  Model::Repository === obj
end
1102
+
1103
# Tells whether +obj+ is a Model::Cache.
#
# @param obj object to test
# @return [Boolean]
def cache?(obj)
  Model::Cache === obj
end
1106
+
1107
# Tells whether +obj+ is an instance of any type in INFORMAL_PROPOSITION_TYPES.
#
# @param obj object to test
# @return [Boolean]
def supports_informal_propositions?(obj)
  INFORMAL_PROPOSITION_TYPES.any? { |type| type === obj }
end
1110
+
1111
# Tells whether +obj+ is an instance of any type in REMARK_ITEM_TYPES.
#
# @param obj object to test
# @return [Boolean]
def supports_remark_items?(obj)
  REMARK_ITEM_TYPES.any? { |type| type === obj }
end
1114
+
1115
# Tells whether +obj+ is an instance of any type in WHERE_RULE_TYPES.
#
# @param obj object to test
# @return [Boolean]
def supports_where_rules?(obj)
  WHERE_RULE_TYPES.any? { |type| type === obj }
end
1118
+
1119
+ # Safe accessor methods that return nil instead of NoMethodError
1120
+
1121
# Invokes +method+ on +obj+ with no arguments, yielding nil instead of raising
# when +obj+ is nil/false or a NoMethodError is raised during the call.
#
# @param obj receiver (may be nil)
# @param method [Symbol, String] name of the method to invoke
# @return the method's result, or nil
def safe_send(obj, method)
  begin
    obj ? obj.send(method) : nil
  rescue NoMethodError
    nil
  end
end
1128
+
1129
# Reads the attribute +attr+ from +obj+ and returns it only when it is an
# Array. A nil/false +obj+, a non-Array value, or a NoMethodError raised
# during the call all yield nil.
#
# @param obj receiver (may be nil)
# @param attr [Symbol, String] name of the collection reader
# @return [Array, nil]
def safe_get_collection(obj, attr)
  value = obj && obj.send(attr)
  value.is_a?(Array) ? value : nil
rescue NoMethodError
  nil
end
1137
+
1138
# Calls +model.find(path)+, yielding nil instead of raising when +model+ is
# nil/false or the lookup raises any StandardError.
#
# @param model object responding to +find+ (may be nil)
# @param path [String] path handed to +find+
# @return the lookup result, or nil
def safe_find(model, path)
  begin
    model ? model.find(path) : nil
  rescue StandardError
    nil
  end
end
1145
+
1146
# Calls +reset_children_by_id+ on +obj+, yielding nil instead of raising when
# +obj+ is nil/false or a NoMethodError is raised during the call.
#
# @param obj object whose children index should be reset (may be nil)
# @return the result of +reset_children_by_id+, or nil
def safe_reset_children_by_id(obj)
  begin
    obj ? obj.reset_children_by_id : nil
  rescue NoMethodError
    nil
  end
end
1153
+ end
1154
+ end
1155
+ end