expressir 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +480 -49
  3. data/benchmark/srl_benchmark.rb +47 -34
  4. data/benchmark/srl_native_benchmark.rb +20 -16
  5. data/benchmark/srl_ruby_benchmark.rb +14 -12
  6. data/expressir.gemspec +2 -2
  7. data/lib/expressir/changes/item_change.rb +0 -1
  8. data/lib/expressir/changes/mapping_change.rb +0 -1
  9. data/lib/expressir/changes/schema_change.rb +0 -1
  10. data/lib/expressir/changes/version_change.rb +0 -1
  11. data/lib/expressir/commands/changes_import_eengine.rb +2 -2
  12. data/lib/expressir/commands/validate_ascii.rb +0 -1
  13. data/lib/expressir/eengine/arm_compare_report.rb +0 -1
  14. data/lib/expressir/eengine/changes_section.rb +0 -1
  15. data/lib/expressir/eengine/mim_compare_report.rb +0 -1
  16. data/lib/expressir/eengine/modified_object.rb +0 -1
  17. data/lib/expressir/express/builder.rb +64 -21
  18. data/lib/expressir/express/builders/built_in_builder.rb +4 -2
  19. data/lib/expressir/express/builders/entity_decl_builder.rb +8 -4
  20. data/lib/expressir/express/builders/expression_builder.rb +0 -6
  21. data/lib/expressir/express/builders/function_decl_builder.rb +8 -8
  22. data/lib/expressir/express/builders/procedure_decl_builder.rb +8 -8
  23. data/lib/expressir/express/builders/rule_decl_builder.rb +8 -8
  24. data/lib/expressir/express/builders/syntax_builder.rb +2 -44
  25. data/lib/expressir/express/formatters/remark_formatter.rb +1 -3
  26. data/lib/expressir/express/parser.rb +234 -14
  27. data/lib/expressir/express/remark_attacher.rb +47 -18
  28. data/lib/expressir/express/transformer/remark_handling.rb +0 -1
  29. data/lib/expressir/model/exp_file.rb +2 -1
  30. data/lib/expressir/model/model_element.rb +1 -1
  31. data/lib/expressir/model/repository.rb +8 -9
  32. data/lib/expressir/model/search_engine.rb +7 -6
  33. data/lib/expressir/package/builder.rb +3 -1
  34. data/lib/expressir/package/metadata.rb +0 -1
  35. data/lib/expressir/schema_manifest.rb +0 -1
  36. data/lib/expressir/schema_manifest_entry.rb +0 -1
  37. data/lib/expressir/version.rb +1 -1
  38. metadata +15 -15
@@ -45,9 +45,6 @@ module Expressir
45
45
 
46
46
  return term if rhs.nil? || (rhs.respond_to?(:empty?) && rhs.empty?)
47
47
 
48
- # Handle both formats:
49
- # - Ruby parser: rhs is Array of {:item => {...}} hashes
50
- # - Native parser: rhs might be a Hash (merged) or Array
51
48
  rhs_array = rhs.is_a?(Array) ? rhs : [rhs]
52
49
 
53
50
  operands = [term]
@@ -79,9 +76,6 @@ module Expressir
79
76
 
80
77
  return factor if rhs.nil? || (rhs.respond_to?(:empty?) && rhs.empty?)
81
78
 
82
- # Handle both formats:
83
- # - Ruby parser: rhs is Array of {:item => {...}} hashes
84
- # - Native parser: rhs might be a Hash (merged) or Array
85
79
  rhs_array = rhs.is_a?(Array) ? rhs : [rhs]
86
80
 
87
81
  operands = [factor]
@@ -25,15 +25,15 @@ module Expressir
25
25
  functions = declarations.grep(Expressir::Model::Declarations::Function)
26
26
  procedures = declarations.grep(Expressir::Model::Declarations::Procedure)
27
27
  constants = if algorithm_head.is_a?(Hash) && algorithm_head[:constant_decl]
28
- build_constant_decl(algorithm_head[:constant_decl])
29
- else
30
- []
31
- end
28
+ build_constant_decl(algorithm_head[:constant_decl])
29
+ else
30
+ []
31
+ end
32
32
  variables = if algorithm_head.is_a?(Hash) && algorithm_head[:local_decl]
33
- build_local_decl(algorithm_head[:local_decl])
34
- else
35
- []
36
- end
33
+ build_local_decl(algorithm_head[:local_decl])
34
+ else
35
+ []
36
+ end
37
37
  statements = Builder.build_children(stmts)
38
38
 
39
39
  Expressir::Model::Declarations::Function.new(
@@ -24,15 +24,15 @@ module Expressir
24
24
  functions = declarations.grep(Expressir::Model::Declarations::Function)
25
25
  procedures = declarations.grep(Expressir::Model::Declarations::Procedure)
26
26
  constants = if algorithm_head.is_a?(Hash) && algorithm_head[:constant_decl]
27
- build_constant_decl(algorithm_head[:constant_decl])
28
- else
29
- []
30
- end
27
+ build_constant_decl(algorithm_head[:constant_decl])
28
+ else
29
+ []
30
+ end
31
31
  variables = if algorithm_head.is_a?(Hash) && algorithm_head[:local_decl]
32
- build_local_decl(algorithm_head[:local_decl])
33
- else
34
- []
35
- end
32
+ build_local_decl(algorithm_head[:local_decl])
33
+ else
34
+ []
35
+ end
36
36
  statements = Builder.build_children(stmts)
37
37
 
38
38
  Expressir::Model::Declarations::Procedure.new(
@@ -25,15 +25,15 @@ module Expressir
25
25
  functions = declarations.grep(Expressir::Model::Declarations::Function)
26
26
  procedures = declarations.grep(Expressir::Model::Declarations::Procedure)
27
27
  constants = if algorithm_head.is_a?(Hash) && algorithm_head[:constant_decl]
28
- build_constant_decl(algorithm_head[:constant_decl])
29
- else
30
- []
31
- end
28
+ build_constant_decl(algorithm_head[:constant_decl])
29
+ else
30
+ []
31
+ end
32
32
  variables = if algorithm_head.is_a?(Hash) && algorithm_head[:local_decl]
33
- build_local_decl(algorithm_head[:local_decl])
34
- else
35
- []
36
- end
33
+ build_local_decl(algorithm_head[:local_decl])
34
+ else
35
+ []
36
+ end
37
37
  statements = Builder.build_children(stmts)
38
38
  where_rules = where_clause ? Builder.build({ where_clause: where_clause }) : []
39
39
 
@@ -7,14 +7,9 @@ module Expressir
7
7
  # Each parsed EXPRESS file is represented as an ExpFile containing schemas.
8
8
  class SyntaxBuilder
9
9
  # Build ExpFile from AST data
10
- # @param ast_data [Hash, Array] The parsed AST with syntax node
11
- # - Ruby parser: Hash with :schema_decl key
12
- # - Native parser: Hash with :syntax wrapper containing :schemaDecl
10
+ # @param ast_data [Hash] The parsed AST with syntax node
13
11
  # @return [Model::ExpFile] ExpFile containing the parsed schemas
14
12
  def call(ast_data)
15
- # Handle both formats:
16
- # - Ruby parser: ast_data is a merged Hash {:schema_decl => [...], :spaces => {...}}
17
- # - Native parser: ast_data is {:syntax => {:spaces, :schemaDecl => [...], :trailer}}
18
13
  schema_decl_data = extract_schema_decls(ast_data)
19
14
 
20
15
  schemas = Builder.build_children(schema_decl_data)
@@ -24,47 +19,10 @@ module Expressir
24
19
  private
25
20
 
26
21
  # Extract schema declarations from AST data.
27
- # Handles both Ruby (snake_case) and native (camelCase) formats.
28
22
  def extract_schema_decls(ast_data)
29
23
  if ast_data.is_a?(Hash)
30
- # Check for :syntax wrapper (native parser format)
31
- if ast_data.key?(:syntax)
32
- inner = ast_data[:syntax]
33
- return inner[:schemaDecl] || inner[:schema_decl] if inner.is_a?(Hash)
34
- end
35
-
36
- # Ruby parser format: snake_case
37
- return ast_data[:schema_decl] if ast_data.key?(:schema_decl)
38
-
39
- # Native parser format: camelCase at top level (legacy)
40
- return ast_data[:schemaDecl] if ast_data.key?(:schemaDecl)
41
- elsif ast_data.is_a?(Array)
42
- # Legacy native parser format: Array of Hashes
43
- merged = {}
44
- ast_data.each do |item|
45
- if item.is_a?(Hash)
46
- item.each do |_key, value|
47
- if value.is_a?(Array)
48
- value.each do |sub|
49
- if sub.is_a?(Hash)
50
- sub.each do |k, v|
51
- snake_key = k.to_s
52
- .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
53
- .gsub(/([a-z\d])([A-Z])/, '\1_\2')
54
- .downcase
55
- .to_sym
56
- merged[snake_key] ||= []
57
- merged[snake_key] = v if v.is_a?(Array)
58
- end
59
- end
60
- end
61
- end
62
- end
63
- end
64
- end
65
- return merged[:schema_decl]
24
+ ast_data[:schema_decl]
66
25
  end
67
- nil
68
26
  end
69
27
  end
70
28
  end
@@ -237,9 +237,7 @@ module Expressir
237
237
  !node.untagged_remarks.nil? &&
238
238
  skip_untagged_types.any? { |type| node.is_a?(type) }
239
239
 
240
- remarks.concat(node.untagged_remarks.compact.select do |remark|
241
- remark.is_a?(Model::RemarkInfo)
242
- end.map do |remark|
240
+ remarks.concat(node.untagged_remarks.compact.grep(Model::RemarkInfo).map do |remark|
243
241
  format_untagged_remark(remark)
244
242
  end)
245
243
  end
@@ -14,6 +14,13 @@ module Expressir
14
14
  @@cached_parser = nil
15
15
  @@parser_mutex = Mutex.new
16
16
 
17
+ # Cache for schemaDecl grammar JSON (used for streaming parse)
18
+ @@cached_schema_grammar_json = nil
19
+
20
+ # Threshold for using memory-bounded fresh parse (bytes)
21
+ # Files above this use parse_fresh which has no packrat cache
22
+ LARGE_FILE_THRESHOLD = 1024 * 1024 # 1 MB
23
+
17
24
  # Get cached parser instance (thread-safe)
18
25
  # Reusing the parser avoids the overhead of reinitializing all rule definitions
19
26
  def self.cached_parser
@@ -59,6 +66,15 @@ module Expressir
59
66
  @@cached_grammar_json
60
67
  end
61
68
 
69
# Serialized grammar for the schemaDecl rule (used for streaming parse).
#
# The grammar is serialized on first use and the result is kept in
# @@cached_schema_grammar_json for every later call.
#
# @return the value produced by Parsanol::Native.serialize_grammar
def self.cached_schema_grammar_json
  @@cached_schema_grammar_json ||=
    Parsanol::Native.serialize_grammar(cached_parser.schemaDecl)
end
77
+
62
78
  # Parse using native engine with Rust-side transformation (fastest)
63
79
  #
64
80
  # This method provides ~17x speedup over pure Ruby parsing.
@@ -74,7 +90,12 @@ module Expressir
74
90
  end
75
91
 
76
92
  grammar_atom = cached_parser.syntax
77
- Parsanol::Native.parse(grammar_atom, source)
93
+ # Use fresh-parse (no cache) for large files to bound memory
94
+ if source.bytesize > LARGE_FILE_THRESHOLD
95
+ Parsanol::Native.parse_fresh(grammar_atom, source)
96
+ else
97
+ Parsanol::Native.parse(grammar_atom, source)
98
+ end
78
99
  end
79
100
 
80
101
  def cts(atom)
@@ -670,24 +691,37 @@ module Expressir
670
691
  # @param [String] file Express file path
671
692
  # @param [Boolean] skip_references skip resolving references
672
693
  # @param [Boolean] include_source attach original source code to model elements
694
+ # @param [Boolean] use_native use native parser (default: true when available)
673
695
  # @return [Model::ExpFile] ExpFile containing parsed schemas
674
696
  # @raise [SchemaParseFailure] if the schema file fails to parse
675
- def self.from_file(file, skip_references: nil, include_source: nil, root_path: nil) # rubocop:disable Metrics/AbcSize
697
+ def self.from_file(file, skip_references: nil, include_source: nil, root_path: nil, use_native: nil) # rubocop:disable Metrics/AbcSize
676
698
  Expressir::Benchmark.measure_file(file) do
677
699
  source = File.read file
678
700
 
679
701
  # remove root path from file path
680
702
  schema_file = root_path ? Pathname.new(file.to_s).relative_path_from(root_path).to_s : file.to_s
681
703
 
704
+ use_native = Parser.native_available? if use_native.nil?
705
+
682
706
  begin
683
- ast = Parser.cached_parser.parse source
707
+ ast = if use_native && Parser.native_available?
708
+ begin
709
+ Parser.parse_native(source)
710
+ rescue StandardError
711
+ # Native parser may fail on non-ASCII or edge cases;
712
+ # fall back to Ruby parser
713
+ Parser.cached_parser.parse source
714
+ end
715
+ else
716
+ Parser.cached_parser.parse source
717
+ end
684
718
  rescue Parsanol::ParseFailed => e
685
719
  # Instead of just printing, raise a proper error with file context
686
720
  raise Error::SchemaParseFailure.new(schema_file, e)
687
721
  end
688
722
 
689
723
  @exp_file = ::Expressir::Express::Builder.build_with_remarks(ast, source: source,
690
- include_source: include_source)
724
+ include_source: include_source)
691
725
 
692
726
  # Set file path on the ExpFile and propagate to schemas
693
727
  @exp_file.path = schema_file
@@ -712,16 +746,19 @@ module Expressir
712
746
  # @param [Array<String>] files Express file paths
713
747
  # @param [Boolean] skip_references skip resolving references
714
748
  # @param [Boolean] include_source attach original source code to model elements
749
+ # @param [Boolean] use_native use native parser (default: true when available)
715
750
  # @yield [filename, schemas, error] Optional block called for each file processed
716
751
  # @yieldparam filename [String] Name of the file being processed
717
752
  # @yieldparam schemas [Array, nil] Array of parsed schemas (nil if parsing failed)
718
753
  # @yieldparam error [Exception, nil] Error that occurred (nil if parsing succeeded)
719
754
  # @return [Model::Repository] Repository containing all parsed ExpFiles
720
- def self.from_files(files, skip_references: nil, include_source: nil, root_path: nil)
755
+ def self.from_files(files, skip_references: nil, include_source: nil,
756
+ root_path: nil, use_native: nil)
721
757
  all_exp_files = []
722
758
 
723
759
  files.each do |file|
724
- exp_file = from_file(file, skip_references: true, root_path: root_path)
760
+ exp_file = from_file(file, skip_references: true,
761
+ root_path: root_path, use_native: use_native)
725
762
  all_exp_files << exp_file
726
763
 
727
764
  # Call the progress block if provided
@@ -751,18 +788,20 @@ module Expressir
751
788
  # @param [String] content Express content as string
752
789
  # @param [Boolean] skip_references skip resolving references
753
790
  # @param [Boolean] include_source attach original source code to model elements
754
- # @param [Boolean] use_native use native parser if available (default: false - AST format differs slightly)
791
+ # @param [Boolean] use_native use native parser (default: true when available)
755
792
  # @param [Boolean] use_streaming use streaming builder for maximum performance (default: false)
756
793
  # @return [Model::ExpFile] Parsed ExpFile
757
794
  # @raise [SchemaParseFailure] if the content fails to parse
758
795
  def self.from_exp(content, skip_references: nil, include_source: nil,
759
- use_native: false, use_streaming: false)
796
+ use_native: nil, use_streaming: false)
760
797
  # Streaming builder mode - uses Parsanol streaming callbacks
761
798
  if use_streaming && Parser.native_available? && defined?(Parsanol::Native.parse_with_builder)
762
799
  return from_exp_streaming(content, skip_references: skip_references,
763
800
  include_source: include_source)
764
801
  end
765
802
 
803
+ use_native = Parser.native_available? if use_native.nil?
804
+
766
805
  begin
767
806
  # Use cached parser instance for performance (avoids ~7ms Parser.new overhead)
768
807
  ast = if use_native && Parser.native_available?
@@ -774,8 +813,9 @@ module Expressir
774
813
  raise Error::SchemaParseFailure.new("(from string)", e)
775
814
  end
776
815
 
777
- exp_file = ::Expressir::Express::Builder.build_with_remarks(ast, source: content,
778
- include_source: include_source)
816
+ exp_file = ::Expressir::Express::Builder.build_with_remarks(ast,
817
+ source: content,
818
+ include_source: include_source)
779
819
 
780
820
  exp_file.schemas.each do |schema|
781
821
  schema.file = nil
@@ -793,20 +833,21 @@ module Expressir
793
833
  exp_file
794
834
  end
795
835
 
796
- # Parse using streaming builder (maximum performance)
836
+ # Parse using streaming builder (construct-by-construct)
797
837
  # @param [String] content Express content as string
798
838
  # @param [Boolean] skip_references skip resolving references
799
839
  # @param [Boolean] include_source attach original source code to model elements
800
840
  # @return [Model::ExpFile] Parsed ExpFile
801
841
  # @raise [SchemaParseFailure] if the content fails to parse
802
- def self.from_exp_streaming(content, skip_references: nil, include_source: nil)
842
+ def self.from_exp_streaming_builder(content, skip_references: nil,
843
+ include_source: nil)
803
844
  grammar_json = Parser.cached_grammar_json
804
845
  builder = ::Expressir::Express::StreamingBuilder.new(source: content,
805
- include_source: include_source)
846
+ include_source: include_source)
806
847
 
807
848
  begin
808
849
  exp_file = Parsanol::Native.parse_with_builder(grammar_json,
809
- content, builder)
850
+ content, builder)
810
851
  rescue StandardError => e
811
852
  raise Error::SchemaParseFailure.new("(streaming)", e)
812
853
  end
@@ -826,6 +867,185 @@ module Expressir
826
867
 
827
868
  exp_file
828
869
  end
870
+
871
# Parse EXPRESS content one schema at a time.
#
# The content is cut into SCHEMA blocks by extract_schema_blocks; each block
# is parsed on its own with Parsanol::Native.parse_fresh against the
# schemaDecl grammar and turned into a model with Builder.build. The
# resulting schemas are collected into a new ExpFile.
#
# NOTE(review): include_source is accepted but never read here; the block
# text is assigned to every schema's `source` unconditionally. Confirm
# whether that is intended.
#
# @param content [String] EXPRESS source code
# @param skip_references [Boolean] when truthy, reference resolution is skipped
# @param include_source [Boolean] currently unused (see note above)
# @return [Expressir::Model::ExpFile] file model holding the parsed schemas
# @raise [Error::SchemaParseFailure] when parsing or building any block fails
def self.from_exp_streaming(content, skip_references: nil,
                            include_source: nil)
  schema_grammar = Parser.cached_schema_grammar_json

  parsed_schemas = extract_schema_blocks(content).map do |block|
    begin
      tree = Parsanol::Native.parse_fresh(schema_grammar, block[:source])
      Builder.build(tree).tap { |schema| schema.source = block[:source] }
    rescue StandardError => e
      # Re-raise with the schema name so the failing block can be identified.
      raise Error::SchemaParseFailure.new(
        "(schema #{block[:name] || 'unknown'})", e
      )
    end
  end

  exp_file = Expressir::Model::ExpFile.new
  exp_file.schemas = parsed_schemas

  # Content parsed from a string has no backing file.
  exp_file.schemas.each do |schema|
    schema.file = nil
    schema.file_basename = nil
    schema.formatted = schema.to_s(no_remarks: true)
  end

  return exp_file if skip_references

  Expressir::Benchmark.measure_references do
    ResolveReferencesModelVisitor.new.visit(exp_file)
  end

  exp_file
end
918
+
919
# Extract individual schema blocks from EXPRESS source.
#
# Walks the text from the start. Whitespace and remarks are skipped with
# skip_ws_and_comments; every position that holds the SCHEMA keyword is
# handed to parse_schema_block. Anything that is not part of a recognised
# block is stepped over one character at a time.
#
# @param source [String] EXPRESS source
# @return [Array<Hash>] the hashes returned by parse_schema_block, in
#   source order (empty when no schema block is found)
def self.extract_schema_blocks(source)
  keyword = "SCHEMA"
  blocks = []
  pos = 0
  len = source.length

  while pos < len
    pos = skip_ws_and_comments(source, pos)
    break if pos >= len

    # EXPRESS keywords are case-insensitive. The left-boundary check keeps
    # identifiers that merely end in "SCHEMA" (MY_SCHEMA, END_SCHEMA) from
    # being mistaken for the keyword. A fixed-width slice is compared so no
    # tail copy of the source is created per position.
    at_keyword = source[pos, keyword.length].casecmp?(keyword) &&
                 (pos.zero? || !source[pos - 1].match?(/[a-zA-Z0-9_]/))
    block = at_keyword ? parse_schema_block(source, pos) : nil

    if block
      blocks << block
      pos = block[:end_pos]
    else
      pos += 1
    end
  end

  blocks
end
952
+
953
+ private_class_method :extract_schema_blocks
954
+
955
# Locate one SCHEMA ... END_SCHEMA block that starts at start_pos.
#
# The SCHEMA keyword (any letter case) must sit exactly at start_pos and be
# followed by whitespace and an identifier. The matching END_SCHEMA is then
# searched for while skipping everything that may legally contain the text
# "END_SCHEMA" without ending the schema: string literals ('...' and "..."),
# embedded remarks (* ... *) including nested ones, and tail remarks (-- to
# end of line).
#
# @param source [String] EXPRESS source
# @param start_pos [Integer] offset at which the SCHEMA keyword is expected
# @return [Hash, nil] nil when no complete block starts at start_pos;
#   otherwise a hash with
#   :name      [String]  schema identifier
#   :source    [String]  block text from SCHEMA through END_SCHEMA and its
#                        terminating ";" when one is present
#   :start_pos [Integer] start_pos as given
#   :end_pos   [Integer] offset after the block, trailing whitespace and ";"
def self.parse_schema_block(source, start_pos)
  keyword = "SCHEMA"
  # `&.` covers a start_pos beyond the end of the string (slice is nil).
  return nil unless source[start_pos, keyword.length]&.casecmp?(keyword)

  keyword_end = start_pos + keyword.length
  name_start = skip_spaces(source, keyword_end)
  # Without a separator this is an identifier such as SCHEMAS, not a keyword.
  return nil if name_start == keyword_end

  name_end = source.index(/[^a-zA-Z0-9_]/, name_start) || source.length
  schema_name = source[name_start...name_end]
  return nil if schema_name.empty?

  # Jump from one significant token to the next instead of testing every
  # character. END_SCHEMA must stand alone, not inside a longer identifier.
  token = /'|"|\(\*|--|(?<![a-zA-Z0-9_])END_SCHEMA(?![a-zA-Z0-9_])/i
  search_pos = name_end
  terminator_end = nil

  while (hit = source.index(token, search_pos))
    lexeme = Regexp.last_match(0)
    case lexeme
    when "'", '"'
      # String literal: resume after the closing quote. A doubled quote
      # inside a literal simply reads as two adjacent literals.
      closing = source.index(lexeme, hit + 1)
      search_pos = closing ? closing + 1 : source.length
    when "(*"
      # Embedded remark: track nesting depth until the outermost "*)".
      depth = 1
      search_pos = hit + 2
      while depth.positive? && (mark = source.index(/\(\*|\*\)/, search_pos))
        depth += (Regexp.last_match(0) == "(*" ? 1 : -1)
        search_pos = mark + 2
      end
      search_pos = source.length if depth.positive?
    when "--"
      # Tail remark: runs to the end of the line.
      newline = source.index("\n", hit)
      search_pos = newline ? newline + 1 : source.length
    else
      terminator_end = hit + lexeme.length
      break
    end
  end

  return nil unless terminator_end

  after_ws = skip_spaces(source, terminator_end)
  semicolon = source[after_ws] == ";"
  # The ";" after END_SCHEMA is part of the schema declaration, so it belongs
  # in the extracted text that is parsed on its own.
  block_end = semicolon ? after_ws + 1 : terminator_end

  {
    name: schema_name,
    source: source[start_pos...block_end],
    start_pos: start_pos,
    end_pos: semicolon ? after_ws + 1 : after_ws,
  }
end
1017
+ private_class_method :parse_schema_block
1018
+
1019
# Advance past spaces, tabs, newlines and carriage returns.
#
# @param source [String] text being scanned
# @param pos [Integer] offset to start from
# @return [Integer] offset of the first character at or after pos that is
#   not one of " ", "\t", "\n", "\r"; source.length when only such
#   characters remain; pos itself when it is already at or past the end
def self.skip_spaces(source, pos)
  return pos if pos >= source.length

  # One regexp scan instead of building a four-element Array per character.
  source.index(/[^ \t\n\r]/, pos) || source.length
end
1026
+ private_class_method :skip_spaces
1027
+
1028
# Advance past whitespace and EXPRESS remarks.
#
# Skips spaces, tabs, newlines and carriage returns, embedded remarks
# (* ... *) including remarks nested inside them, and tail remarks that run
# from "--" to the end of the line.
#
# @param source [String] text being scanned
# @param pos [Integer] offset to start from
# @return [Integer] offset of the first character that is neither whitespace
#   nor part of a remark; source.length when the rest of the text is
#   whitespace/remarks or a remark is left unterminated
def self.skip_ws_and_comments(source, pos)
  len = source.length

  while pos < len
    case source[pos]
    when " ", "\t", "\n", "\r"
      pos += 1
    when "("
      break unless source[pos + 1] == "*"

      # Embedded remark: count nesting so the remark ends at its own "*)".
      depth = 1
      pos += 2
      while depth.positive?
        mark = source.index(/\(\*|\*\)/, pos)
        if mark.nil?
          # Unterminated remark: everything up to the end belongs to it.
          pos = len
          break
        end
        depth += (Regexp.last_match(0) == "(*" ? 1 : -1)
        pos = mark + 2
      end
    when "-"
      break unless source[pos + 1] == "-"

      # Tail remark: ends with the line.
      newline = source.index("\n", pos)
      pos = newline ? newline + 1 : len
    else
      break
    end
  end

  pos
end
1047
+ private_class_method :skip_ws_and_comments
1048
+
829
1049
  private_class_method :from_exp_streaming
830
1050
  end
831
1051
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "set"
4
-
5
3
  module Expressir
6
4
  module Express
7
5
  # Handles attaching remarks (comments) to model elements after parsing.
@@ -27,6 +25,15 @@ module Expressir
27
25
  remarks = extract_all_remarks
28
26
  attach_tagged_remarks(model, remarks)
29
27
  attach_untagged_remarks(model, remarks)
28
+
29
+ # Free expensive data structures after attachment is complete.
30
+ # These are only needed during the attach process.
31
+ @source = nil
32
+ @source_lines = nil
33
+ @scope_map = nil
34
+ @line_cache = nil
35
+ @remarks_cache = nil
36
+
30
37
  model
31
38
  end
32
39
 
@@ -242,7 +249,8 @@ module Expressir
242
249
  end
243
250
  # Fall back to schema prefix
244
251
  if target.nil?
245
- target = create_implicit_remark_item(model, tag, get_schema_ids(model))
252
+ target = create_implicit_remark_item(model, tag,
253
+ get_schema_ids(model))
246
254
  end
247
255
  end
248
256
 
@@ -280,6 +288,7 @@ module Expressir
280
288
 
281
289
  nil
282
290
  end
291
+
283
292
  # Done once per RemarkAttacher instance (O(file_lines)).
284
293
  # Each find_containing_scope call then becomes O(1).
285
294
  def build_scope_map
@@ -1021,21 +1030,41 @@ module Expressir
1021
1030
  # The parser always provides this via Slice#offset
1022
1031
  if node.source_offset
1023
1032
  pos = node.source_offset
1024
- line = get_line_number(pos)
1025
- source_end_line = get_line_number(pos + node.source.length)
1026
-
1027
- # For container nodes, use the maximum end_line from children
1028
- # This is needed because source.length only covers the declaration, not the body
1029
- children_end_line = calculate_children_end_line(node)
1030
- end_line = [source_end_line,
1031
- children_end_line].compact.max || source_end_line
1032
-
1033
- result << {
1034
- node: node,
1035
- position: pos,
1036
- line: line,
1037
- end_line: end_line,
1038
- }
1033
+ # Validate offset: native parser returns 0 for leaf nodes (WhereRule)
1034
+ # where it can't determine the actual position. These have short
1035
+ # expression-like source ("TRUE;") that doesn't appear at file start.
1036
+ # Container nodes (Schema, Entity, Type) have declaration-like source
1037
+ # that either starts at position 0 legitimately or is clearly valid.
1038
+ valid = pos.positive?
1039
+ if !valid && pos.zero? && node.source
1040
+ src = node.source.to_s
1041
+ # Accept position=0 if source is a declaration keyword line
1042
+ valid = src.start_with?("SCHEMA", "ENTITY", "TYPE", "FUNCTION",
1043
+ "PROCEDURE", "RULE", "CONSTANT", "VARIABLE",
1044
+ "USE", "REFERENCE", "END_SCHEMA", "END_ENTITY",
1045
+ "END_TYPE", "END_FUNCTION", "END_PROCEDURE",
1046
+ "END_RULE", "END_CONSTANT", "END_VARIABLE")
1047
+ end
1048
+ if valid
1049
+ line = get_line_number(pos)
1050
+ source_end_line = get_line_number(pos + node.source.length)
1051
+
1052
+ # For container nodes, use the maximum end_line from children
1053
+ # This is needed because source.length only covers the declaration, not the body
1054
+ children_end_line = calculate_children_end_line(node)
1055
+ end_line = [source_end_line,
1056
+ children_end_line].compact.max || source_end_line
1057
+
1058
+ result << {
1059
+ node: node,
1060
+ position: pos,
1061
+ line: line,
1062
+ end_line: end_line,
1063
+ }
1064
+ else
1065
+ # Invalid offset — treat as unknown position
1066
+ result << { node: node, position: nil, line: nil, end_line: nil }
1067
+ end
1039
1068
  else
1040
1069
  # No source_offset available - should not happen if parser provides Slice
1041
1070
  result << { node: node, position: nil, line: nil, end_line: nil }
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "parsanol"
4
- require "set"
5
4
 
6
5
  module Expressir
7
6
  module Express
@@ -14,7 +14,8 @@ module Expressir
14
14
  include ScopeContainer
15
15
 
16
16
  attribute :path, :string
17
- attribute :schemas, Declarations::Schema, collection: true, initialize_empty: true
17
+ attribute :schemas, Declarations::Schema, collection: true,
18
+ initialize_empty: true
18
19
  attribute :_class, :string, default: -> { self.class.name }
19
20
 
20
21
  key_value do
@@ -208,7 +208,7 @@ module Expressir
208
208
  def remark_infos
209
209
  return [] if untagged_remarks.nil?
210
210
 
211
- untagged_remarks.select { |r| r.is_a?(RemarkInfo) }
211
+ untagged_remarks.grep(RemarkInfo)
212
212
  end
213
213
 
214
214
  private