expressir 2.2.1 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/.rubocop_todo.yml +681 -78
  4. data/Gemfile +4 -1
  5. data/README.adoc +63 -26
  6. data/benchmark/srl_benchmark.rb +399 -0
  7. data/benchmark/srl_native_benchmark.rb +146 -0
  8. data/benchmark/srl_ruby_benchmark.rb +132 -0
  9. data/expressir.gemspec +3 -2
  10. data/lib/expressir/benchmark.rb +1 -1
  11. data/lib/expressir/changes/item_change.rb +0 -2
  12. data/lib/expressir/changes/mapping_change.rb +0 -2
  13. data/lib/expressir/changes/schema_change.rb +0 -3
  14. data/lib/expressir/changes/version_change.rb +0 -4
  15. data/lib/expressir/changes.rb +5 -6
  16. data/lib/expressir/cli.rb +10 -24
  17. data/lib/expressir/commands/changes.rb +0 -2
  18. data/lib/expressir/commands/changes_import_eengine.rb +2 -5
  19. data/lib/expressir/commands/changes_validate.rb +0 -2
  20. data/lib/expressir/commands/format.rb +1 -1
  21. data/lib/expressir/commands/manifest.rb +0 -7
  22. data/lib/expressir/commands/package.rb +16 -29
  23. data/lib/expressir/commands/validate.rb +0 -2
  24. data/lib/expressir/commands/validate_ascii.rb +0 -1
  25. data/lib/expressir/commands/validate_load.rb +1 -1
  26. data/lib/expressir/commands.rb +20 -0
  27. data/lib/expressir/config.rb +0 -2
  28. data/lib/expressir/coverage.rb +11 -4
  29. data/lib/expressir/eengine/arm_compare_report.rb +1 -5
  30. data/lib/expressir/eengine/changes_section.rb +1 -4
  31. data/lib/expressir/eengine/compare_report.rb +1 -13
  32. data/lib/expressir/eengine/mim_compare_report.rb +1 -5
  33. data/lib/expressir/eengine/modified_object.rb +1 -3
  34. data/lib/expressir/eengine.rb +9 -0
  35. data/lib/expressir/errors.rb +3 -5
  36. data/lib/expressir/express/builder.rb +82 -24
  37. data/lib/expressir/express/builder_registry.rb +411 -0
  38. data/lib/expressir/express/builders/attribute_decl_builder.rb +0 -6
  39. data/lib/expressir/express/builders/built_in_builder.rb +5 -18
  40. data/lib/expressir/express/builders/constant_builder.rb +4 -19
  41. data/lib/expressir/express/builders/declaration_builder.rb +0 -4
  42. data/lib/expressir/express/builders/derive_clause_builder.rb +0 -2
  43. data/lib/expressir/express/builders/derived_attr_builder.rb +0 -2
  44. data/lib/expressir/express/builders/domain_rule_builder.rb +0 -2
  45. data/lib/expressir/express/builders/entity_decl_builder.rb +11 -13
  46. data/lib/expressir/express/builders/explicit_attr_builder.rb +5 -8
  47. data/lib/expressir/express/builders/expression_builder.rb +25 -67
  48. data/lib/expressir/express/builders/function_decl_builder.rb +20 -18
  49. data/lib/expressir/express/builders/interface_builder.rb +0 -20
  50. data/lib/expressir/express/builders/inverse_attr_builder.rb +0 -2
  51. data/lib/expressir/express/builders/inverse_attr_type_builder.rb +0 -6
  52. data/lib/expressir/express/builders/inverse_clause_builder.rb +0 -2
  53. data/lib/expressir/express/builders/literal_builder.rb +1 -15
  54. data/lib/expressir/express/builders/procedure_decl_builder.rb +20 -19
  55. data/lib/expressir/express/builders/qualifier_builder.rb +0 -27
  56. data/lib/expressir/express/builders/reference_builder.rb +1 -10
  57. data/lib/expressir/express/builders/rule_decl_builder.rb +21 -19
  58. data/lib/expressir/express/builders/schema_body_decl_builder.rb +0 -4
  59. data/lib/expressir/express/builders/schema_decl_builder.rb +7 -13
  60. data/lib/expressir/express/builders/schema_version_builder.rb +0 -6
  61. data/lib/expressir/express/builders/simple_id_builder.rb +1 -10
  62. data/lib/expressir/express/builders/statement_builder.rb +4 -32
  63. data/lib/expressir/express/builders/subtype_constraint_builder.rb +6 -30
  64. data/lib/expressir/express/builders/syntax_builder.rb +18 -7
  65. data/lib/expressir/express/builders/type_builder.rb +3 -45
  66. data/lib/expressir/express/builders/type_decl_builder.rb +1 -7
  67. data/lib/expressir/express/builders/unique_clause_builder.rb +1 -3
  68. data/lib/expressir/express/builders/unique_rule_builder.rb +0 -2
  69. data/lib/expressir/express/builders/where_clause_builder.rb +1 -3
  70. data/lib/expressir/express/builders.rb +47 -35
  71. data/lib/expressir/express/error.rb +0 -3
  72. data/lib/expressir/express/formatter.rb +17 -19
  73. data/lib/expressir/express/formatters/data_types_formatter.rb +295 -293
  74. data/lib/expressir/express/formatters/declarations_formatter.rb +617 -615
  75. data/lib/expressir/express/formatters/expressions_formatter.rb +146 -144
  76. data/lib/expressir/express/formatters/literals_formatter.rb +35 -33
  77. data/lib/expressir/express/formatters/references_formatter.rb +34 -32
  78. data/lib/expressir/express/formatters/remark_formatter.rb +174 -209
  79. data/lib/expressir/express/formatters/remark_item_formatter.rb +18 -16
  80. data/lib/expressir/express/formatters/statements_formatter.rb +190 -188
  81. data/lib/expressir/express/formatters/supertype_expressions_formatter.rb +41 -39
  82. data/lib/expressir/express/formatters.rb +22 -0
  83. data/lib/expressir/express/parser.rb +266 -47
  84. data/lib/expressir/express/pretty_formatter.rb +68 -47
  85. data/lib/expressir/express/remark_attacher.rb +254 -162
  86. data/lib/expressir/express/streaming_builder.rb +0 -3
  87. data/lib/expressir/express/transformer/remark_handling.rb +1 -3
  88. data/lib/expressir/express.rb +29 -0
  89. data/lib/expressir/manifest/resolver.rb +0 -3
  90. data/lib/expressir/manifest/validator.rb +0 -3
  91. data/lib/expressir/manifest.rb +6 -0
  92. data/lib/expressir/model/cache.rb +1 -1
  93. data/lib/expressir/model/concerns.rb +19 -0
  94. data/lib/expressir/model/data_types/aggregate.rb +1 -1
  95. data/lib/expressir/model/data_types/array.rb +1 -1
  96. data/lib/expressir/model/data_types/bag.rb +1 -1
  97. data/lib/expressir/model/data_types/binary.rb +1 -1
  98. data/lib/expressir/model/data_types/boolean.rb +1 -1
  99. data/lib/expressir/model/data_types/enumeration.rb +1 -1
  100. data/lib/expressir/model/data_types/enumeration_item.rb +1 -1
  101. data/lib/expressir/model/data_types/generic.rb +1 -1
  102. data/lib/expressir/model/data_types/generic_entity.rb +1 -1
  103. data/lib/expressir/model/data_types/integer.rb +1 -1
  104. data/lib/expressir/model/data_types/list.rb +1 -1
  105. data/lib/expressir/model/data_types/logical.rb +1 -1
  106. data/lib/expressir/model/data_types/number.rb +1 -1
  107. data/lib/expressir/model/data_types/real.rb +1 -1
  108. data/lib/expressir/model/data_types/select.rb +1 -1
  109. data/lib/expressir/model/data_types/set.rb +1 -1
  110. data/lib/expressir/model/data_types/string.rb +1 -1
  111. data/lib/expressir/model/data_types.rb +25 -0
  112. data/lib/expressir/model/declarations/attribute.rb +1 -1
  113. data/lib/expressir/model/declarations/constant.rb +1 -1
  114. data/lib/expressir/model/declarations/derived_attribute.rb +1 -1
  115. data/lib/expressir/model/declarations/entity.rb +4 -1
  116. data/lib/expressir/model/declarations/function.rb +3 -1
  117. data/lib/expressir/model/declarations/informal_proposition_rule.rb +2 -1
  118. data/lib/expressir/model/declarations/interface.rb +1 -1
  119. data/lib/expressir/model/declarations/interface_item.rb +1 -1
  120. data/lib/expressir/model/declarations/interfaced_item.rb +1 -1
  121. data/lib/expressir/model/declarations/inverse_attribute.rb +1 -1
  122. data/lib/expressir/model/declarations/parameter.rb +1 -1
  123. data/lib/expressir/model/declarations/procedure.rb +3 -1
  124. data/lib/expressir/model/declarations/remark_item.rb +1 -1
  125. data/lib/expressir/model/declarations/rule.rb +4 -1
  126. data/lib/expressir/model/declarations/schema.rb +2 -1
  127. data/lib/expressir/model/declarations/schema_version.rb +1 -1
  128. data/lib/expressir/model/declarations/schema_version_item.rb +1 -1
  129. data/lib/expressir/model/declarations/subtype_constraint.rb +1 -1
  130. data/lib/expressir/model/declarations/type.rb +4 -1
  131. data/lib/expressir/model/declarations/unique_rule.rb +1 -1
  132. data/lib/expressir/model/declarations/variable.rb +1 -1
  133. data/lib/expressir/model/declarations/where_rule.rb +1 -1
  134. data/lib/expressir/model/declarations.rb +31 -0
  135. data/lib/expressir/model/dependency_resolver.rb +0 -2
  136. data/lib/expressir/model/exp_file.rb +39 -0
  137. data/lib/expressir/model/expressions/aggregate_initializer.rb +1 -1
  138. data/lib/expressir/model/expressions/aggregate_initializer_item.rb +1 -1
  139. data/lib/expressir/model/expressions/binary_expression.rb +1 -1
  140. data/lib/expressir/model/expressions/entity_constructor.rb +1 -1
  141. data/lib/expressir/model/expressions/function_call.rb +1 -1
  142. data/lib/expressir/model/expressions/interval.rb +1 -1
  143. data/lib/expressir/model/expressions/query_expression.rb +1 -1
  144. data/lib/expressir/model/expressions/unary_expression.rb +1 -1
  145. data/lib/expressir/model/expressions.rb +18 -0
  146. data/lib/expressir/model/identifier.rb +5 -1
  147. data/lib/expressir/model/indexes.rb +11 -0
  148. data/lib/expressir/model/literals/binary.rb +1 -1
  149. data/lib/expressir/model/literals/integer.rb +1 -1
  150. data/lib/expressir/model/literals/logical.rb +1 -1
  151. data/lib/expressir/model/literals/real.rb +1 -1
  152. data/lib/expressir/model/literals/string.rb +1 -1
  153. data/lib/expressir/model/literals.rb +13 -0
  154. data/lib/expressir/model/model_element.rb +7 -15
  155. data/lib/expressir/model/references/attribute_reference.rb +1 -1
  156. data/lib/expressir/model/references/group_reference.rb +1 -1
  157. data/lib/expressir/model/references/index_reference.rb +1 -1
  158. data/lib/expressir/model/references/simple_reference.rb +1 -1
  159. data/lib/expressir/model/references.rb +12 -0
  160. data/lib/expressir/model/remark_info.rb +1 -7
  161. data/lib/expressir/model/repository.rb +76 -41
  162. data/lib/expressir/model/repository_validator.rb +0 -2
  163. data/lib/expressir/model/search_engine.rb +12 -35
  164. data/lib/expressir/model/statements/alias.rb +1 -1
  165. data/lib/expressir/model/statements/assignment.rb +1 -1
  166. data/lib/expressir/model/statements/case.rb +1 -1
  167. data/lib/expressir/model/statements/case_action.rb +1 -1
  168. data/lib/expressir/model/statements/compound.rb +1 -1
  169. data/lib/expressir/model/statements/escape.rb +1 -1
  170. data/lib/expressir/model/statements/if.rb +1 -1
  171. data/lib/expressir/model/statements/null.rb +1 -1
  172. data/lib/expressir/model/statements/procedure_call.rb +1 -1
  173. data/lib/expressir/model/statements/repeat.rb +1 -1
  174. data/lib/expressir/model/statements/return.rb +1 -1
  175. data/lib/expressir/model/statements/skip.rb +1 -1
  176. data/lib/expressir/model/statements.rb +20 -0
  177. data/lib/expressir/model/supertype_expressions/binary_supertype_expression.rb +1 -1
  178. data/lib/expressir/model/supertype_expressions/oneof_supertype_expression.rb +1 -1
  179. data/lib/expressir/model/supertype_expressions.rb +12 -0
  180. data/lib/expressir/model.rb +28 -4
  181. data/lib/expressir/package/builder.rb +35 -4
  182. data/lib/expressir/package/metadata.rb +0 -2
  183. data/lib/expressir/package/reader.rb +0 -1
  184. data/lib/expressir/package.rb +8 -0
  185. data/lib/expressir/schema_manifest.rb +5 -7
  186. data/lib/expressir/schema_manifest_entry.rb +3 -5
  187. data/lib/expressir/transformer.rb +7 -0
  188. data/lib/expressir/version.rb +1 -1
  189. data/lib/expressir.rb +23 -171
  190. metadata +46 -6
  191. data/lib/expressir/express/builders/token_builder.rb +0 -15
@@ -1,9 +1,4 @@
1
1
  require "parsanol"
2
- require_relative "error"
3
- require_relative "builder"
4
- require_relative "builders"
5
- require_relative "remark_attacher"
6
- require_relative "streaming_builder"
7
2
 
8
3
  module Expressir
9
4
  module Express
@@ -19,6 +14,13 @@ module Expressir
19
14
  @@cached_parser = nil
20
15
  @@parser_mutex = Mutex.new
21
16
 
17
+ # Cache for schemaDecl grammar JSON (used for streaming parse)
18
+ @@cached_schema_grammar_json = nil
19
+
20
+ # Threshold for using memory-bounded fresh parse (bytes)
21
+ # Files above this use parse_fresh which has no packrat cache
22
+ LARGE_FILE_THRESHOLD = 1024 * 1024 # 1 MB
23
+
22
24
  # Get cached parser instance (thread-safe)
23
25
  # Reusing the parser avoids the overhead of reinitializing all rule definitions
24
26
  def self.cached_parser
@@ -64,14 +66,36 @@ module Expressir
64
66
  @@cached_grammar_json
65
67
  end
66
68
 
67
- # Parse using native engine (with caching)
69
+ # Get cached grammar JSON for schemaDecl (used for streaming parse)
70
+ def self.cached_schema_grammar_json
71
+ return @@cached_schema_grammar_json if @@cached_schema_grammar_json
72
+
73
+ schema_atom = cached_parser.schemaDecl
74
+ @@cached_schema_grammar_json = Parsanol::Native.serialize_grammar(schema_atom)
75
+ @@cached_schema_grammar_json
76
+ end
77
+
78
+ # Parse using native engine with Rust-side transformation (fastest)
79
+ #
80
+ # This method provides ~17x speedup over pure Ruby parsing.
81
+ # The transformation happens in Rust using to_parslet_compatible,
82
+ # producing Parslet-compatible output that Builder.build can consume directly.
83
+ #
84
+ # @param source [String] EXPRESS source code to parse
85
+ # @return [Hash, Array] Transformed AST in Parslet-compatible format
86
+ # @raise [LoadError] If native parser is not available
68
87
  def self.parse_native(source)
69
88
  unless native_available?
70
89
  raise LoadError, "Native parser not available"
71
90
  end
72
91
 
73
- # Use Parsanol 2.0 API - parse returns Slice objects with position info
74
- new.parse(source, mode: :native)
92
+ grammar_atom = cached_parser.syntax
93
+ # Use fresh-parse (no cache) for large files to bound memory
94
+ if source.bytesize > LARGE_FILE_THRESHOLD
95
+ Parsanol::Native.parse_fresh(grammar_atom, source)
96
+ else
97
+ Parsanol::Native.parse(grammar_atom, source)
98
+ end
75
99
  end
76
100
 
77
101
  def cts(atom)
@@ -218,7 +242,7 @@ module Expressir
218
242
  (attributeDecl >> op_colon >> parameterType >> op_decl >> expression >> op_delim).as(:derivedAttr)
219
243
  end
220
244
  rule(:digit) { match["0-9"] }
221
- rule(:digits) { (digit >> digit.repeat) }
245
+ rule(:digits) { digit >> digit.repeat }
222
246
  rule(:domainRule) do
223
247
  ((ruleLabelId >> op_colon).maybe >> expression).as(:domainRule)
224
248
  end
@@ -228,7 +252,7 @@ module Expressir
228
252
  rule(:embeddedRemark) do
229
253
  (str("(*") >> (str("*)").absent? >> (embeddedRemark | any)).repeat >> str("*)")).as(:embeddedRemark)
230
254
  end
231
- rule(:encodedCharacter) { (octet >> octet >> octet >> octet) }
255
+ rule(:encodedCharacter) { octet >> octet >> octet >> octet }
232
256
  rule(:encodedStringLiteral) do
233
257
  cts((str('"') >> encodedCharacter.repeat(1) >> str('"')).as(:str)).as(:encodedStringLiteral)
234
258
  end
@@ -667,26 +691,41 @@ module Expressir
667
691
  # @param [String] file Express file path
668
692
  # @param [Boolean] skip_references skip resolving references
669
693
  # @param [Boolean] include_source attach original source code to model elements
670
- # @return [Model::Repository]
694
+ # @param [Boolean] use_native use native parser (default: true when available)
695
+ # @return [Model::ExpFile] ExpFile containing parsed schemas
671
696
  # @raise [SchemaParseFailure] if the schema file fails to parse
672
- def self.from_file(file, skip_references: nil, include_source: nil, root_path: nil) # rubocop:disable Metrics/AbcSize
697
+ def self.from_file(file, skip_references: nil, include_source: nil, root_path: nil, use_native: nil) # rubocop:disable Metrics/AbcSize
673
698
  Expressir::Benchmark.measure_file(file) do
674
699
  source = File.read file
675
700
 
676
701
  # remove root path from file path
677
702
  schema_file = root_path ? Pathname.new(file.to_s).relative_path_from(root_path).to_s : file.to_s
678
703
 
704
+ use_native = Parser.native_available? if use_native.nil?
705
+
679
706
  begin
680
- ast = Parser.cached_parser.parse source
707
+ ast = if use_native && Parser.native_available?
708
+ begin
709
+ Parser.parse_native(source)
710
+ rescue StandardError
711
+ # Native parser may fail on non-ASCII or edge cases;
712
+ # fall back to Ruby parser
713
+ Parser.cached_parser.parse source
714
+ end
715
+ else
716
+ Parser.cached_parser.parse source
717
+ end
681
718
  rescue Parsanol::ParseFailed => e
682
719
  # Instead of just printing, raise a proper error with file context
683
720
  raise Error::SchemaParseFailure.new(schema_file, e)
684
721
  end
685
722
 
686
- @repository = Builder.build_with_remarks(ast, source: source,
687
- include_source: include_source)
723
+ @exp_file = ::Expressir::Express::Builder.build_with_remarks(ast, source: source,
724
+ include_source: include_source)
688
725
 
689
- @repository.schemas.each do |schema|
726
+ # Set file path on the ExpFile and propagate to schemas
727
+ @exp_file.path = schema_file
728
+ @exp_file.schemas.each do |schema|
690
729
  schema.file = schema_file
691
730
  schema.file_basename = File.basename(schema_file, ".exp")
692
731
  schema.formatted = schema.to_s(no_remarks: true)
@@ -695,11 +734,11 @@ module Expressir
695
734
  unless skip_references
696
735
  Expressir::Benchmark.measure_references do
697
736
  @resolve_references_model_visitor = ResolveReferencesModelVisitor.new
698
- @resolve_references_model_visitor.visit(@repository)
737
+ @resolve_references_model_visitor.visit(@exp_file)
699
738
  end
700
739
  end
701
740
 
702
- @repository
741
+ @exp_file
703
742
  end
704
743
  end
705
744
 
@@ -707,23 +746,23 @@ module Expressir
707
746
  # @param [Array<String>] files Express file paths
708
747
  # @param [Boolean] skip_references skip resolving references
709
748
  # @param [Boolean] include_source attach original source code to model elements
749
+ # @param [Boolean] use_native use native parser (default: true when available)
710
750
  # @yield [filename, schemas, error] Optional block called for each file processed
711
751
  # @yieldparam filename [String] Name of the file being processed
712
752
  # @yieldparam schemas [Array, nil] Array of parsed schemas (nil if parsing failed)
713
753
  # @yieldparam error [Exception, nil] Error that occurred (nil if parsing succeeded)
714
- # @return [Model::Repository]
754
+ # @return [Model::Repository] Repository containing all parsed ExpFiles
715
755
  def self.from_files(files, skip_references: nil, include_source: nil,
716
- root_path: nil)
717
- all_schemas = []
756
+ root_path: nil, use_native: nil)
757
+ all_exp_files = []
718
758
 
719
759
  files.each do |file|
720
- repository = from_file(file, skip_references: true,
721
- root_path: root_path)
722
- file_schemas = repository.schemas
723
- all_schemas.concat(file_schemas)
760
+ exp_file = from_file(file, skip_references: true,
761
+ root_path: root_path, use_native: use_native)
762
+ all_exp_files << exp_file
724
763
 
725
764
  # Call the progress block if provided
726
- yield(file, file_schemas, nil) if block_given?
765
+ yield(file, exp_file&.schemas, nil) if block_given?
727
766
  rescue StandardError => e
728
767
  # Call the progress block with the error if provided
729
768
  yield(file, nil, e) if block_given?
@@ -733,9 +772,7 @@ root_path: nil)
733
772
  raise unless e.is_a?(Error::SchemaParseFailure)
734
773
  end
735
774
 
736
- @repository = Model::Repository.new(
737
- schemas: all_schemas,
738
- )
775
+ @repository = Model::Repository.new(files: all_exp_files)
739
776
 
740
777
  unless skip_references
741
778
  Expressir::Benchmark.measure_references do
@@ -751,18 +788,20 @@ root_path: nil)
751
788
  # @param [String] content Express content as string
752
789
  # @param [Boolean] skip_references skip resolving references
753
790
  # @param [Boolean] include_source attach original source code to model elements
754
- # @param [Boolean] use_native use native parser if available (default: false - AST format differs slightly)
791
+ # @param [Boolean] use_native use native parser (default: true when available)
755
792
  # @param [Boolean] use_streaming use streaming builder for maximum performance (default: false)
756
- # @return [Model::Repository] Parsed repository
793
+ # @return [Model::ExpFile] Parsed ExpFile
757
794
  # @raise [SchemaParseFailure] if the content fails to parse
758
795
  def self.from_exp(content, skip_references: nil, include_source: nil,
759
- use_native: false, use_streaming: false)
796
+ use_native: nil, use_streaming: false)
760
797
  # Streaming builder mode - uses Parsanol streaming callbacks
761
798
  if use_streaming && Parser.native_available? && defined?(Parsanol::Native.parse_with_builder)
762
799
  return from_exp_streaming(content, skip_references: skip_references,
763
800
  include_source: include_source)
764
801
  end
765
802
 
803
+ use_native = Parser.native_available? if use_native.nil?
804
+
766
805
  begin
767
806
  # Use cached parser instance for performance (avoids ~7ms Parser.new overhead)
768
807
  ast = if use_native && Parser.native_available?
@@ -774,10 +813,11 @@ root_path: nil)
774
813
  raise Error::SchemaParseFailure.new("(from string)", e)
775
814
  end
776
815
 
777
- repository = Builder.build_with_remarks(ast, source: content,
778
- include_source: include_source)
816
+ exp_file = ::Expressir::Express::Builder.build_with_remarks(ast,
817
+ source: content,
818
+ include_source: include_source)
779
819
 
780
- repository.schemas.each do |schema|
820
+ exp_file.schemas.each do |schema|
781
821
  schema.file = nil
782
822
  schema.file_basename = nil
783
823
  schema.formatted = schema.to_s(no_remarks: true)
@@ -786,33 +826,33 @@ root_path: nil)
786
826
  unless skip_references
787
827
  Expressir::Benchmark.measure_references do
788
828
  resolve_references_model_visitor = ResolveReferencesModelVisitor.new
789
- resolve_references_model_visitor.visit(repository)
829
+ resolve_references_model_visitor.visit(exp_file)
790
830
  end
791
831
  end
792
832
 
793
- repository
833
+ exp_file
794
834
  end
795
835
 
796
- # Parse using streaming builder (maximum performance)
836
+ # Parse using streaming builder (construct-by-construct)
797
837
  # @param [String] content Express content as string
798
838
  # @param [Boolean] skip_references skip resolving references
799
839
  # @param [Boolean] include_source attach original source code to model elements
800
- # @return [Model::Repository] Parsed repository
840
+ # @return [Model::ExpFile] Parsed ExpFile
801
841
  # @raise [SchemaParseFailure] if the content fails to parse
802
- def self.from_exp_streaming(content, skip_references: nil,
842
+ def self.from_exp_streaming_builder(content, skip_references: nil,
803
843
  include_source: nil)
804
844
  grammar_json = Parser.cached_grammar_json
805
- builder = StreamingBuilder.new(source: content,
806
- include_source: include_source)
845
+ builder = ::Expressir::Express::StreamingBuilder.new(source: content,
846
+ include_source: include_source)
807
847
 
808
848
  begin
809
- repository = Parsanol::Native.parse_with_builder(grammar_json,
810
- content, builder)
849
+ exp_file = Parsanol::Native.parse_with_builder(grammar_json,
850
+ content, builder)
811
851
  rescue StandardError => e
812
852
  raise Error::SchemaParseFailure.new("(streaming)", e)
813
853
  end
814
854
 
815
- repository.schemas.each do |schema|
855
+ exp_file.schemas.each do |schema|
816
856
  schema.file = nil
817
857
  schema.file_basename = nil
818
858
  schema.formatted = schema.to_s(no_remarks: true)
@@ -821,12 +861,191 @@ include_source: nil)
821
861
  unless skip_references
822
862
  Expressir::Benchmark.measure_references do
823
863
  resolve_references_model_visitor = ResolveReferencesModelVisitor.new
824
- resolve_references_model_visitor.visit(repository)
864
+ resolve_references_model_visitor.visit(exp_file)
825
865
  end
826
866
  end
827
867
 
828
- repository
868
+ exp_file
829
869
  end
870
+
871
+ # Parse each schema separately with fresh arena (memory-bounded)
872
+ #
873
+ # This splits the source into schema blocks and parses each independently.
874
+ # Memory is bounded by the largest schema, not the entire file.
875
+ #
876
+ # @param content [String] EXPRESS source code
877
+ # @param skip_references [Boolean] Whether to skip reference resolution
878
+ # @param include_source [Boolean] Whether to include source in model
879
+ # @return [Expressir::Model::ExpFile] Parsed EXPRESS file
880
+ def self.from_exp_streaming(content, skip_references: nil,
881
+ include_source: nil)
882
+ grammar_json = Parser.cached_schema_grammar_json
883
+
884
+ # Extract schema blocks from source
885
+ schema_blocks = extract_schema_blocks(content)
886
+
887
+ # Parse each schema with fresh arena
888
+ schemas = schema_blocks.map do |block|
889
+ ast = Parsanol::Native.parse_fresh(grammar_json, block[:source])
890
+ schema_model = Builder.build(ast)
891
+ schema_model.source = block[:source]
892
+ schema_model
893
+ rescue StandardError => e
894
+ raise Error::SchemaParseFailure.new(
895
+ "(schema #{block[:name] || 'unknown'})", e
896
+ )
897
+ end
898
+
899
+ # Build the file model
900
+ exp_file = Expressir::Model::ExpFile.new
901
+ exp_file.schemas = schemas
902
+
903
+ exp_file.schemas.each do |schema|
904
+ schema.file = nil
905
+ schema.file_basename = nil
906
+ schema.formatted = schema.to_s(no_remarks: true)
907
+ end
908
+
909
+ unless skip_references
910
+ Expressir::Benchmark.measure_references do
911
+ resolve_references_model_visitor = ResolveReferencesModelVisitor.new
912
+ resolve_references_model_visitor.visit(exp_file)
913
+ end
914
+ end
915
+
916
+ exp_file
917
+ end
918
+
919
+ # Extract individual schema blocks from EXPRESS source
920
+ #
921
+ # This uses a state machine to properly handle nested comments and strings.
922
+ #
923
+ # @param source [String] EXPRESS source
924
+ # @return [Array<Hash>] Array of {name: String, source: String} for each schema
925
+ def self.extract_schema_blocks(source)
926
+ blocks = []
927
+ pos = 0
928
+ len = source.length
929
+
930
+ while pos < len
931
+ # Skip whitespace and find SCHEMA keyword
932
+ skip_ws_and_comments(source, pos)
933
+ pos = skip_ws_and_comments(source, pos)
934
+
935
+ break if pos >= len
936
+
937
+ # Check for SCHEMA
938
+ if source[pos..].start_with?("SCHEMA")
939
+ result = parse_schema_block(source, pos)
940
+ if result
941
+ blocks << result
942
+ pos = result[:end_pos]
943
+ next
944
+ end
945
+ end
946
+
947
+ pos += 1
948
+ end
949
+
950
+ blocks
951
+ end
952
+
953
+ private_class_method :extract_schema_blocks
954
+
955
+ def self.parse_schema_block(source, start_pos)
956
+ # Must be at SCHEMA keyword
957
+ return nil unless source[start_pos..].start_with?("SCHEMA")
958
+
959
+ pos = start_pos + "SCHEMA".length
960
+ pos = skip_spaces(source, pos)
961
+
962
+ # Extract schema name (identifier)
963
+ name_start = pos
964
+ while pos < source.length && (source[pos] =~ /[a-zA-Z0-9_]/ || source[pos] == "_")
965
+ pos += 1
966
+ end
967
+ schema_name = source[name_start...pos]
968
+
969
+ return nil if schema_name.empty?
970
+
971
+ # Skip to END_SCHEMA
972
+ depth = 1
973
+ search_pos = pos
974
+ end_pos = nil
975
+
976
+ while search_pos < source.length
977
+ if source[search_pos] == '"'
978
+ # String literal - skip to end
979
+ search_pos += 1
980
+ while search_pos < source.length && source[search_pos] != '"'
981
+ search_pos += 1
982
+ end
983
+ search_pos += 1
984
+ elsif source[search_pos] == "(" && source[search_pos + 1] == "*"
985
+ # Comment - skip to end
986
+ search_pos += 2
987
+ while search_pos < source.length && !(source[search_pos] == "*" && source[search_pos + 1] == ")")
988
+ search_pos += 1
989
+ end
990
+ search_pos += 2
991
+ elsif source[search_pos..].start_with?("END_SCHEMA")
992
+ depth -= 1
993
+ if depth.zero?
994
+ end_pos = search_pos + "END_SCHEMA".length
995
+ # Skip trailing whitespace and semicolon
996
+ pos = end_pos
997
+ pos = skip_spaces(source, pos)
998
+ pos += 1 if source[pos] == ";" # semicolon
999
+ break
1000
+ end
1001
+ search_pos += "END_SCHEMA".length
1002
+ else
1003
+ search_pos += 1
1004
+ end
1005
+ end
1006
+
1007
+ return nil unless end_pos
1008
+
1009
+ schema_source = source[start_pos...end_pos]
1010
+ {
1011
+ name: schema_name,
1012
+ source: schema_source,
1013
+ start_pos: start_pos,
1014
+ end_pos: pos,
1015
+ }
1016
+ end
1017
+ private_class_method :parse_schema_block
1018
+
1019
+ def self.skip_spaces(source, pos)
1020
+ while pos < source.length && [" ", "\t", "\n",
1021
+ "\r"].include?(source[pos])
1022
+ pos += 1
1023
+ end
1024
+ pos
1025
+ end
1026
+ private_class_method :skip_spaces
1027
+
1028
+ def self.skip_ws_and_comments(source, pos)
1029
+ len = source.length
1030
+ while pos < len
1031
+ c = source[pos]
1032
+ if [" ", "\t", "\n", "\r"].include?(c)
1033
+ pos += 1
1034
+ elsif c == "(" && source[pos + 1] == "*"
1035
+ # Comment - skip to end
1036
+ pos += 2
1037
+ while pos < len - 1 && !(source[pos] == "*" && source[pos + 1] == ")")
1038
+ pos += 1
1039
+ end
1040
+ pos += 2
1041
+ else
1042
+ break
1043
+ end
1044
+ end
1045
+ pos
1046
+ end
1047
+ private_class_method :skip_ws_and_comments
1048
+
830
1049
  private_class_method :from_exp_streaming
831
1050
  end
832
1051
  end
@@ -1,5 +1,3 @@
1
- require_relative "formatter"
2
-
3
1
  module Expressir
4
2
  module Express
5
3
  # Pretty formatter for EXPRESS schemas following ELF specification
@@ -100,22 +98,16 @@ module Expressir
100
98
  remark = node.untagged_remarks.first
101
99
  return "" if remark.nil?
102
100
 
103
- # Handle both RemarkInfo and String
104
- if remark.is_a?(Model::RemarkInfo)
105
- text = remark.text
106
- return "" if text.nil? || text.empty?
101
+ return "" unless remark.is_a?(Model::RemarkInfo)
107
102
 
108
- # Include tag if present
109
- formatted_text = remark.tagged? ? "\"#{remark.tag}\" #{text}" : text
103
+ text = remark.text
104
+ return "" if text.nil? || text.empty?
110
105
 
111
- # Use format from RemarkInfo
112
- remark.tail? ? " -- #{formatted_text}" : " (* #{formatted_text} *)"
113
- else
114
- # Legacy string - default to tail format
115
- return "" if remark.empty?
106
+ # Include tag if present
107
+ formatted_text = remark.tagged? ? "\"#{remark.tag}\" #{text}" : text
116
108
 
117
- " -- #{remark}"
118
- end
109
+ # Use format from RemarkInfo
110
+ remark.tail? ? " -- #{formatted_text}" : " (* #{formatted_text} *)"
119
111
  end
120
112
 
121
113
  # Override to preserve tail remarks on attributes
@@ -254,33 +246,27 @@ module Expressir
254
246
  end
255
247
 
256
248
  # Format a single preamble remark
257
- # @param remark [String, RemarkInfo] Remark text or RemarkInfo object
249
+ # @param remark [RemarkInfo] RemarkInfo object
258
250
  # @param indent_str [String] Indentation to use (optional)
259
251
  # @return [String] Formatted remark
260
252
  def format_preamble_remark(remark, _indent_str = "")
261
- # Handle both String (legacy) and RemarkInfo
262
- if remark.is_a?(Model::RemarkInfo)
263
- text = remark.text
264
-
265
- # Include tag if present
266
- text = "\"#{remark.tag}\" #{text}" if remark.tagged?
267
-
268
- # Use format from RemarkInfo
269
- if remark.tail?
270
- "-- #{text}"
271
- elsif text.include?("\n")
272
- # Embedded remark - always use embedded format for preamble
273
- ["(*", text,
274
- "*)"].join("\n")
275
- else
276
- "(* #{text} *)"
277
- end
278
- elsif remark.include?("\n")
279
- # Legacy string handling
280
- ["(*", remark,
253
+ return "" unless remark.is_a?(Model::RemarkInfo)
254
+
255
+ text = remark.text
256
+ return "" if text.nil? || text.empty?
257
+
258
+ # Include tag if present
259
+ text = "\"#{remark.tag}\" #{text}" if remark.tagged?
260
+
261
+ # Use format from RemarkInfo
262
+ if remark.tail?
263
+ "-- #{text}"
264
+ elsif text.include?("\n")
265
+ # Embedded remark - always use embedded format for preamble
266
+ ["(*", text,
281
267
  "*)"].join("\n")
282
268
  else
283
- "(* #{remark} *)"
269
+ "(* #{text} *)"
284
270
  end
285
271
  end
286
272
 
@@ -307,18 +293,53 @@ module Expressir
307
293
  def format_repository(node)
308
294
  result = []
309
295
 
310
- # Add preamble if source has remarks
311
- # Use begin/rescue for duck typing (RBS-safe)
312
- source_remarks = begin
313
- node.source_remarks if node.is_a?(Model::Repository)
314
- rescue NoMethodError
315
- nil
296
+ # Format files if present
297
+ node.files&.each do |exp_file|
298
+ file_output = format_exp_file(exp_file)
299
+ result << file_output if file_output && !file_output.empty?
300
+ end
301
+
302
+ # Handle schemas directly added to repository (not via files)
303
+ direct_schemas = node.schemas.select do |s|
304
+ # Schema is direct if it's not in any file
305
+ node.files.nil? || node.files.none? { |f| f.schemas&.include?(s) }
316
306
  end
317
- if source_remarks
318
- result.concat(format_preamble(source_remarks))
307
+
308
+ if direct_schemas.any?
309
+ # Add preamble if repository has untagged remarks
310
+ if node.untagged_remarks && !node.untagged_remarks.empty?
311
+ result.concat(format_preamble(node.untagged_remarks))
312
+ end
313
+
314
+ # Add provenance
315
+ provenance = format_provenance
316
+ result << provenance if provenance
317
+ result << "" if provenance
318
+
319
+ # Add schemas
320
+ schemas_output = direct_schemas.map { |x| format(x) }.join("\n\n")
321
+ result << schemas_output if schemas_output
322
+ elsif result.empty?
323
+ # Empty repository - just add provenance
324
+ provenance = format_provenance
325
+ result << provenance if provenance
326
+ end
327
+
328
+ result.any? ? "#{result.join("\n")}\n" : ""
329
+ end
330
+
331
+ # Format ExpFile with file-level preamble
332
+ # @param node [Model::ExpFile] ExpFile node
333
+ # @return [String] Formatted file
334
+ def format_exp_file(node)
335
+ result = []
336
+
337
+ # Add file-level preamble if present
338
+ if node.untagged_remarks && !node.untagged_remarks.empty?
339
+ result.concat(format_preamble(node.untagged_remarks))
319
340
  end
320
341
 
321
- # Add provenance
342
+ # Add provenance (only for first file or single file)
322
343
  provenance = format_provenance
323
344
  result << provenance if provenance
324
345
  result << "" if provenance
@@ -327,7 +348,7 @@ module Expressir
327
348
  schemas = node.schemas&.map { |x| format(x) }&.join("\n\n")
328
349
  result << schemas if schemas
329
350
 
330
- "#{result.join("\n")}\n"
351
+ result.empty? ? "" : "#{result.join("\n")}\n"
331
352
  end
332
353
 
333
354
  # Format a block of constants with aligned colons and assignments