expressir 2.1.31 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +3 -2
- data/.github/workflows/release.yml +6 -0
- data/.rubocop_todo.yml +106 -92
- data/Gemfile +1 -1
- data/README.adoc +372 -1
- data/docs/_guides/formatter/formatter-architecture.adoc +401 -0
- data/docs/_guides/ruby-api/parsing-files.adoc +1 -1
- data/docs/_pages/parsers.adoc +31 -5
- data/docs/lychee.toml +3 -0
- data/expressir.gemspec +3 -2
- data/lib/expressir/benchmark.rb +6 -6
- data/lib/expressir/cli.rb +9 -0
- data/lib/expressir/commands/base.rb +2 -9
- data/lib/expressir/commands/format.rb +30 -0
- data/lib/expressir/commands/package.rb +92 -87
- data/lib/expressir/commands/validate_ascii.rb +2 -4
- data/lib/expressir/commands/validate_load.rb +8 -5
- data/lib/expressir/coverage.rb +15 -11
- data/lib/expressir/errors.rb +115 -0
- data/lib/expressir/express/builder.rb +350 -0
- data/lib/expressir/express/builders/attribute_decl_builder.rb +38 -0
- data/lib/expressir/express/builders/built_in_builder.rb +88 -0
- data/lib/expressir/express/builders/constant_builder.rb +115 -0
- data/lib/expressir/express/builders/declaration_builder.rb +24 -0
- data/lib/expressir/express/builders/derive_clause_builder.rb +16 -0
- data/lib/expressir/express/builders/derived_attr_builder.rb +28 -0
- data/lib/expressir/express/builders/domain_rule_builder.rb +21 -0
- data/lib/expressir/express/builders/entity_decl_builder.rb +108 -0
- data/lib/expressir/express/builders/explicit_attr_builder.rb +52 -0
- data/lib/expressir/express/builders/expression_builder.rb +453 -0
- data/lib/expressir/express/builders/function_decl_builder.rb +84 -0
- data/lib/expressir/express/builders/helpers.rb +148 -0
- data/lib/expressir/express/builders/interface_builder.rb +171 -0
- data/lib/expressir/express/builders/inverse_attr_builder.rb +45 -0
- data/lib/expressir/express/builders/inverse_attr_type_builder.rb +36 -0
- data/lib/expressir/express/builders/inverse_clause_builder.rb +16 -0
- data/lib/expressir/express/builders/literal_builder.rb +107 -0
- data/lib/expressir/express/builders/procedure_decl_builder.rb +80 -0
- data/lib/expressir/express/builders/qualifier_builder.rb +128 -0
- data/lib/expressir/express/builders/reference_builder.rb +27 -0
- data/lib/expressir/express/builders/rule_decl_builder.rb +95 -0
- data/lib/expressir/express/builders/schema_body_decl_builder.rb +22 -0
- data/lib/expressir/express/builders/schema_decl_builder.rb +62 -0
- data/lib/expressir/express/builders/schema_version_builder.rb +40 -0
- data/lib/expressir/express/builders/simple_id_builder.rb +26 -0
- data/lib/expressir/express/builders/statement_builder.rb +250 -0
- data/lib/expressir/express/builders/subtype_constraint_builder.rb +188 -0
- data/lib/expressir/express/builders/syntax_builder.rb +19 -0
- data/lib/expressir/express/builders/token_builder.rb +15 -0
- data/lib/expressir/express/builders/type_builder.rb +264 -0
- data/lib/expressir/express/builders/type_decl_builder.rb +32 -0
- data/lib/expressir/express/builders/unique_clause_builder.rb +22 -0
- data/lib/expressir/express/builders/unique_rule_builder.rb +36 -0
- data/lib/expressir/express/builders/where_clause_builder.rb +22 -0
- data/lib/expressir/express/builders.rb +43 -0
- data/lib/expressir/express/error.rb +18 -2
- data/lib/expressir/express/formatter.rb +18 -1508
- data/lib/expressir/express/formatters/data_types_formatter.rb +317 -0
- data/lib/expressir/express/formatters/declarations_formatter.rb +689 -0
- data/lib/expressir/express/formatters/expressions_formatter.rb +160 -0
- data/lib/expressir/express/formatters/literals_formatter.rb +46 -0
- data/lib/expressir/express/formatters/references_formatter.rb +42 -0
- data/lib/expressir/express/formatters/remark_formatter.rb +296 -0
- data/lib/expressir/express/formatters/statements_formatter.rb +224 -0
- data/lib/expressir/express/formatters/supertype_expressions_formatter.rb +48 -0
- data/lib/expressir/express/parser.rb +129 -14
- data/lib/expressir/express/pretty_formatter.rb +624 -0
- data/lib/expressir/express/remark_attacher.rb +1155 -0
- data/lib/expressir/express/resolve_references_model_visitor.rb +1 -0
- data/lib/expressir/express/streaming_builder.rb +467 -0
- data/lib/expressir/express/transformer/remark_handling.rb +196 -0
- data/lib/expressir/model/identifier.rb +1 -1
- data/lib/expressir/model/model_element.rb +30 -2
- data/lib/expressir/model/remark_info.rb +51 -0
- data/lib/expressir/model/search_engine.rb +58 -9
- data/lib/expressir/version.rb +1 -1
- data/lib/expressir.rb +6 -4
- metadata +71 -7
- data/lib/expressir/express/visitor.rb +0 -2815
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
== Formatter architecture
|
|
2
|
+
|
|
3
|
+
=== General
|
|
4
|
+
|
|
5
|
+
The Expressir formatter uses a modular architecture that separates formatting
|
|
6
|
+
concerns into focused, composable modules. This design improves maintainability,
|
|
7
|
+
testability, and extensibility while preserving full backward compatibility with
|
|
8
|
+
existing code.
|
|
9
|
+
|
|
10
|
+
The formatter architecture follows object-oriented design principles with clear
|
|
11
|
+
separation of concerns. Each module handles a specific aspect of EXPRESS
|
|
12
|
+
formatting, and they work together through Ruby's module inclusion mechanism.
|
|
13
|
+
|
|
14
|
+
The concept of Profiles is used to define different formatting styles or
|
|
15
|
+
conventions. The base `Formatter` class provides standard formatting, while
|
|
16
|
+
specialized formatters like `PrettyFormatter` extend it with additional features
|
|
17
|
+
and ELF (EXPRESS Language Foundation) compliance.
|
|
18
|
+
|
|
19
|
+
=== Feature
|
|
20
|
+
|
|
21
|
+
==== Remark preservation
|
|
22
|
+
|
|
23
|
+
Expressir fully preserves EXPRESS remarks (comments) during parsing and formatting, maintaining them in their original positions:
|
|
24
|
+
|
|
25
|
+
==== Preamble remarks
|
|
26
|
+
|
|
27
|
+
Remarks between a scope declaration and its first child are preserved as preamble remarks:
|
|
28
|
+
|
|
29
|
+
[source,express]
|
|
30
|
+
----
|
|
31
|
+
SCHEMA example;
|
|
32
|
+
-- This is a preamble remark
|
|
33
|
+
-- It appears after SCHEMA but before declarations
|
|
34
|
+
|
|
35
|
+
ENTITY person;
|
|
36
|
+
-- Entity preamble remark
|
|
37
|
+
name : STRING;
|
|
38
|
+
END_ENTITY;
|
|
39
|
+
|
|
40
|
+
END_SCHEMA;
|
|
41
|
+
----
|
|
42
|
+
|
|
43
|
+
==== Inline tail remarks
|
|
44
|
+
|
|
45
|
+
Remarks on the same line as attribute or enumeration item declarations:
|
|
46
|
+
|
|
47
|
+
[source,express]
|
|
48
|
+
----
|
|
49
|
+
ENTITY person;
|
|
50
|
+
name : STRING; -- Inline remark for name attribute
|
|
51
|
+
age : INTEGER; -- Inline remark for age attribute
|
|
52
|
+
END_ENTITY;
|
|
53
|
+
|
|
54
|
+
TYPE status = ENUMERATION OF
|
|
55
|
+
(active, -- Active status
|
|
56
|
+
inactive, -- Inactive status
|
|
57
|
+
pending); -- Pending status
|
|
58
|
+
END_TYPE;
|
|
59
|
+
----
|
|
60
|
+
|
|
61
|
+
==== END_* scope remarks
|
|
62
|
+
|
|
63
|
+
Remarks on END_TYPE, END_ENTITY, END_SCHEMA, etc. lines:
|
|
64
|
+
|
|
65
|
+
[source,express]
|
|
66
|
+
----
|
|
67
|
+
TYPE status = ENUMERATION OF
|
|
68
|
+
(active,
|
|
69
|
+
inactive);
|
|
70
|
+
END_TYPE; -- Status enumeration type
|
|
71
|
+
|
|
72
|
+
ENTITY person;
|
|
73
|
+
name : STRING;
|
|
74
|
+
END_ENTITY; -- Person entity
|
|
75
|
+
|
|
76
|
+
END_SCHEMA; -- schema_name
|
|
77
|
+
----
|
|
78
|
+
|
|
79
|
+
==== Unicode support
|
|
80
|
+
|
|
81
|
+
All remark types support full Unicode content:
|
|
82
|
+
|
|
83
|
+
[source,express]
|
|
84
|
+
----
|
|
85
|
+
SCHEMA test;
|
|
86
|
+
-- 日本語、中文、한글 in remarks
|
|
87
|
+
|
|
88
|
+
ENTITY person;
|
|
89
|
+
name : STRING; -- Name in Japanese: 名前
|
|
90
|
+
END_ENTITY;
|
|
91
|
+
|
|
92
|
+
END_SCHEMA; -- test
|
|
93
|
+
----
|
|
94
|
+
|
|
95
|
+
For implementation details, see link:docs/ARCHITECTURE.md#remark-attachment-system[Remark Attachment System].
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
=== Module organization
|
|
99
|
+
|
|
100
|
+
The formatter consists of a main `Formatter` class that includes specialized
|
|
101
|
+
formatting modules, each responsible for a distinct category of EXPRESS language
|
|
102
|
+
constructs.
|
|
103
|
+
|
|
104
|
+
==== Formatter modules
|
|
105
|
+
|
|
106
|
+
RemarkFormatter:: Handles formatting of remarks (comments) in all forms
|
|
107
|
+
+
|
|
108
|
+
[example]
|
|
109
|
+
====
|
|
110
|
+
* Embedded remarks: `(* comment *)`
|
|
111
|
+
* Tail remarks: `-- comment`
|
|
112
|
+
* Tagged remarks with identifiers
|
|
113
|
+
* Preamble remarks before first declarations
|
|
114
|
+
* END_* scope remarks on closing statements
|
|
115
|
+
====
|
|
116
|
+
|
|
117
|
+
RemarkItemFormatter:: Formats individual remark items and remark metadata
|
|
118
|
+
+
|
|
119
|
+
[example]
|
|
120
|
+
====
|
|
121
|
+
Handles the internal structure of remark items, including tags, format specification, and text content.
|
|
122
|
+
====
|
|
123
|
+
|
|
124
|
+
LiteralsFormatter:: Formats literal values (strings, numbers, booleans, binary)
|
|
125
|
+
+
|
|
126
|
+
[example]
|
|
127
|
+
====
|
|
128
|
+
[source,express]
|
|
129
|
+
----
|
|
130
|
+
'string literal'
|
|
131
|
+
123
|
|
132
|
+
3.14
|
|
133
|
+
TRUE
|
|
134
|
+
%10101011
|
|
135
|
+
----
|
|
136
|
+
====
|
|
137
|
+
|
|
138
|
+
ReferencesFormatter:: Formats references to entities, attributes, and other elements
|
|
139
|
+
+
|
|
140
|
+
[example]
|
|
141
|
+
====
|
|
142
|
+
[source,express]
|
|
143
|
+
----
|
|
144
|
+
entity_ref
|
|
145
|
+
entity_ref.attribute_ref
|
|
146
|
+
entity_ref[index]
|
|
147
|
+
----
|
|
148
|
+
====
|
|
149
|
+
|
|
150
|
+
SupertypeExpressionsFormatter:: Formats supertype constraint expressions
|
|
151
|
+
+
|
|
152
|
+
[example]
|
|
153
|
+
====
|
|
154
|
+
[source,express]
|
|
155
|
+
----
|
|
156
|
+
SUPERTYPE OF (ONEOF(subtype1, subtype2))
|
|
157
|
+
ABSTRACT SUPERTYPE OF (subtype1 AND subtype2)
|
|
158
|
+
----
|
|
159
|
+
====
|
|
160
|
+
|
|
161
|
+
StatementsFormatter:: Formats procedural statements (assignment, if, case, repeat, etc.)
|
|
162
|
+
+
|
|
163
|
+
[example]
|
|
164
|
+
====
|
|
165
|
+
[source,express]
|
|
166
|
+
----
|
|
167
|
+
IF condition THEN
|
|
168
|
+
statement;
|
|
169
|
+
END_IF;
|
|
170
|
+
----
|
|
171
|
+
====
|
|
172
|
+
|
|
173
|
+
ExpressionsFormatter:: Formats expressions (binary, unary, function calls, queries)
|
|
174
|
+
+
|
|
175
|
+
[example]
|
|
176
|
+
====
|
|
177
|
+
[source,express]
|
|
178
|
+
----
|
|
179
|
+
a + b * c
|
|
180
|
+
QUERY(x <* entity | condition)
|
|
181
|
+
entity_constructor(arg1, arg2)
|
|
182
|
+
----
|
|
183
|
+
====
|
|
184
|
+
|
|
185
|
+
DataTypesFormatter:: Formats data type declarations (INTEGER, STRING, ENUMERATION, SELECT, etc.)
|
|
186
|
+
+
|
|
187
|
+
[example]
|
|
188
|
+
====
|
|
189
|
+
[source,express]
|
|
190
|
+
----
|
|
191
|
+
STRING(255)
|
|
192
|
+
ENUMERATION OF (red, green, blue)
|
|
193
|
+
SELECT (type1, type2, type3)
|
|
194
|
+
----
|
|
195
|
+
====
|
|
196
|
+
|
|
197
|
+
DeclarationsFormatter:: Formats declarations (ENTITY, TYPE, FUNCTION, SCHEMA, etc.)
|
|
198
|
+
+
|
|
199
|
+
[example]
|
|
200
|
+
====
|
|
201
|
+
[source,express]
|
|
202
|
+
----
|
|
203
|
+
ENTITY person;
|
|
204
|
+
name : STRING;
|
|
205
|
+
END_ENTITY;
|
|
206
|
+
----
|
|
207
|
+
====
|
|
208
|
+
|
|
209
|
+
=== RemarkInfo model
|
|
210
|
+
|
|
211
|
+
==== General
|
|
212
|
+
|
|
213
|
+
Remarks were previously represented as plain strings, which lost important formatting information. The `RemarkInfo` class (defined in `lib/expressir/model/remark_info.rb`) properly models remarks with their complete metadata.
|
|
214
|
+
|
|
215
|
+
==== Attributes
|
|
216
|
+
|
|
217
|
+
The `RemarkInfo` class has three attributes:
|
|
218
|
+
|
|
219
|
+
`text`:: The remark content (String)
|
|
220
|
+
|
|
221
|
+
`format`:: The remark format: 'tail' or 'embedded' (String)
|
|
222
|
+
|
|
223
|
+
`tag`:: Optional tag for associating the remark with specific items (String or nil)
|
|
224
|
+
|
|
225
|
+
==== Methods
|
|
226
|
+
|
|
227
|
+
`tail?`:: Returns true if the remark uses tail format (`-- comment`)
|
|
228
|
+
|
|
229
|
+
`embedded?`:: Returns true if the remark uses embedded format (`(* comment *)`)
|
|
230
|
+
|
|
231
|
+
`tagged?`:: Returns true if the remark has an associated tag
|
|
232
|
+
|
|
233
|
+
`to_s`:: Returns the remark text for backward compatibility
|
|
234
|
+
|
|
235
|
+
==== Benefits over plain strings
|
|
236
|
+
|
|
237
|
+
Type safety:: Explicit format information prevents format confusion
|
|
238
|
+
|
|
239
|
+
Preservation:: Original format is maintained through parse/format cycles
|
|
240
|
+
|
|
241
|
+
Extensibility:: Easy to add metadata (tags, positions, etc.) without breaking existing code
|
|
242
|
+
|
|
243
|
+
Clarity:: Code explicitly shows whether a remark is tail or embedded
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
=== Using the base formatter
|
|
247
|
+
|
|
248
|
+
The base `Formatter` class (defined in `lib/expressir/express/formatter.rb`) provides
|
|
249
|
+
standard EXPRESS formatting with fixed 2-space indentation.
|
|
250
|
+
|
|
251
|
+
.Format a repository
|
|
252
|
+
[example]
|
|
253
|
+
====
|
|
254
|
+
[source,ruby]
|
|
255
|
+
----
|
|
256
|
+
# Parse an EXPRESS schema
|
|
257
|
+
repository = Expressir::Express::Parser.from_file("schema.exp")
|
|
258
|
+
|
|
259
|
+
# Format to string
|
|
260
|
+
formatted = Expressir::Express::Formatter.format(repository)
|
|
261
|
+
puts formatted
|
|
262
|
+
|
|
263
|
+
# Or create instance for custom options
|
|
264
|
+
formatter = Expressir::Express::Formatter.new(no_remarks: true)
|
|
265
|
+
formatted = formatter.format(repository)
|
|
266
|
+
----
|
|
267
|
+
====
|
|
268
|
+
|
|
269
|
+
.Format without remarks
|
|
270
|
+
[example]
|
|
271
|
+
====
|
|
272
|
+
[source,ruby]
|
|
273
|
+
----
|
|
274
|
+
# Useful for generating clean schemas without documentation
|
|
275
|
+
formatter = Expressir::Express::Formatter.new(no_remarks: true)
|
|
276
|
+
clean_schema = formatter.format(repository)
|
|
277
|
+
----
|
|
278
|
+
====
|
|
279
|
+
|
|
280
|
+
=== Using ELF PrettyFormatter
|
|
281
|
+
|
|
282
|
+
The `PrettyFormatter` class (defined in `lib/expressir/express/pretty_formatter.rb`) extends the
|
|
283
|
+
base `Formatter` with ELF (EXPRESS Language Foundation) compliance and
|
|
284
|
+
additional features.
|
|
285
|
+
|
|
286
|
+
See the <<_pretty_print_with_elf_compliance,Pretty print with ELF compliance>>
|
|
287
|
+
section for detailed usage examples and configuration options.
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
=== Extending the formatter
|
|
291
|
+
|
|
292
|
+
==== Creating a custom formatter
|
|
293
|
+
|
|
294
|
+
You can create custom formatters by extending `Formatter` or any class that
|
|
295
|
+
inherits from it.
|
|
296
|
+
|
|
297
|
+
.Custom formatter with specific behavior
|
|
298
|
+
[example]
|
|
299
|
+
====
|
|
300
|
+
[source,ruby]
|
|
301
|
+
----
|
|
302
|
+
class MyCustomFormatter < Expressir::Express::Formatter
|
|
303
|
+
# Override specific formatting methods
|
|
304
|
+
def format_declarations_entity(node)
|
|
305
|
+
# Custom entity formatting logic
|
|
306
|
+
super(node) # Or completely custom implementation
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
# Override indentation
|
|
310
|
+
def indent(str)
|
|
311
|
+
return if str.nil?
|
|
312
|
+
|
|
313
|
+
# Use 3 spaces instead of 2
|
|
314
|
+
indent_str = "   "
|
|
315
|
+
str.split("\n").map { |x| "#{indent_str}#{x}" }.join("\n")
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
# Use the custom formatter
|
|
320
|
+
formatter = MyCustomFormatter.new
|
|
321
|
+
formatted = formatter.format(repository)
|
|
322
|
+
----
|
|
323
|
+
====
|
|
324
|
+
|
|
325
|
+
==== Adding new formatter modules
|
|
326
|
+
|
|
327
|
+
To add a new formatting module:
|
|
328
|
+
|
|
329
|
+
. Create module in `lib/expressir/express/formatters/`
|
|
330
|
+
. Define private formatting methods
|
|
331
|
+
. Include module in `Formatter` class
|
|
332
|
+
. Add tests in `spec/expressir/express/formatters/`
|
|
333
|
+
|
|
334
|
+
.Example: Creating a new formatter module
|
|
335
|
+
[example]
|
|
336
|
+
====
|
|
337
|
+
[source,ruby]
|
|
338
|
+
----
|
|
339
|
+
# lib/expressir/express/formatters/my_formatter.rb
|
|
340
|
+
module Expressir
|
|
341
|
+
module Express
|
|
342
|
+
module MyFormatter
|
|
343
|
+
private
|
|
344
|
+
|
|
345
|
+
def format_my_construct(node)
|
|
346
|
+
# Formatting logic here
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
end
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
# In lib/expressir/express/formatter.rb
|
|
353
|
+
require_relative "formatters/my_formatter"
|
|
354
|
+
|
|
355
|
+
class Formatter
|
|
356
|
+
include MyFormatter
|
|
357
|
+
# ... other includes ...
|
|
358
|
+
end
|
|
359
|
+
----
|
|
360
|
+
====
|
|
361
|
+
|
|
362
|
+
=== Design decisions
|
|
363
|
+
|
|
364
|
+
==== Why modules instead of inheritance
|
|
365
|
+
|
|
366
|
+
The formatter uses module composition instead of class inheritance because:
|
|
367
|
+
|
|
368
|
+
Separation of concerns:: Each module handles one category of formatting
|
|
369
|
+
+
|
|
370
|
+
Each formatter module is focused on a single responsibility (remarks, literals,
|
|
371
|
+
expressions, etc.), making the code easier to understand and maintain.
|
|
372
|
+
|
|
373
|
+
Composability:: Modules can be mixed and matched as needed
|
|
374
|
+
+
|
|
375
|
+
Different formatters can include only the modules they need, or override
|
|
376
|
+
specific modules without affecting others.
|
|
377
|
+
|
|
378
|
+
Testability:: Each module can be tested independently
|
|
379
|
+
+
|
|
380
|
+
Unit tests can focus on individual modules without needing to set up the entire
|
|
381
|
+
formatter.
|
|
382
|
+
|
|
383
|
+
Maintainability:: Changes to one area don't affect others
|
|
384
|
+
+
|
|
385
|
+
Bug fixes or enhancements to one formatter module don't risk breaking other
|
|
386
|
+
formatting logic.
|
|
387
|
+
|
|
388
|
+
==== Why RemarkInfo instead of strings
|
|
389
|
+
|
|
390
|
+
The `RemarkInfo` model was introduced to:
|
|
391
|
+
|
|
392
|
+
Preserve format information:: Tail vs embedded format is crucial for round-trip
|
|
393
|
+
formatting
|
|
394
|
+
|
|
395
|
+
Support tags:: Tags associate remarks with specific schema elements
|
|
396
|
+
|
|
397
|
+
Enable future extensions:: Easy to add line numbers, positions, or other
|
|
398
|
+
metadata
|
|
399
|
+
|
|
400
|
+
Improve type safety:: Explicit object type prevents formatting errors
|
|
401
|
+
|
|
@@ -250,7 +250,7 @@ end
|
|
|
250
250
|
The exception provides:
|
|
251
251
|
|
|
252
252
|
`filename`:: The file that failed to parse
|
|
253
|
-
`parse_failure_cause`:: The underlying
|
|
253
|
+
`parse_failure_cause`:: The underlying Parsanol parse error
|
|
254
254
|
`message`:: Human-readable error description
|
|
255
255
|
|
|
256
256
|
=== Other common errors
|
data/docs/_pages/parsers.adoc
CHANGED
|
@@ -38,8 +38,8 @@ Expressir uses a multi-stage parsing pipeline:
|
|
|
38
38
|
│
|
|
39
39
|
▼
|
|
40
40
|
┌──────────┐
|
|
41
|
-
│
|
|
42
|
-
│ Grammar
|
|
41
|
+
│ Parsanol │ (PEG parser)
|
|
42
|
+
│ Grammar │
|
|
43
43
|
└────┬─────┘
|
|
44
44
|
│
|
|
45
45
|
▼
|
|
@@ -71,9 +71,35 @@ Expressir uses a multi-stage parsing pipeline:
|
|
|
71
71
|
└────────────────────┘
|
|
72
72
|
----
|
|
73
73
|
|
|
74
|
+
==== Backend: Parsanol
|
|
75
|
+
|
|
76
|
+
Expressir uses the **Parsanol** parser backend:
|
|
77
|
+
|
|
78
|
+
**Parsanol (High-Performance Rust)**::
|
|
79
|
+
* High-performance Rust backend
|
|
80
|
+
* 18-44x faster parsing
|
|
81
|
+
* 99.5% fewer allocations
|
|
82
|
+
* Supports source position tracking (Slice)
|
|
83
|
+
* Available automatically when Parsanol gem is installed
|
|
84
|
+
|
|
85
|
+
Expressir automatically uses Parsanol when available:
|
|
86
|
+
|
|
87
|
+
[source,ruby]
|
|
88
|
+
----
|
|
89
|
+
# Automatically uses native Rust parser (Parsanol)
|
|
90
|
+
repo = Expressir::Express::Parser.from_file("geometry.exp")
|
|
91
|
+
|
|
92
|
+
# Check if native parser is available
|
|
93
|
+
if defined?(Parsanol::Native) && Parsanol::Native.available?
|
|
94
|
+
# Using Rust parser - 18-44x faster
|
|
95
|
+
end
|
|
96
|
+
----
|
|
97
|
+
|
|
98
|
+
See the link:https://github.com/parsanol/parsanol-ruby[Parsanol documentation] for performance details.
|
|
99
|
+
|
|
74
100
|
==== Stage 1: Lexical Analysis and Parsing
|
|
75
101
|
|
|
76
|
-
Expressir uses **
|
|
102
|
+
Expressir uses **Parsanol**, a high-performance Parsing Expression Grammar (PEG) parser:
|
|
77
103
|
|
|
78
104
|
[source,ruby]
|
|
79
105
|
----
|
|
@@ -87,7 +113,7 @@ rule(:entityHead) do
|
|
|
87
113
|
end
|
|
88
114
|
----
|
|
89
115
|
|
|
90
|
-
**
|
|
116
|
+
**Parsanol advantages**:
|
|
91
117
|
|
|
92
118
|
* **Pure Ruby**: No external dependencies
|
|
93
119
|
* **Composable rules**: Complex grammars from simple parts
|
|
@@ -677,7 +703,7 @@ link:../guides/cli/benchmark-performance.html[Benchmark and optimize]
|
|
|
677
703
|
|
|
678
704
|
=== Bibliography
|
|
679
705
|
|
|
680
|
-
* https://github.com/
|
|
706
|
+
* https://github.com/parsanol/parsanol-ruby[Parsanol] - High-performance PEG parser for Ruby
|
|
681
707
|
* https://en.wikipedia.org/wiki/Parsing_expression_grammar[PEG on Wikipedia] - Understanding PEG parsers
|
|
682
708
|
* link:express-language.html[EXPRESS Language] - Understanding what is being parsed
|
|
683
709
|
* link:data-model.html[Data Model] - Understanding the parsing result
|
data/docs/lychee.toml
CHANGED
|
@@ -52,6 +52,9 @@ exclude = [
|
|
|
52
52
|
"https://twitter\\.com/.*",
|
|
53
53
|
"https://x\\.com/.*",
|
|
54
54
|
|
|
55
|
+
# Self-referential documentation links (site under development)
|
|
56
|
+
"https://www\\.lutaml\\.org/expressir/.*",
|
|
57
|
+
|
|
55
58
|
# External sites with broken or moved URLs
|
|
56
59
|
"https://www\\.nist\\.gov/.*",
|
|
57
60
|
"https://www\\.steptools\\.com/.*",
|
data/expressir.gemspec
CHANGED
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
spec.metadata["changelog_uri"] = "https://github.com/lutaml/expressir/releases"
|
|
24
24
|
spec.metadata["rubygems_mfa_required"] = "true"
|
|
25
25
|
|
|
26
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 3.
|
|
26
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
|
|
27
27
|
|
|
28
28
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
|
29
29
|
`git ls-files -z`.split("\x0").reject do |f|
|
|
@@ -32,13 +32,14 @@ Gem::Specification.new do |spec|
|
|
|
32
32
|
end
|
|
33
33
|
|
|
34
34
|
spec.add_dependency "base64"
|
|
35
|
+
spec.add_dependency "benchmark"
|
|
35
36
|
spec.add_dependency "benchmark-ips"
|
|
36
37
|
spec.add_dependency "csv"
|
|
37
38
|
spec.add_dependency "liquid"
|
|
38
39
|
spec.add_dependency "lutaml-model"
|
|
39
40
|
spec.add_dependency "moxml"
|
|
40
41
|
spec.add_dependency "paint"
|
|
41
|
-
spec.add_dependency "
|
|
42
|
+
spec.add_dependency "parsanol", "~> 1.1"
|
|
42
43
|
spec.add_dependency "ruby-progressbar", "~> 1.11"
|
|
43
44
|
spec.add_dependency "rubyzip", "~> 2.3"
|
|
44
45
|
spec.add_dependency "table_tennis"
|
data/lib/expressir/benchmark.rb
CHANGED
|
@@ -30,7 +30,7 @@ module Expressir
|
|
|
30
30
|
end
|
|
31
31
|
|
|
32
32
|
# Calculate objects per second
|
|
33
|
-
if result.
|
|
33
|
+
if result.is_a?(Model::Repository)
|
|
34
34
|
objects_per_second = calculate_objects_per_second(result)
|
|
35
35
|
puts "Objects per second: #{objects_per_second}" if Expressir.configuration.benchmark_verbose?
|
|
36
36
|
end
|
|
@@ -42,7 +42,7 @@ module Expressir
|
|
|
42
42
|
result = yield
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
-
if result.
|
|
45
|
+
if result.is_a?(Model::Repository)
|
|
46
46
|
objects_per_second = calculate_objects_per_second(result, time.real)
|
|
47
47
|
output_benchmark_result(file, time.real, objects_per_second)
|
|
48
48
|
else
|
|
@@ -157,7 +157,7 @@ module Expressir
|
|
|
157
157
|
puts "Cache write time: #{results[:cache_write_time].round(4)}s"
|
|
158
158
|
puts "Cache read time: #{results[:cache_read_time].round(4)}s"
|
|
159
159
|
|
|
160
|
-
if results[:repository].
|
|
160
|
+
if results[:repository].is_a?(Model::Repository)
|
|
161
161
|
objects = count_objects(results[:repository])
|
|
162
162
|
puts "Total objects: #{objects}"
|
|
163
163
|
puts "Objects per second (parsing): #{(objects / results[:parsing_time]).round(2)}"
|
|
@@ -189,7 +189,7 @@ module Expressir
|
|
|
189
189
|
# @param repository [Object] The repository object
|
|
190
190
|
# @return [Integer] Number of objects
|
|
191
191
|
def count_objects(repository)
|
|
192
|
-
return 0 unless repository.
|
|
192
|
+
return 0 unless repository.is_a?(Model::Repository)
|
|
193
193
|
|
|
194
194
|
count = repository.schemas.size
|
|
195
195
|
|
|
@@ -261,12 +261,12 @@ module Expressir
|
|
|
261
261
|
|
|
262
262
|
# Try to count total objects and schemas
|
|
263
263
|
results.each do |result|
|
|
264
|
-
if result.
|
|
264
|
+
if result.is_a?(Model::Repository)
|
|
265
265
|
schema_count += result.schemas.size
|
|
266
266
|
total_objects += count_objects(result)
|
|
267
267
|
elsif result.is_a?(Array)
|
|
268
268
|
result.each do |r|
|
|
269
|
-
if r.
|
|
269
|
+
if r.is_a?(Model::Declarations::Schema)
|
|
270
270
|
schema_count += 1
|
|
271
271
|
total_objects += 1 # Count the schema itself
|
|
272
272
|
end
|
data/lib/expressir/cli.rb
CHANGED
|
@@ -23,6 +23,15 @@ module Expressir
|
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
desc "format PATH", "pretty print EXPRESS schema located at PATH"
|
|
26
|
+
method_option :profile, type: :string,
|
|
27
|
+
desc: "Formatting profile: 'iso' (ISO/TC 184/SC 4) or 'elf' (ELF Pretty Print)",
|
|
28
|
+
default: "iso"
|
|
29
|
+
method_option :indent, type: :numeric,
|
|
30
|
+
desc: "Indentation width (ELF profile only)",
|
|
31
|
+
default: 4
|
|
32
|
+
method_option :provenance, type: :boolean,
|
|
33
|
+
desc: "Include provenance information (ELF profile only)",
|
|
34
|
+
default: true
|
|
26
35
|
def format(path)
|
|
27
36
|
Commands::Format.new(options).run(path)
|
|
28
37
|
end
|
|
@@ -17,15 +17,8 @@ module Expressir
|
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
def exit_with_error(message,
|
|
21
|
-
|
|
22
|
-
# In test mode, raise an exception instead of exiting
|
|
23
|
-
# This makes it easier to test error cases
|
|
24
|
-
if defined?(@test_mode) && @test_mode
|
|
25
|
-
raise message # Just raise the message as an exception
|
|
26
|
-
else
|
|
27
|
-
exit exit_code
|
|
28
|
-
end
|
|
20
|
+
def exit_with_error(message, _exit_code = 1)
|
|
21
|
+
raise Expressir::CommandError.new(message)
|
|
29
22
|
end
|
|
30
23
|
end
|
|
31
24
|
end
|
|
@@ -3,6 +3,36 @@ module Expressir
|
|
|
3
3
|
class Format < Base
|
|
4
4
|
def run(path)
|
|
5
5
|
repository = Expressir::Express::Parser.from_file(path)
|
|
6
|
+
|
|
7
|
+
profile = options[:profile] || "iso"
|
|
8
|
+
|
|
9
|
+
case profile.downcase
|
|
10
|
+
when "elf"
|
|
11
|
+
format_with_elf_profile(repository)
|
|
12
|
+
when "iso"
|
|
13
|
+
format_with_iso_profile(repository)
|
|
14
|
+
else
|
|
15
|
+
raise Expressir::InvalidOptionError.new(
|
|
16
|
+
"profile",
|
|
17
|
+
profile,
|
|
18
|
+
valid_options: ["iso", "elf"]
|
|
19
|
+
)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def format_with_elf_profile(repository)
|
|
26
|
+
formatter_options = {
|
|
27
|
+
indent: options[:indent] || 4,
|
|
28
|
+
provenance: options.fetch(:provenance, true),
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
formatter = Expressir::Express::PrettyFormatter.new(formatter_options)
|
|
32
|
+
say formatter.format(repository)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def format_with_iso_profile(repository)
|
|
6
36
|
repository.schemas.each do |schema|
|
|
7
37
|
say "\n(* Expressir formatted schema: #{schema.id} *)\n"
|
|
8
38
|
say schema.to_s(no_remarks: true)
|