@algosail/tree-sitter 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/grammar.js CHANGED
@@ -1,39 +1,95 @@
1
1
  /// <reference types="tree-sitter-cli/dsl" />
2
2
  // @ts-check
3
3
 
4
- const UPPERNAME = /[A-Z][a-zA-Z0-9_]*/
5
- const LOWERNAME = /[a-z][a-zA-Z0-9_]*/
6
-
7
4
  module.exports = grammar({
8
5
  name: 'sail',
9
6
 
10
7
  extras: ($) => [/\s+/],
11
8
 
12
9
  conflicts: ($) => [
13
- // '(' can start a comment OR a signature tree-sitter resolves by
14
- // looking for '--' inside.
15
- [$.comment, $.signature],
16
- [$.comment, $.sig_quotation],
17
- // After word_def's signature, '(' is ambiguous: body comment vs next
18
- // top-level item. Declare as GLR self-conflict so both are explored.
19
- [$.word_def],
20
- // After tag_group_name + type_variables, '(' is ambiguous: doc comment
21
- // for this tag_group vs a top-level comment that follows it.
22
- [$.tag_group],
23
- [$.tag_def],
24
- [$.map_def],
25
- [$.map_field],
26
- // tag_group contains tag_def children; both can follow an uppercase name
27
- [$.tag_def, $.tag_group],
28
- // module_alias and type_name are both /[A-Z][a-zA-Z0-9_]*/
29
- // [$.type_name, $.module_alias],
30
- // type_variable and identifier are both lowercase words
10
+ // GLR self-conflicts: after the rule's main tokens, '(' is ambiguous —
11
+ // it could be the doc comment for THIS node or the start of the NEXT
12
+ // top-level construct. Cannot be resolved by LALR(1).
13
+ [$.group],
14
+ [$.tag],
15
+ [$.map],
16
+ [$.field],
17
+
18
+ // After word_def + sig + optional(doc), a comment is ambiguous:
19
+ // it could be the doc field OR an _expr in the body.
20
+ [$.word, $._expr],
31
21
  ],
32
22
 
33
23
  rules: {
34
24
  source_file: ($) => repeat($._top_level),
25
+ _top_level: ($) => choice($.comment, $.import, $.group, $.map, $.word),
26
+
27
+ // +Module
28
+ module_def: ($) => /\+[A-Z][a-zA-Z0-9_]*/,
29
+ // ~Module
30
+ module_ref: ($) => /~[A-Z][a-zA-Z0-9_]*/,
31
+ // &Group
32
+ group_def: ($) => /&[A-Z][a-zA-Z0-9_]*/,
33
+ // &Group
34
+ group_ref: ($) => /&[A-Z][a-zA-Z0-9_]*/,
35
+ // #Tag
36
+ tag_def: ($) => /#[A-Z][a-zA-Z0-9_]*/,
37
+ // #Tag
38
+ tag_ref: ($) => /#[A-Z][a-zA-Z0-9_]*/,
39
+ // _Tag
40
+ tag_pattern: ($) => /\_[A-Z][a-zA-Z0-9_]*/,
41
+ // _
42
+ default_pattern: ($) => token('_'),
43
+ // $Map
44
+ map_def: ($) => /\$[A-Z][a-zA-Z0-9_]*/,
45
+ // $Map
46
+ map_ref: ($) => /\$[A-Z][a-zA-Z0-9_]*/,
47
+ // .field
48
+ field_def: ($) => /\.[a-z][a-zA-Z0-9_]*/,
49
+ // $Map.field
50
+ field_ref: ($) => /\$[A-Z][a-zA-Z0-9_]*\.[a-z][a-zA-Z0-9_]*/,
51
+ // @word
52
+ word_def: ($) => /@[a-z][a-zA-Z0-9_]*/,
53
+ // /word
54
+ word_ref: ($) => /\/[a-z][a-zA-Z0-9_]*/,
55
+
56
+ // ~Module&Group
57
+ module_group_ref: ($) => /~[A-Z][a-zA-Z0-9_]*&[A-Z][a-zA-Z0-9_]*/,
58
+ // ~Module#Tag
59
+ module_tag_ref: ($) => /~[A-Z][a-zA-Z0-9_]*#[A-Z][a-zA-Z0-9_]*/,
60
+ // ~Module$Map
61
+ module_map_ref: ($) => /~[A-Z][a-zA-Z0-9_]*\$[A-Z][a-zA-Z0-9_]*/,
62
+ // ~Module$Map.field
63
+ module_field_ref: ($) => /~[A-Z][a-zA-Z0-9_]*\$[A-Z][a-zA-Z0-9_]*\.[a-z][a-zA-Z0-9_]*/,
64
+ // ~Module/word
65
+ module_word_ref: ($) => /~[A-Z][a-zA-Z0-9_]*\/[a-z][a-zA-Z0-9_]*/,
66
+
67
+ // Uppercase type: Int, Str, Maybe, List, etc.
68
+ type: ($) => /[A-Z][a-zA-Z0-9_]*/,
69
+ // Lowercase type variable: a, b, elem, etc.
70
+ type_var: ($) => /[a-z][a-zA-Z0-9_]*/,
71
+ // ..a, ..row — spread / row-variable in a signature
72
+ spread: ($) => /\.\.[a-z][a-zA-Z0-9_]*/,
73
+
74
+ // +Effect
75
+ effect_add: ($) => /\+[A-Z][a-zA-Z0-9_]*/,
76
+ // -Effect
77
+ effect_remove: ($) => /\-[A-Z][a-zA-Z0-9_]*/,
35
78
 
36
- _top_level: ($) => choice($.comment, $.import_decl, $.tag_group, $.map_def, $.word_def),
79
+ // :name
80
+ slot_push: ($) => /:[a-z][a-zA-Z0-9_]*/,
81
+
82
+ // ;name
83
+ slot_pop: ($) => /;[a-z][a-zA-Z0-9_]*/,
84
+
85
+ // 'raw string literal'
86
+ raw_string: ($) => /\'[^\']*\'/,
87
+
88
+ // Catch-all: any non-whitespace sequence that doesn't match a more specific
89
+ // rule. prec(-1) gives it the lowest priority so every other token wins
90
+ // when there is a tie. Structural characters ( ) [ ] are excluded because
91
+ // they are needed by the parser to delimit blocks and comments.
92
+ raw_value: ($) => token(prec(-1, /[^\s\[\]()']+/)),
37
93
 
38
94
  // Comment / doc block: ( any text )
39
95
  // comment_content is recursive: it can contain plain text and/or nested
@@ -41,47 +97,53 @@ module.exports = grammar({
41
97
  comment: ($) => seq('(', optional($.comment_content), ')'),
42
98
  comment_content: ($) => repeat1(choice(/[^()]+/, $.comment)),
43
99
 
44
- // Import: +path/or/+pkg Alias
45
- import_decl: ($) => seq(field('path', $.import_path), field('alias', $.module_alias)),
46
- import_path: ($) => seq('+', field('url', alias(/[^\s]+/, $.url))),
47
- module_alias: ($) => seq('~', field('module', alias(UPPERNAME, $.module_ref))),
100
+ // Import: +Alias path/or/+pkg
101
+ import: ($) => seq(field('module', $.module_def), field('path', $.path)),
102
+ path: ($) => /[^\s]+/,
103
+ // seq('+', field('url', alias(/[^\s]+/, $.url))),
48
104
 
49
105
  // Tag group: &Name typeParam* (#TagCase typeParam*)*
50
- tag_group: ($) =>
106
+ group: ($) =>
51
107
  seq(
52
- field('name_def', $.tag_group_name),
53
- repeat($.type_variable),
108
+ field('def', $.group_def),
109
+ repeat($.type_var),
54
110
  optional(field('doc', $.comment)),
55
- repeat($.tag_def),
111
+ repeat($.tag),
56
112
  ),
57
- tag_group_name: ($) => seq('&', field('name', alias(UPPERNAME, $.group_ref))),
58
- tag_def: ($) =>
113
+ tag: ($) =>
59
114
  seq(
60
- field('name_def', $.tag_name),
61
- optional(field('type_param', $.type_variable)),
115
+ field('def', $.tag_def),
116
+ optional(field('type_param', $.type_var)),
62
117
  optional(field('doc', $.comment)),
63
118
  ),
64
- tag_name: ($) => seq('#', field('name', alias(UPPERNAME, $.tag_ref))),
119
+ group_type: ($) =>
120
+ seq(field('group', choice($.group_ref, $.module_group_ref)), optional($._generic)),
121
+ _generic: ($) => seq('{', field('params', repeat($._generic_content)), '}'),
122
+ _generic_content: ($) => choice($.type, $.group_type, $.map_ref, $.module_map_ref),
65
123
 
66
124
  // Map definition: $Name (.field Type)*
67
- map_def: ($) =>
68
- seq(field('name_def', $.map_name), optional(field('doc', $.comment)), repeat($.map_field)),
69
- map_name: ($) => seq('$', field('name', alias(UPPERNAME, $.map_ref))),
70
- map_field: ($) =>
125
+ map: ($) => seq(field('def', $.map_def), optional(field('doc', $.comment)), repeat($.field)),
126
+ field: ($) =>
71
127
  seq(
72
- field('key', $.map_field_name),
73
- field('type', $.type_name),
128
+ field('key', $.field_def),
129
+ field('type', $._field_types),
74
130
  optional(field('doc', $.comment)),
75
131
  ),
76
- map_field_name: ($) => seq('.', field('name', alias(LOWERNAME, $.field_ref))),
132
+ _field_types: ($) => choice($.type, $.group_type, $.map_ref, $.module_map_ref),
77
133
 
78
134
  // Word definition: @name ( sig ) ( doc )? expr*
79
135
  // Signature is required per the spec ("Word definition must have a signature").
80
136
  // prec.right makes the body's repeat greedy: prefer consuming '(' as a body
81
137
  // comment rather than ending the word early.
82
- word_def: ($) =>
83
- prec.right(seq(field('name_def', $.word_name), field('sig', $.signature), repeat($._expr))),
84
- word_name: ($) => seq('@', field('name', alias(LOWERNAME, $.word_ref))),
138
+ word: ($) =>
139
+ prec.right(
140
+ seq(
141
+ field('name_def', $.word_def),
142
+ field('sig', $.signature),
143
+ optional(field('doc', $.comment)),
144
+ repeat($._expr),
145
+ ),
146
+ ),
85
147
 
86
148
  // Signature: ( inputs -- outputs +effects )
87
149
  // The required '--' token is what makes it unambiguous vs a comment.
@@ -92,10 +154,13 @@ module.exports = grammar({
92
154
  $.effect_add,
93
155
  $.effect_remove,
94
156
  $.spread,
95
- $.type_name,
96
- $.type_variable,
157
+ $.type,
158
+ $.type_var,
97
159
  $.sig_list,
98
160
  $.sig_quotation,
161
+ $.group_type,
162
+ $.map_ref,
163
+ $.module_map_ref,
99
164
  ),
100
165
 
101
166
  // [ Type Type ... ] — list / tuple type in a signature
@@ -104,35 +169,20 @@ module.exports = grammar({
104
169
  // ( a b -- c d ) — higher-order function type nested inside a signature
105
170
  sig_quotation: ($) => seq('(', repeat($._sig_item), $.sig_arrow, repeat($._sig_item), ')'),
106
171
 
107
- // +IO, +FAIL, etc.
108
- effect_add: ($) => seq('+', field('name', alias(UPPERNAME, $.effect_ref))),
109
-
110
- // -IO, -FAIL, etc. (uppercase after dash avoids matching negative numbers)
111
- effect_remove: ($) => seq('-', field('name', alias(UPPERNAME, $.effect_ref))),
112
-
113
- // ..a, ..row — spread / row-variable in a signature
114
- spread: ($) => seq('..', field('name', alias(/[a-zA-Z][a-zA-Z0-9_]*/, $.spread_ref))),
115
-
116
- // Uppercase type: Int, Str, Maybe, List, etc.
117
- type_name: ($) => UPPERNAME,
118
-
119
- // Lowercase type variable: a, b, elem, etc.
120
- type_variable: ($) => LOWERNAME,
121
-
122
172
  // Expressions inside word bodies
123
173
  _expr: ($) =>
124
174
  choice(
125
175
  $.comment, // doc / inline comment block
126
176
  $.quotation,
127
177
  $.builtin_word,
128
- $.word_call,
129
- $.module_word_call,
130
- $.module_tag_constructor,
131
- $.module_map_access,
132
- $.map_access,
133
- $.tag_constructor,
178
+ $.word_ref,
179
+ $.module_word_ref,
180
+ $.tag_ref,
181
+ $.module_tag_ref,
134
182
  $.tag_pattern,
135
183
  $.default_pattern,
184
+ $.field_ref,
185
+ $.module_field_ref,
136
186
  $.slot_push,
137
187
  $.slot_pop,
138
188
  $.raw_string,
@@ -170,71 +220,5 @@ module.exports = grammar({
170
220
  'ERROR',
171
221
  ),
172
222
  ),
173
-
174
- // /wordName — call a locally defined word
175
- word_call: ($) => seq('/', field('word', alias(LOWERNAME, $.word_ref))),
176
-
177
- // ~Module/word — module-qualified word call
178
- // Broken into named fields so the AST exposes module_ref and word_ref nodes.
179
- module_word_call: ($) =>
180
- seq(
181
- '~',
182
- field('module', alias(UPPERNAME, $.module_ref)),
183
- '/',
184
- field('word', alias(LOWERNAME, $.word_ref)),
185
- ),
186
-
187
- // ~Module#TagName — module-qualified tag constructor
188
- module_tag_constructor: ($) =>
189
- seq(
190
- '~',
191
- field('module', alias(UPPERNAME, $.module_ref)),
192
- '#',
193
- field('tag', alias(UPPERNAME, $.tag_ref)),
194
- ),
195
-
196
- // ~Module,Map.field — module-qualified map field accessor / lens
197
- module_map_access: ($) =>
198
- seq(
199
- '~',
200
- field('module', alias(UPPERNAME, $.module_ref)),
201
- ',',
202
- field('map', alias(UPPERNAME, $.map_ref)),
203
- '.',
204
- field('field', alias(LOWERNAME, $.field_ref)),
205
- ),
206
-
207
- // ,Map.field — local map field accessor / lens
208
- map_access: ($) =>
209
- seq(
210
- ',',
211
- field('map', alias(UPPERNAME, $.map_ref)),
212
- '.',
213
- field('field', alias(LOWERNAME, $.field_ref)),
214
- ),
215
-
216
- // #TagName — construct a tagged union value
217
- tag_constructor: ($) => seq('#', field('name', alias(UPPERNAME, $.tag_ref))),
218
-
219
- // _TagName — match/destructure a tag in MATCH
220
- tag_pattern: ($) => seq('_', field('name', alias(UPPERNAME, $.tag_ref))),
221
-
222
- // _ — match/destructure a default in MATCH
223
- default_pattern: ($) => token('_'),
224
-
225
- // :name — pop the top of the stack into a named local slot
226
- slot_push: ($) => seq(':', field('name', alias(LOWERNAME, $.slot_ref))),
227
-
228
- // ;name — push a named local slot back onto the stack
229
- slot_pop: ($) => seq(';', field('name', alias(LOWERNAME, $.slot_ref))),
230
-
231
- // 'raw string literal'
232
- raw_string: ($) => /\'[^\']*\'/,
233
-
234
- // Catch-all: any non-whitespace sequence that doesn't match a more specific
235
- // rule. prec(-1) gives it the lowest priority so every other token wins
236
- // when there is a tie. Structural characters ( ) [ ] are excluded because
237
- // they are needed by the parser to delimit blocks and comments.
238
- raw_value: ($) => token(prec(-1, /[^\s\[\]()']+/)),
239
223
  },
240
224
  })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@algosail/tree-sitter",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Tree-sitter grammar for the Sail language",
5
5
  "main": "bindings/node",
6
6
  "types": "bindings/node",