@algosail/tree-sitter 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/grammar.js CHANGED
@@ -1,6 +1,9 @@
1
1
  /// <reference types="tree-sitter-cli/dsl" />
2
2
  // @ts-check
3
3
 
4
+ const UPPERNAME = /[A-Z][a-zA-Z0-9_]*/
5
+ const LOWERNAME = /[a-z][a-zA-Z0-9_]*/
6
+
4
7
  module.exports = grammar({
5
8
  name: 'sail',
6
9
 
@@ -14,70 +17,76 @@ module.exports = grammar({
14
17
  // After word_def's signature, '(' is ambiguous: body comment vs next
15
18
  // top-level item. Declare as GLR self-conflict so both are explored.
16
19
  [$.word_def],
20
+ // After tag_group_name + type_variables, '(' is ambiguous: doc comment
21
+ // for this tag_group vs a top-level comment that follows it.
22
+ [$.tag_group],
23
+ [$.tag_def],
24
+ [$.map_def],
25
+ [$.map_field],
17
26
  // tag_group contains tag_def children; both can follow an uppercase name
18
27
  [$.tag_def, $.tag_group],
19
28
  // module_alias and type_name are both /[A-Z][a-zA-Z0-9_]*/
20
29
  // [$.type_name, $.module_alias],
21
30
  // type_variable and identifier are both lowercase words
22
- [$.type_variable, $.identifier],
23
31
  ],
24
32
 
25
33
  rules: {
26
34
  source_file: ($) => repeat($._top_level),
27
35
 
28
- _top_level: ($) =>
29
- choice($.comment, $.module_def, $.import_decl, $.tag_group, $.map_def, $.word_def),
36
+ _top_level: ($) => choice($.comment, $.import_decl, $.tag_group, $.map_def, $.word_def),
30
37
 
31
38
  // Comment / doc block: ( any text )
32
- // Intentionally simple no nested-paren support inside comment text.
39
+ // comment_content is recursive: it can contain plain text and/or nested
40
+ // parenthesised groups, so ( use f(x) here ) parses correctly.
33
41
  comment: ($) => seq('(', optional($.comment_content), ')'),
34
- comment_content: ($) => /[^()]+/,
35
-
36
- // Module definition: !ModuleName
37
- module_def: ($) => field('name', $.module_name),
38
- module_name: ($) => /![A-Z][a-zA-Z0-9_]*/,
42
+ comment_content: ($) => repeat1(choice(/[^()]+/, $.comment)),
39
43
 
40
- // Import: |path/or/npm:pkg Alias
44
+ // Import: +<path-or-pkg> ~Alias
41
45
  import_decl: ($) => seq(field('path', $.import_path), field('alias', $.module_alias)),
46
+ import_path: ($) => seq('+', field('url', alias(/[^\s]+/, $.url))),
47
+ module_alias: ($) => seq('~', field('module', alias(UPPERNAME, $.module_ref))),
42
48
 
43
- import_path: ($) => /\|[^\s]+/,
44
-
45
- module_alias: ($) => /~[A-Z][a-zA-Z0-9_]*/,
46
-
47
- // Tag group: &Name typeParam* (>TagCase typeParam*)*
49
+ // Tag group: &Name typeParam* (#TagCase typeParam?)*
48
50
  tag_group: ($) =>
49
- seq(field('name', $.tag_group_name), repeat($.type_variable), repeat($.tag_def)),
50
-
51
- tag_group_name: ($) => /&[A-Z][a-zA-Z0-9_]*/,
52
-
53
- tag_def: ($) => seq(field('name', $.tag_name), repeat($.type_variable)),
54
-
55
- tag_name: ($) => />[A-Z][a-zA-Z0-9_]*/,
56
-
57
- // Map definition: %Name (:field Type)*
58
- map_def: ($) => seq(field('name', $.map_name), repeat($.map_field)),
59
-
60
- map_name: ($) => /%[A-Z][a-zA-Z0-9_]*/,
61
-
62
- map_field: ($) => seq(field('key', $.map_field_name), field('type', $.type_name)),
63
-
64
- map_field_name: ($) => /:[a-z][a-zA-Z0-9_]*/,
51
+ seq(
52
+ field('name_def', $.tag_group_name),
53
+ repeat($.type_variable),
54
+ optional(field('doc', $.comment)),
55
+ repeat($.tag_def),
56
+ ),
57
+ tag_group_name: ($) => seq('&', field('name', alias(UPPERNAME, $.group_ref))),
58
+ tag_def: ($) =>
59
+ seq(
60
+ field('name_def', $.tag_name),
61
+ optional(field('type_param', $.type_variable)),
62
+ optional(field('doc', $.comment)),
63
+ ),
64
+ tag_name: ($) => seq('#', field('name', alias(UPPERNAME, $.tag_ref))),
65
+
66
+ // Map definition: $Name (.field Type)*
67
+ map_def: ($) =>
68
+ seq(field('name_def', $.map_name), optional(field('doc', $.comment)), repeat($.map_field)),
69
+ map_name: ($) => seq('$', field('name', alias(UPPERNAME, $.map_ref))),
70
+ map_field: ($) =>
71
+ seq(
72
+ field('key', $.map_field_name),
73
+ field('type', $.type_name),
74
+ optional(field('doc', $.comment)),
75
+ ),
76
+ map_field_name: ($) => seq('.', field('name', alias(LOWERNAME, $.field_ref))),
65
77
 
66
78
  // Word definition: @name ( sig ) expr*
67
79
  // Signature is required per the spec ("Word definition must have a signature").
68
80
  // prec.right makes the body's repeat greedy: prefer consuming '(' as a body
69
81
  // comment rather than ending the word_def early.
70
82
  word_def: ($) =>
71
- prec.right(seq(field('name', $.word_name), field('sig', $.signature), repeat($._expr))),
72
-
73
- word_name: ($) => /@[a-z][a-zA-Z0-9_]*/,
83
+ prec.right(seq(field('name_def', $.word_name), field('sig', $.signature), repeat($._expr))),
84
+ word_name: ($) => seq('@', field('name', alias(LOWERNAME, $.word_ref))),
74
85
 
75
86
  // Signature: ( inputs -- outputs +effects )
76
87
  // The required '--' token is what makes it unambiguous vs a comment.
77
88
  signature: ($) => seq('(', repeat($._sig_item), $.sig_arrow, repeat($._sig_item), ')'),
78
-
79
89
  sig_arrow: ($) => token('--'),
80
-
81
90
  _sig_item: ($) =>
82
91
  choice(
83
92
  $.effect_add,
@@ -96,19 +105,19 @@ module.exports = grammar({
96
105
  sig_quotation: ($) => seq('(', repeat($._sig_item), $.sig_arrow, repeat($._sig_item), ')'),
97
106
 
98
107
  // +IO, +FAIL, etc.
99
- effect_add: ($) => /\+[A-Z][a-zA-Z0-9_]*/,
108
+ effect_add: ($) => seq('+', field('name', alias(UPPERNAME, $.effect_ref))),
100
109
 
101
110
  // -IO, -FAIL, etc. (uppercase after dash avoids matching negative numbers)
102
- effect_remove: ($) => /-[A-Z][a-zA-Z0-9_]*/,
111
+ effect_remove: ($) => seq('-', field('name', alias(UPPERNAME, $.effect_ref))),
103
112
 
104
113
  // ..a, ..row — spread / row-variable in a signature
105
- spread: ($) => /\.\.[a-zA-Z][a-zA-Z0-9_]*/,
114
+ spread: ($) => seq('..', field('name', alias(/[a-zA-Z][a-zA-Z0-9_]*/, $.spread_ref))),
106
115
 
107
116
  // Uppercase type: Int, Str, Maybe, List, etc.
108
- type_name: ($) => /[A-Z][a-zA-Z0-9_]*/,
117
+ type_name: ($) => UPPERNAME,
109
118
 
110
119
  // Lowercase type variable: a, b, elem, etc.
111
- type_variable: ($) => /[a-z][a-zA-Z0-9_]*/,
120
+ type_variable: ($) => LOWERNAME,
112
121
 
113
122
  // Expressions inside word bodies
114
123
  _expr: ($) =>
@@ -117,15 +126,17 @@ module.exports = grammar({
117
126
  $.quotation,
118
127
  $.builtin_word,
119
128
  $.word_call,
120
- $.module_call,
129
+ $.module_word_call,
130
+ $.module_tag_constructor,
131
+ $.module_map_access,
121
132
  $.map_access,
122
133
  $.tag_constructor,
123
134
  $.tag_pattern,
135
+ $.default_pattern,
124
136
  $.slot_push,
125
137
  $.slot_pop,
126
138
  $.raw_string,
127
- $.number,
128
- $.identifier,
139
+ $.raw_value,
129
140
  ),
130
141
 
131
142
  // [ expr* ] — quotation (anonymous code block or list literal)
@@ -161,33 +172,69 @@ module.exports = grammar({
161
172
  ),
162
173
 
163
174
  // /wordName — call a locally defined word
164
- word_call: ($) => /\/[a-z][a-zA-Z0-9_]*/,
175
+ word_call: ($) => seq('/', field('word', alias(LOWERNAME, $.word_ref))),
176
+
177
+ // ~Module/word — module-qualified word call
178
+ // Broken into named fields so the AST exposes module_ref and word_ref nodes.
179
+ module_word_call: ($) =>
180
+ seq(
181
+ '~',
182
+ field('module', alias(UPPERNAME, $.module_ref)),
183
+ '/',
184
+ field('word', alias(LOWERNAME, $.word_ref)),
185
+ ),
165
186
 
166
- // ~Module/word or ~Module — module-qualified word call
167
- module_call: ($) => /~[A-Z][a-zA-Z0-9_]*(\/[a-zA-Z][a-zA-Z0-9_]*)?/,
187
+ // ~Module#TagName — module-qualified tag constructor
188
+ module_tag_constructor: ($) =>
189
+ seq(
190
+ '~',
191
+ field('module', alias(UPPERNAME, $.module_ref)),
192
+ '#',
193
+ field('tag', alias(UPPERNAME, $.tag_ref)),
194
+ ),
195
+
196
+ // ~Module,Map.field — module-qualified map field accessor / lens
197
+ module_map_access: ($) =>
198
+ seq(
199
+ '~',
200
+ field('module', alias(UPPERNAME, $.module_ref)),
201
+ ',',
202
+ field('map', alias(UPPERNAME, $.map_ref)),
203
+ '.',
204
+ field('field', alias(LOWERNAME, $.field_ref)),
205
+ ),
168
206
 
169
- // *Map/field — map field accessor / lens
170
- map_access: ($) => /\*[A-Z][a-zA-Z0-9_]*\/[a-z][a-zA-Z0-9_]*/,
207
+ // ,Map.field — local map field accessor / lens
208
+ map_access: ($) =>
209
+ seq(
210
+ ',',
211
+ field('map', alias(UPPERNAME, $.map_ref)),
212
+ '.',
213
+ field('field', alias(LOWERNAME, $.field_ref)),
214
+ ),
171
215
 
172
216
  // #TagName — construct a tagged union value
173
- tag_constructor: ($) => /#[A-Z][a-zA-Z0-9_]*/,
217
+ tag_constructor: ($) => seq('#', field('name', alias(UPPERNAME, $.tag_ref))),
174
218
 
175
219
  // _TagName — match/destructure a tag in MATCH
176
- tag_pattern: ($) => /_[A-Z][a-zA-Z0-9_]*/,
220
+ tag_pattern: ($) => seq('_', field('name', alias(UPPERNAME, $.tag_ref))),
221
+
222
+ // _ — match/destructure a default in MATCH
223
+ default_pattern: ($) => token('_'),
177
224
 
178
- // .name — pop the top of the stack into a named local slot
179
- slot_push: ($) => /\.[a-z][a-zA-Z0-9_]*/,
225
+ // :name — pop the top of the stack into a named local slot
226
+ slot_push: ($) => seq(':', field('name', alias(LOWERNAME, $.slot_ref))),
180
227
 
181
- // ,name — push a named local slot back onto the stack
182
- slot_pop: ($) => /,[a-z][a-zA-Z0-9_]*/,
228
+ // ;name — push a named local slot back onto the stack
229
+ slot_pop: ($) => seq(';', field('name', alias(LOWERNAME, $.slot_ref))),
183
230
 
184
231
  // 'raw string literal'
185
232
  raw_string: ($) => /\'[^\']*\'/,
186
233
 
187
- // Numeric literal: integer or decimal
188
- number: ($) => /[0-9]+(\.[0-9]+)?/,
189
-
190
- // Generic bare identifier (raw data tokens, unrecognised lowercase words)
191
- identifier: ($) => /[a-zA-Z_][a-zA-Z0-9_]*/,
234
+ // Catch-all: any non-whitespace sequence that doesn't match a more specific
235
+ // rule. prec(-1) gives it the lowest priority so every other token wins
236
+ // when there is a tie. Structural characters ( ) [ ] and the quote ' are
237
+ // excluded because they delimit blocks, comments, and raw strings.
238
+ raw_value: ($) => token(prec(-1, /[^\s\[\]()']+/)),
192
239
  },
193
240
  })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@algosail/tree-sitter",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Tree-sitter grammar for the Sail language",
5
5
  "main": "bindings/node",
6
6
  "types": "bindings/node",
@@ -8,7 +8,8 @@
8
8
  "build:wasm": "tree-sitter build --wasm",
9
9
  "build:native": "node-gyp rebuild",
10
10
  "generate": "tree-sitter generate",
11
- "install": "echo 'Skipping native build. Run npm run build:native manually if needed.'"
11
+ "install": "echo 'Skipping native build. Run npm run build:native manually if needed.'",
12
+ "playground": "tree-sitter playground"
12
13
  },
13
14
  "devDependencies": {
14
15
  "node-addon-api": "^8.6.0",