@algosail/parser 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/group.js CHANGED
@@ -15,13 +15,14 @@ export const getTagNodes = (rootNode, language) => {
15
15
  }
16
16
 
17
17
  function parseGroup(node) {
18
+ const defNode = field(node, 'def')
18
19
  return {
19
- name: groupDef(field(node, 'def')),
20
+ name: groupDef(defNode),
20
21
  doc: comment(field(node, 'doc')),
21
22
  params: childrenOfType(node, 'type_var').map((n) => n.text),
22
23
  tags: childrenOfType(node, 'tag').map(parseTag),
23
- startPosition: node.startPosition,
24
- endPosition: node.endPosition,
24
+ startPosition: defNode.startPosition,
25
+ endPosition: defNode.endPosition,
25
26
  }
26
27
  }
27
28
 
package/lib/jsDoc.js CHANGED
@@ -12,7 +12,7 @@ export async function parseJsDoc(uri, text, parser) {
12
12
  const query = new Query(jsLanguage, '(comment) @comment')
13
13
  const matches = query.matches(jsTree.rootNode)
14
14
 
15
- const result = { modules: {}, groups: {}, tags: {}, maps: {}, words: [] }
15
+ const result = { modules: {}, groups: {}, tags: {}, maps: {}, words: {} }
16
16
 
17
17
  for (const match of matches) {
18
18
  const commentNode = capture(match.captures, 'comment')
@@ -69,7 +69,10 @@ export async function parseJsDoc(uri, text, parser) {
69
69
  }
70
70
  }
71
71
 
72
- result.words.push(...words)
72
+ result.words = {
73
+ ...result.words,
74
+ ...words,
75
+ }
73
76
  }
74
77
 
75
78
  return result
package/lib/load.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { readFile, access } from 'node:fs/promises'
2
2
  import { createRequire } from 'node:module'
3
3
  import { dirname, extname, join, resolve } from 'node:path'
4
+ import { fileURLToPath } from 'node:url'
4
5
 
5
6
  function parsePackageImport(packageImport) {
6
7
  if (packageImport.startsWith('@')) {
@@ -72,7 +73,7 @@ export async function resolveModulePath(execPath, packageImport) {
72
73
 
73
74
  const sailPath = resolveExportsCondition(packageJson.exports, path)
74
75
  if (sailPath) {
75
- return { type: 'js', path: join(packageDir, sailPath) }
76
+ return { type: 'sail', path: join(packageDir, sailPath) }
76
77
  }
77
78
 
78
79
  const importId = path === '.' ? name : `${name}/${path.slice(2)}`
@@ -81,7 +82,8 @@ export async function resolveModulePath(execPath, packageImport) {
81
82
  }
82
83
 
83
84
  export async function resolveFilePath(execPath, filePath) {
84
- const base = dirname(execPath)
85
+ const execFilePath = execPath.startsWith('file://') ? fileURLToPath(execPath) : execPath
86
+ const base = dirname(execFilePath)
85
87
  const ext = extname(filePath)
86
88
 
87
89
  if (ext === '.sail') {
package/lib/map.js CHANGED
@@ -19,12 +19,13 @@ export function getMapNodes(rootNode, language) {
19
19
  }
20
20
 
21
21
  function parseMap(node) {
22
+ const defNode = field(node, 'def')
22
23
  return {
23
- name: mapDef(field(node, 'def')),
24
+ name: mapDef(defNode),
24
25
  doc: comment(field(node, 'doc')),
25
26
  fields: childrenOfType(node, 'field').map(parseMapField),
26
- startPosition: node.startPosition,
27
- endPosition: node.endPosition,
27
+ startPosition: defNode.startPosition,
28
+ endPosition: defNode.endPosition,
28
29
  }
29
30
  }
30
31
 
package/lib/tokens.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { firstOfType, childrenOfField } from './utils.js'
2
/**
 * Text of a path node, returned unchanged (0.0.10 no longer drops the first
 * character). Returns null when the node or its text is missing.
 */
export const pathDef = (node) => node?.text ?? null

/**
 * Text of a module definition node without its first character.
 * Returns null when the node or its text is missing.
 */
export const moduleDef = (node) => node?.text?.slice(1) ?? null

/**
 * Text of a module reference node without its first character.
 * Returns null when the node or its text is missing.
 */
export const moduleRef = (node) => node?.text?.slice(1) ?? null
package/lib/word.js CHANGED
@@ -20,17 +20,21 @@ import { sigType } from './types.js'
20
20
 
21
21
  export function getWordNodes(rootNode, language) {
22
22
  const query = new Query(language, `(word) @word`)
23
- const matches = query.matches(rootNode)
24
- const words = matches
23
+ const words = query
24
+ .matches(rootNode)
25
25
  .map((match) => capture(match.captures, 'word'))
26
26
  .filter(Boolean)
27
27
  .map(parseWord)
28
-
28
+ .reduce((acc, node) => {
29
+ acc[node.name] = node
30
+ return acc
31
+ }, {})
29
32
  return { words }
30
33
  }
31
34
 
32
35
  function parseWord(node) {
33
- const name = wordDef(field(node, 'name_def'))
36
+ const defNode = field(node, 'name_def')
37
+ const name = wordDef(defNode)
34
38
  const signature = sigType(field(node, 'sig'))
35
39
  const body = childrenOfField(node, 'body').map(step).filter(Boolean)
36
40
 
@@ -40,8 +44,8 @@ function parseWord(node) {
40
44
  doc: comment(field(node, 'doc')),
41
45
  signature,
42
46
  body,
43
- startPosition: node.startPosition,
44
- endPosition: node.endPosition,
47
+ startPosition: defNode.startPosition,
48
+ endPosition: defNode.endPosition,
45
49
  }
46
50
  }
47
51
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@algosail/parser",
3
- "version": "0.0.8",
3
+ "version": "0.0.10",
4
4
  "description": "Parser for Sail language.",
5
5
  "license": "MIT",
6
6
  "author": "algosail",
package/prototype.js DELETED
@@ -1,459 +0,0 @@
1
- import { Query } from 'web-tree-sitter'
2
-
3
- // symbol-table.js
4
- //
5
- // Transforms a tree-sitter AST (from @algosail/tree-sitter) into a symbol
6
- // table that mirrors the output of the hand-written parser in parser.js.
7
- //
8
- // Visitor pattern: instead of walking a flat token array with a pos counter,
9
- // we dispatch on node.type and read structured children via the tree-sitter
10
- // node API.
11
- //
12
- // Key tree-sitter node APIs used here:
13
- // node.type — the grammar rule name
14
- // node.text — raw source text of the node
15
- // node.namedChildren — array of named child nodes (no anonymous punctuation)
16
- // node.childForFieldName('name') — get the child bound to a field() in grammar.js
17
- // node.startPosition / .endPosition — { row, column }
18
- // node.hasError() — true if any descendant is an ERROR node
19
- // node.isError — true if this node itself is an ERROR
20
-
21
- // ─── Helpers ────────────────────────────────────────────────────────────────
22
-
23
/**
 * Get the child node bound to a named field, or null.
 * @param {object} node  Node exposing `childForFieldName`.
 * @param {string} name  Field name.
 */
const field = (node, name) => node.childForFieldName(name) ?? null

/**
 * Get the text of a named field, or null when the field (or its text) is absent.
 * @param {object} node
 * @param {string} name
 */
const fieldText = (node, name) => field(node, name)?.text ?? null

/**
 * Get all named children of a specific type, in document order.
 * @param {object} node  Node exposing `namedChildren`.
 * @param {string} type  Value to match against each child's `type`.
 */
const childrenOfType = (node, type) => node.namedChildren.filter((c) => c.type === type)

/**
 * Get the first named child of a specific type, or null.
 * @param {object} node
 * @param {string} type
 */
const firstOfType = (node, type) => node.namedChildren.find((c) => c.type === type) ?? null
34
-
35
/**
 * Extract plain text from a comment node.
 *   comment         → '(' optional(comment_content) ')'
 *   comment_content → repeat1(choice(/[^()]+/, comment))
 * Returns the text of the first `comment_content` child, trimmed; whatever
 * that text contains (including nested parentheses) is kept as is.
 *
 * @param {object|null} commentNode
 * @returns {string|null} null when there is no node or no `comment_content` child.
 */
const commentText = (commentNode) => {
  if (!commentNode) return null
  const content = commentNode.namedChildren.find((c) => c.type === 'comment_content')
  return content ? content.text.trim() : null
}
47
-
48
/**
 * Convert a node's tree-sitter positions into a flat range object.
 *
 * @param {{ startPosition: {row, column}, endPosition: {row, column} }} node
 * @returns {{ startLine, startCol, endLine, endCol }}
 */
const toRange = (node) => {
  const { startPosition: start, endPosition: end } = node
  return {
    startLine: start.row,
    startCol: start.column,
    endLine: end.row,
    endCol: end.column,
  }
}
55
-
56
- // ─── Import types ────────────────────────────────────────────────────────────
57
-
58
/** Kinds of import an `import_decl` path can denote. Frozen: shared constant table. */
export const IMPORT_TYPE = Object.freeze({
  FFI: 'FFI', // ./path or ../path or /absolute
  NPM: 'NPM', // npm:package-name
  MODULE: 'MODULE', // bare module name
})

/**
 * Classify a raw import path.
 *
 * @param {string|null} path
 * @returns {string} One of the IMPORT_TYPE values; MODULE for an empty or
 *   missing path and for anything that is neither path-like nor `npm:`.
 */
const detectImportType = (path) => {
  if (!path) return IMPORT_TYPE.MODULE

  const isPathLike = ['./', '../', '/'].some((prefix) => path.startsWith(prefix))
  if (isPathLike) return IMPORT_TYPE.FFI
  if (path.startsWith('npm:')) return IMPORT_TYPE.NPM
  return IMPORT_TYPE.MODULE
}
71
-
72
/**
 * Drop the `npm:` scheme from an import path; other paths pass through.
 *
 * @param {string|null} path
 * @returns {string|null} null for an empty or missing path.
 */
const cleanImportPath = (path) => {
  if (!path) return null
  return path.startsWith('npm:') ? path.slice('npm:'.length) : path
}
76
-
77
- // ─── Visitors ────────────────────────────────────────────────────────────────
78
-
79
/**
 * import_decl
 *   path:  import_path   — raw path text (e.g. "./lib/io.js", "npm:pkg")
 *   alias: module_alias  — e.g. "~Io"
 *
 * @param {object} node  The import_decl node.
 * @returns {{ type: string, path: string|null, alias: string|null, range: object }}
 *   `type` is classified from the raw path; `path` has any `npm:` prefix removed.
 */
function visitImport(node) {
  const rawPath = fieldText(node, 'path')
  return {
    type: detectImportType(rawPath),
    path: cleanImportPath(rawPath),
    alias: fieldText(node, 'alias'),
    range: toRange(node),
  }
}
93
-
94
/**
 * tag_group
 *   name: tag_group_name            — e.g. "&Maybe"
 *   (unnamed) type_variable*        — type parameters
 *   (unnamed optional) comment      — doc comment
 *   (unnamed) tag_def*              — tag cases
 *
 * @param {object} node  The tag_group node.
 * @returns {{ name: string|null, doc: string|null, params: string[], tags: object[], range: object }}
 */
function visitTagGroup(node) {
  const rawName = fieldText(node, 'name')
  return {
    name: rawName?.slice(1) ?? null, // strip leading &
    doc: commentText(firstOfType(node, 'comment')),
    params: childrenOfType(node, 'type_variable').map((n) => n.text),
    tags: childrenOfType(node, 'tag_def').map(visitTagDef),
    range: toRange(node),
  }
}
110
-
111
/**
 * tag_def
 *   name: tag_name            — e.g. ">Just"
 *   (unnamed) type_variable*  — per-case type parameters
 *   (unnamed) comment?        — doc comment
 *
 * @param {object} node  The tag_def node.
 * @returns {{ name: string|null, doc: string|null, params: string[] }}
 */
function visitTagDef(node) {
  const rawName = fieldText(node, 'name')
  return {
    name: rawName?.slice(1) ?? null, // strip leading >
    doc: commentText(firstOfType(node, 'comment')),
    params: childrenOfType(node, 'type_variable').map((n) => n.text),
  }
}
124
-
125
/**
 * map_def
 *   name: map_name        — e.g. "$Person"
 *   (unnamed) map_field*  — struct fields
 *   (unnamed) comment?    — doc comment
 *
 * @param {object} node  The map_def node.
 * @returns {{ name: string|null, doc: string|null, fields: object[], range: object }}
 */
function visitMapDef(node) {
  const rawName = fieldText(node, 'name')
  return {
    name: rawName?.slice(1) ?? null, // strip leading $
    doc: commentText(firstOfType(node, 'comment')),
    fields: childrenOfType(node, 'map_field').map(visitMapField),
    range: toRange(node),
  }
}
139
-
140
/**
 * map_field
 *   key:  map_field_name  — e.g. ".name"
 *   type: type_name       — e.g. "Str"
 *
 * @param {object} node  The map_field node.
 * @returns {{ key: string|null, type: string|null, doc: string|null }}
 */
function visitMapField(node) {
  const rawKey = fieldText(node, 'key')
  return {
    key: rawKey?.slice(1) ?? null, // strip leading .
    type: fieldText(node, 'type'),
    doc: commentText(firstOfType(node, 'comment')),
  }
}
152
-
153
/**
 * word_def
 *   name: word_name   — e.g. "@createPerson"
 *   sig:  signature   — the ( inputs -- outputs effects ) block
 *   body: _expr*      — expressions; first comment is treated as doc
 *
 * @param {import('web-tree-sitter').SyntaxNode} node
 * @param {{ tagIndex: Map, mapIndex: Map, moduleIndex: Map, errors: Array, queries: object }} ctx
 *   Type context collected in passes 1–2, forwarded to collectRefs so that
 *   references inside the word body can be resolved and validated.
 * @returns {{ name: string|null, doc: string|null, signature: object|null, refs: object, range: object }}
 */
function visitWordDef(node, ctx = {}) {
  const nameNode = field(node, 'name')
  const sigNode = field(node, 'sig')

  // Doc comment: the first direct `comment` child. The original additionally
  // filtered out the name/sig nodes by object identity before searching.
  // NOTE(review): that filter only matters if the name or sig child could
  // itself have type `comment`, which the grammar notes above rule out —
  // confirm against grammar.js.
  const docNode = node.namedChildren.find((c) => c.type === 'comment')

  return {
    name: nameNode?.text?.slice(1) ?? null, // strip leading @
    doc: commentText(docNode),
    signature: sigNode ? visitSignature(sigNode) : null,
    refs: collectRefs(node, ctx),
    range: toRange(node),
  }
}
182
-
183
/**
 * Collect the outbound references of a word_def (word calls, module calls,
 * tag constructors / patterns, map accesses) using pre-compiled queries.
 * Each query is run against the whole word node, so nested constructs are
 * found without manual recursion.
 *
 * Names missing from the supplied indexes are reported into `ctx.errors`.
 * A query that is absent from `ctx.queries` is skipped; previously the
 * `queries = {}` default made the function throw on its first lookup.
 *
 * @param {import('web-tree-sitter').SyntaxNode} wordNode
 * @param {{ tagIndex: Map, mapIndex: Map, moduleIndex: Map, errors: Array, queries: object }} ctx
 * @returns {{ words: string[], modules: string[], tags: string[], maps: string[] }}
 */
function collectRefs(wordNode, ctx = {}) {
  const {
    tagIndex = new Map(),
    mapIndex = new Map(),
    moduleIndex = new Map(),
    errors = [],
    queries = {},
  } = ctx
  const refs = { words: [], modules: [], tags: [], maps: [] }

  // Matches of one named query over the word, or nothing when it was not supplied.
  const matchesOf = (key) => queries[key]?.matches(wordNode) ?? []

  // /wordName — local word call (regex token, no fields — strip leading /)
  for (const { captures } of matchesOf('wordCalls')) {
    const name = capture(captures, 'ref')?.text?.slice(1)
    if (name) refs.words.push(name)
  }

  // #TagName — local tag constructor
  for (const { captures } of matchesOf('tagRefs')) {
    const name = capture(captures, 'name')?.text
    if (!name) continue
    refs.tags.push(name)
    if (!tagIndex.has(name)) {
      errors.push({ message: `Unknown tag: "${name}"`, range: toRange(capture(captures, 'ref')) })
    }
  }

  // _TagName — tag match arm
  for (const { captures } of matchesOf('tagPatterns')) {
    const name = capture(captures, 'name')?.text
    if (!name) continue
    refs.tags.push(name)
    if (!tagIndex.has(name)) {
      errors.push({
        message: `Unknown tag pattern: "${name}"`,
        range: toRange(capture(captures, 'ref')),
      })
    }
  }

  // $Map.field — local map accessor
  for (const { captures } of matchesOf('mapRefs')) {
    const name = capture(captures, 'map_name')?.text
    if (!name) continue
    refs.maps.push(name)
    if (!mapIndex.has(name)) {
      errors.push({ message: `Unknown map: "${name}"`, range: toRange(capture(captures, 'ref')) })
    }
  }

  // ~Module/word  ~Module#Tag  ~Module$Map.field
  for (const { captures } of matchesOf('moduleRefs')) {
    const moduleName = capture(captures, 'module')?.text
    if (!moduleName) continue
    const alias = `~${moduleName}`
    refs.modules.push(alias)
    if (!moduleIndex.has(alias)) {
      errors.push({
        message: `Unknown module: "${alias}"`,
        range: toRange(capture(captures, 'ref')),
      })
    }
  }

  return refs
}
261
-
262
/**
 * signature (also used for sig_quotation — same structure)
 *   Sequence: '(' _sig_item* sig_arrow _sig_item* ')'
 *
 * Named children are split at the first sig_arrow into inputs and outputs.
 * Effect nodes are dropped from both lists; `effects` / `negatedEffects` are
 * taken from the output side only. Without an arrow every item is an input.
 *
 * @param {object} node  The signature or sig_quotation node.
 * @returns {{ inputs: object[], outputs: object[], effects: string[], negatedEffects: string[] }}
 */
function visitSignature(node) {
  const items = node.namedChildren // includes sig_arrow + all sig items
  const arrowIdx = items.findIndex((c) => c.type === 'sig_arrow')
  const hasArrow = arrowIdx >= 0

  const inputItems = hasArrow ? items.slice(0, arrowIdx) : items
  const outputItems = hasArrow ? items.slice(arrowIdx + 1) : []

  const isEffect = (n) => n.type === 'effect_add' || n.type === 'effect_remove'
  const namesOf = (type) => outputItems.filter((n) => n.type === type).map((n) => n.text.slice(1))

  return {
    inputs: inputItems.filter((n) => !isEffect(n)).map(visitSigItem),
    outputs: outputItems.filter((n) => !isEffect(n)).map(visitSigItem),
    effects: namesOf('effect_add'), // strip leading +
    negatedEffects: namesOf('effect_remove'), // strip leading -
  }
}

/**
 * Map a single _sig_item node to a plain object tagged with `kind`.
 *
 * @param {object} node
 * @returns {object} `type` / `var` / `spread` / `list` / `quotation`, or
 *   `{ kind: 'unknown', text }` for any other node type.
 */
function visitSigItem(node) {
  switch (node.type) {
    case 'type_name':
      return { kind: 'type', name: node.text }

    case 'type_variable':
      return { kind: 'var', name: node.text }

    case 'spread': {
      // "..name" — row variable or spread type
      const name = node.text.slice(2)
      // charAt yields '' for an empty name, so a bare ".." no longer throws
      // (the original indexed name[0] and called toLowerCase on undefined).
      const first = name.charAt(0)
      const isLower = first === first.toLowerCase()
      return { kind: 'spread', name, isType: !isLower }
    }

    case 'sig_list':
      // [ Type Type ... ] — list/tuple type inside a signature
      return {
        kind: 'list',
        items: node.namedChildren.map(visitSigItem),
      }

    case 'sig_quotation':
      // ( a b -- c d ) — higher-order function type (nested signature)
      return { kind: 'quotation', ...visitSignature(node) }

    default:
      return { kind: 'unknown', text: node.text }
  }
}
322
-
323
- // ─── Query strings ────────────────────────────────────────────────────────────
324
- // Written in tree-sitter's S-expression query language.
325
- // Each @capture-name marks a node that will be extracted from a match result.
326
- // Queries are compiled once per language object and reused across calls.
327
-
328
// Query sources in tree-sitter's S-expression query language, keyed by the
// name under which the compiled query is looked up.
// NOTE(review): `tags`, `maps` and `words` match on a `name_def:` field while
// the visit* functions in this file read a `name` field — confirm which of
// the two the grammar actually defines.
const QUERY_SOURCES = {
  // ── Pass 1 ──
  imports: `
    (import_decl
      path: (import_path) @path
      alias: (module_alias) @alias) @decl`,

  // ── Pass 2 ──
  tags: `(tag_group name_def: (tag_group_name name: (tag_ref))) @group`,
  maps: `(map_def name_def: (map_name name: (map_ref))) @map`,

  // ── Pass 3 ──
  words: `
    (word_def
      name_def: (word_name name: (word_ref)) @name
      sig: (signature) @sig) @word`,

  // ── Reference queries (scoped to a word node in collectRefs) ──
  // word_call is still a regex token — no sub-fields, capture the whole node.
  wordCalls: `(word_call) @ref`,

  tagRefs: `(tag_constructor name: (_) @name) @ref`,
  tagPatterns: `(tag_pattern name: (_) @name) @ref`,
  mapRefs: `(map_access map: (_) @map_name) @ref`,

  // Alternation [...] matches any of the three module-qualified constructs.
  moduleRefs: `[
    (module_word_call module: (_) @module)
    (module_tag_constructor module: (_) @module)
    (module_map_access module: (_) @module)
  ] @ref`,

  // Syntax errors inserted by tree-sitter's error recovery
  errors: `(ERROR) @error`,
}
363
-
364
- // ─── Query helper ─────────────────────────────────────────────────────────────
365
-
366
/**
 * Get a captured node by name from a single match's captures array.
 *
 * @param {Array<{name: string, node: import('web-tree-sitter').SyntaxNode}>} captures
 * @param {string} name  The @capture-name from the query string.
 * @returns {import('web-tree-sitter').SyntaxNode|null} The first capture with
 *   that name, or null when there is none.
 */
const capture = (captures, name) => {
  const hit = captures.find((c) => c.name === name)
  return hit?.node ?? null
}
373
-
374
- // ─── Entry point ─────────────────────────────────────────────────────────────
375
-
376
/**
 * Build a symbol table from the root node of a tree-sitter parse tree.
 *
 * Runs in passes so that word bodies can be resolved against already
 * collected definitions:
 *
 *   Pass 0 — syntax errors: one entry per ERROR node
 *   Pass 1 — imports: build the module alias → import record index
 *   Pass 2 — types:   collect tag_group and map_def, build name indexes
 *   Pass 3 — words:   visit word_def bodies with the full type context
 *
 * Every query is compiled on each call and released again before returning;
 * the returned table holds only the plain records built by the visitors.
 *
 * @param {import('web-tree-sitter').SyntaxNode} rootNode
 * @param {import('web-tree-sitter').Language} language
 *   The compiled Sail language object — needed to compile query strings.
 * @returns {{ imports, tags, maps, words, errors, moduleIndex, tagIndex, mapIndex, wordIndex }}
 */
export function buildSymbolTable(rootNode, language) {
  const queries = {}

  try {
    // Compile inside the try so that a query that fails to compile does not
    // leak the ones compiled before it.
    for (const [key, src] of Object.entries(QUERY_SOURCES)) {
      queries[key] = new Query(language, src)
    }

    const errors = []

    // ── Pass 0: syntax errors ───────────────────────────────────────────────
    // Collect ERROR nodes inserted by tree-sitter's error-recovery mechanism.
    for (const { captures } of queries.errors.matches(rootNode)) {
      const node = capture(captures, 'error')
      if (node) {
        errors.push({ message: `Syntax error: unexpected "${node.text}"`, range: toRange(node) })
      }
    }

    // ── Pass 1: imports ─────────────────────────────────────────────────────
    const imports = []
    const moduleIndex = new Map() // alias "~Io" → import record

    for (const { captures } of queries.imports.matches(rootNode)) {
      const node = capture(captures, 'decl')
      if (!node) continue
      const imp = visitImport(node)
      imports.push(imp)
      if (imp.alias) moduleIndex.set(imp.alias, imp)
    }

    // ── Pass 2: types (tag groups + maps) ───────────────────────────────────
    const tags = []
    const maps = []
    const tagIndex = new Map() // "Maybe"  → tag group record
    const mapIndex = new Map() // "Person" → map def record

    for (const { captures } of queries.tags.matches(rootNode)) {
      const node = capture(captures, 'group')
      if (!node) continue
      const tag = visitTagGroup(node)
      tags.push(tag)
      if (tag.name) tagIndex.set(tag.name, tag)
    }

    for (const { captures } of queries.maps.matches(rootNode)) {
      const node = capture(captures, 'map')
      if (!node) continue
      const map = visitMapDef(node)
      maps.push(map)
      if (map.name) mapIndex.set(map.name, map)
    }

    // ── Pass 3: words ───────────────────────────────────────────────────────
    // Hand the compiled reference queries over via the context so collectRefs
    // does not recompile them for every word.
    const typeContext = { tagIndex, mapIndex, moduleIndex, errors, queries }

    const words = []
    const wordIndex = new Map()

    for (const { captures } of queries.words.matches(rootNode)) {
      const node = capture(captures, 'word')
      if (!node) continue
      const word = visitWordDef(node, typeContext)
      words.push(word)
      if (word.name) wordIndex.set(word.name, word)
    }

    return { imports, tags, maps, words, errors, moduleIndex, tagIndex, mapIndex, wordIndex }
  } finally {
    // web-tree-sitter queries are not garbage-collected; release them
    // explicitly once all passes are done (or have failed).
    for (const query of Object.values(queries)) query.delete()
  }
}