@algosail/parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,30 @@
1
+ import { getSailTokenizer } from './lib/sailTokenizer.js'
2
+ import { getJsTokenizer } from './lib/jsTokenizer.js'
3
+ import { buildSymbolTable } from './lib/symbolTable.js'
4
+ import { parseJsDoc } from './lib/jsDoc.js'
5
+
6
/**
 * Creates a parser facade that bundles the Sail and JavaScript tokenizers
 * with a per-URI cache of parse results.
 *
 * @returns {Promise<object>} Resolves to
 *   `{ parseSail, parseJs, sailTokenizer, jsTokenizer, cache }`.
 */
export async function createParser() {
  const sailTokenizer = await getSailTokenizer()
  const jsTokenizer = await getJsTokenizer()
  const cache = new Map()

  // The same object is passed down to the parse helpers so that they can
  // reach the tokenizers and re-enter parseSail / parseJs.
  const parser = {
    parseSail,
    parseJs,
    sailTokenizer,
    jsTokenizer,
    cache,
  }

  /** Builds the symbol table for Sail `text` and caches it under `uri`. */
  async function parseSail(uri, text) {
    const res = await buildSymbolTable(uri, text, parser)
    cache.set(uri, res)
    return res
  }

  /** Extracts Sail declarations from JavaScript `text` and caches them under `uri`. */
  async function parseJs(uri, text) {
    const res = await parseJsDoc(uri, text, parser)
    cache.set(uri, res)
    return res
  }

  // Without this return the function resolved to undefined and the facade
  // was unreachable for callers.
  return parser
}
package/lib/group.js ADDED
@@ -0,0 +1,58 @@
1
+ import { Query } from 'web-tree-sitter'
2
+
3
+ import { capture, field, childrenOfType, firstOfType } from './utils.js'
4
+ import { groupDef, tagDef, comment } from './tokens.js'
5
+
6
/**
 * Finds every `group` node under `rootNode`, parses it, and indexes the
 * resulting groups and their tags by name.
 *
 * @param {object} rootNode - Syntax-tree node to run the query against.
 * @param {object} language - Language handle used to compile the query.
 * @returns {{ groups: object, tags: object }} Tables produced by `getTable`.
 */
export const getTagNodes = (rootNode, language) => {
  const groupQuery = new Query(language, `(group) @group`)
  const parsedGroups = []

  for (const { captures } of groupQuery.matches(rootNode)) {
    const groupNode = capture(captures, 'group')
    // Matches that yield no `group` capture are dropped.
    if (groupNode) parsedGroups.push(parseGroup(groupNode))
  }

  return getTable(parsedGroups)
}
16
+
17
/**
 * Converts a `group` syntax node into a plain record.
 *
 * @param {object} node - The `group` node.
 * @returns {object} `{ name, doc, params, tags, startPosition, endPosition }`,
 *   where `params` holds the text of each `type_var` child and `tags` the
 *   parsed `tag` children.
 */
function parseGroup(node) {
  const name = groupDef(field(node, 'def'))
  const doc = comment(field(node, 'doc'))
  const params = childrenOfType(node, 'type_var').map((typeVarNode) => typeVarNode.text)
  const tags = childrenOfType(node, 'tag').map(parseTag)
  const { startPosition, endPosition } = node

  return { name, doc, params, tags, startPosition, endPosition }
}
27
+
28
/**
 * Converts a `tag` syntax node into a plain record.
 *
 * @param {object} node - The `tag` node.
 * @returns {object} `{ name, doc, param, startPosition, endPosition }`;
 *   `param` is the text of the first `type_var` child, or `null` if none.
 */
function parseTag(node) {
  const name = tagDef(field(node, 'def'))
  const doc = comment(field(node, 'doc'))
  const typeVarNode = firstOfType(node, 'type_var')
  const param = typeVarNode?.text ?? null
  const { startPosition, endPosition } = node

  return { name, doc, param, startPosition, endPosition }
}
37
+
38
/**
 * Indexes parsed groups, and the tags they contain, by name.
 *
 * @param {Array<object>} nodes - Parsed group records (see `parseGroup`).
 * @returns {{ groups: object, tags: object }} `groups` maps a group name to
 *   its record; `tags` maps a tag name to the tag record extended with a
 *   `group` summary (`name`, `startPosition`, `endPosition`) of its owner.
 *   A later entry with the same name replaces an earlier one.
 */
function getTable(nodes) {
  const groups = {}
  const tags = {}

  // Each tag gets its own summary object of the owning group.
  const summarize = ({ name, startPosition, endPosition }) => ({
    name,
    startPosition,
    endPosition,
  })

  for (const group of nodes) {
    groups[group.name] = group
    for (const tag of group.tags) {
      tags[tag.name] = { ...tag, group: summarize(group) }
    }
  }

  return { groups, tags }
}
package/lib/imports.js ADDED
@@ -0,0 +1,30 @@
1
+ import { Query } from 'web-tree-sitter'
2
+ import { capture, field } from './utils.js'
3
+ import { pathDef, moduleDef } from './tokens.js'
4
+
5
/**
 * Finds every `import` node under `rootNode` and parses it.
 *
 * @param {object} rootNode - Syntax-tree node to run the query against.
 * @param {object} language - Language handle used to compile the query.
 * @returns {Array<object>} One record per import, as built by `parseImport`.
 */
export function getImportNodes(rootNode, language) {
  const importQuery = new Query(language, `(import) @import`)
  const parsedImports = []

  for (const { captures } of importQuery.matches(rootNode)) {
    const importNode = capture(captures, 'import')
    // Matches that yield no `import` capture are dropped.
    if (importNode) parsedImports.push(parseImport(importNode))
  }

  return parsedImports
}
13
+
14
/**
 * Converts an `import` syntax node into a plain record.
 *
 * @param {object} node - The `import` node.
 * @returns {object} `{ type, path, module, startPosition, endPosition }`,
 *   where `type` is the classification returned by `getImportType` for `path`.
 */
export function parseImport(node) {
  const path = pathDef(field(node, 'path'))
  const type = getImportType(path)
  const module = moduleDef(field(node, 'module'))
  const { startPosition, endPosition } = node

  return { type, path, module, startPosition, endPosition }
}
24
+
25
/**
 * Classifies an import path.
 *
 * @param {string|null} path - Import path as produced by `pathDef`, which
 *   yields `null` when the import has no path node.
 * @returns {'file'|'package'|null} `'file'` for paths starting with `./`,
 *   `../` or `/`; `'package'` for any other string; `null` when there is no
 *   path to classify.
 */
function getImportType(path) {
  // A missing path used to throw a TypeError from `startsWith`. Returning
  // null leaves the import untyped instead of aborting the whole parse.
  if (typeof path !== 'string') return null

  const filePrefixes = ['./', '../', '/']
  return filePrefixes.some((prefix) => path.startsWith(prefix)) ? 'file' : 'package'
}
package/lib/jsDoc.js ADDED
@@ -0,0 +1,120 @@
1
+ import { Query } from 'web-tree-sitter'
2
+ import { capture } from './utils.js'
3
+ import { getTagNodes } from './group.js'
4
+ import { getMapNodes } from './map.js'
5
+ import { getWordNodes } from './word.js'
6
+
7
/**
 * Extracts Sail declarations embedded in `@sail` JSDoc blocks of a
 * JavaScript source text.
 *
 * Every `/** ... *\/` comment containing `@sail` is parsed with the Sail
 * tokenizer. Positions of the resulting groups, tags, maps and map fields
 * are translated from the extracted Sail snippet back to the coordinates of
 * the JavaScript file.
 *
 * @param {string} uri - Identifier of the JavaScript file (not used here).
 * @param {string} text - JavaScript source text.
 * @param {object} parser - Facade exposing `jsTokenizer` and `sailTokenizer`,
 *   each shaped `{ tokenizer, language }`.
 * @returns {Promise<object>} `{ modules, groups, tags, maps, words }`;
 *   `modules` is always empty here.
 */
export async function parseJsDoc(uri, text, parser) {
  const { tokenizer: jsTokenizer, language: jsLanguage } = parser.jsTokenizer
  const { tokenizer: sailTokenizer, language: sailLanguage } = parser.sailTokenizer

  const jsTree = jsTokenizer.parse(text)
  const query = new Query(jsLanguage, '(comment) @comment')
  const matches = query.matches(jsTree.rootNode)

  const result = { modules: {}, groups: {}, tags: {}, maps: {}, words: [] }

  for (const match of matches) {
    const commentNode = capture(match.captures, 'comment')
    if (!commentNode) continue

    const extracted = extractSailBlock(commentNode)
    if (!extracted) continue

    const { sailText, rowOffset, colOffsets } = extracted

    // Copies a record with its own start/end moved into file coordinates.
    const rebase = (item) => ({
      ...item,
      startPosition: offsetPosition(item.startPosition, rowOffset, colOffsets),
      endPosition: offsetPosition(item.endPosition, rowOffset, colOffsets),
    })

    const sailRoot = sailTokenizer.parse(sailText).rootNode

    const { groups, tags } = getTagNodes(sailRoot, sailLanguage)
    const { maps } = getMapNodes(sailRoot, sailLanguage)
    const { words } = getWordNodes(sailRoot, sailLanguage)

    for (const [name, group] of Object.entries(groups)) {
      result.groups[name] = { ...rebase(group), tags: group.tags.map((tag) => rebase(tag)) }
    }

    for (const [name, tag] of Object.entries(tags)) {
      result.tags[name] = { ...rebase(tag), group: rebase(tag.group) }
    }

    for (const [name, map] of Object.entries(maps)) {
      result.maps[name] = {
        ...rebase(map),
        fields: map.fields.map((fieldItem) => rebase(fieldItem)),
      }
    }

    // NOTE(review): words are passed through untouched; if they carry
    // positions those are still relative to the extracted snippet — confirm
    // against getWordNodes.
    result.words.push(...words)
  }

  return result
}

/**
 * Translates a position inside an extracted Sail snippet to the coordinates
 * of the enclosing JavaScript file.
 *
 * @param {{ row: number, column: number }} position - Position in the snippet.
 * @param {number} rowOffset - File row of the snippet's first line.
 * @param {number[]} colOffsets - For each snippet row, the file column at
 *   which that snippet line starts.
 * @returns {{ row: number, column: number }} Position in the file.
 */
function offsetPosition(position, rowOffset, colOffsets) {
  return {
    row: position.row + rowOffset,
    // The comment prefix is stripped from every line, so every row needs its
    // own shift, not only the first one.
    column: position.column + (colOffsets[position.row] ?? 0),
  }
}

/**
 * Pulls the Sail snippet that follows `@sail` out of a JSDoc comment node.
 *
 * The text after `@sail` up to the last `*\/` is split into lines, the
 * leading ` * ` decoration is removed from each line, and blank lines at
 * both ends are dropped.
 *
 * @param {object} commentNode - Comment node exposing `text` and
 *   `startPosition`.
 * @returns {{ sailText: string, rowOffset: number, colOffset: number,
 *   colOffsets: number[] } | null} The snippet, the file row of its first
 *   line, and per-line file columns (`colOffset` is the first of them);
 *   `null` when the comment is not a `/**` block, has no `@sail`, or the
 *   snippet is empty.
 */
function extractSailBlock(commentNode) {
  const marker = '@sail'
  const text = commentNode.text

  if (!text.startsWith('/**')) return null

  const sailIndex = text.indexOf(marker)
  if (sailIndex === -1) return null

  const beforeSail = text.slice(0, sailIndex)
  const sailLineIndex = (beforeSail.match(/\n/g) ?? []).length

  // File column of the character right after the marker. Only the comment's
  // first line is shifted by the comment's own start column; later lines
  // begin at column 0 of the file.
  const markerLineStart = beforeSail.lastIndexOf('\n') + 1
  const markerLineBase = sailLineIndex === 0 ? commentNode.startPosition.column : 0
  const afterMarkerColumn = markerLineBase + (sailIndex - markerLineStart) + marker.length

  const afterSail = text.slice(sailIndex + marker.length)
  const lastClose = afterSail.lastIndexOf('*/')
  const rawContent = lastClose !== -1 ? afterSail.slice(0, lastClose) : afterSail

  const lines = []
  const columns = []
  rawContent.split('\n').forEach((rawLine, index) => {
    const line = rawLine.replace(/^\s*\*\s?/, '')
    lines.push(line)
    // Line 0 is the remainder of the marker line; every line is further
    // shifted by however much decoration was stripped from it.
    columns.push((index === 0 ? afterMarkerColumn : 0) + (rawLine.length - line.length))
  })

  let firstNonEmpty = 0
  while (firstNonEmpty < lines.length && lines[firstNonEmpty].trim() === '') {
    firstNonEmpty++
  }

  let lastNonEmpty = lines.length - 1
  while (lastNonEmpty >= firstNonEmpty && lines[lastNonEmpty].trim() === '') {
    lastNonEmpty--
  }

  const sailLines = lines.slice(firstNonEmpty, lastNonEmpty + 1)
  if (sailLines.length === 0) return null

  const colOffsets = columns.slice(firstNonEmpty, lastNonEmpty + 1)

  // lines[0] sits on the marker's own row, so lines[i] is i rows below it.
  // The previous extra `+ 1` moved every reported row one line too far down.
  const rowOffset = commentNode.startPosition.row + sailLineIndex + firstNonEmpty
  const sailText = sailLines.join('\n')

  return { sailText, rowOffset, colOffset: colOffsets[0], colOffsets }
}
package/lib/jsTokenizer.js ADDED
@@ -0,0 +1,11 @@
1
+ import { Parser, Language } from 'web-tree-sitter'
2
+
3
/**
 * Initialises the tree-sitter runtime and returns a parser configured with
 * the JavaScript grammar.
 *
 * @returns {Promise<{ tokenizer: object, language: object }>} The configured
 *   parser together with the loaded language handle.
 */
export async function getJsTokenizer() {
  // NOTE(review): this is a relative path, so it presumably resolves against
  // the process working directory rather than this package — confirm.
  const grammarPath = 'node_modules/tree-sitter-javascript/tree-sitter-javascript.wasm'

  await Parser.init()

  const jsParser = new Parser()
  const jsLanguage = await Language.load(grammarPath)
  jsParser.setLanguage(jsLanguage)

  return { tokenizer: jsParser, language: jsLanguage }
}
package/lib/load.js ADDED
@@ -0,0 +1,110 @@
1
+ import { readFile, access } from 'node:fs/promises'
2
+ import { createRequire } from 'node:module'
3
+ import { dirname, extname, join, resolve } from 'node:path'
4
+
5
/**
 * Splits a package import specifier into the package name and the subpath
 * inside that package.
 *
 * @param {string} packageImport - Specifier such as `pkg`, `pkg/sub/file`,
 *   `@scope/pkg` or `@scope/pkg/sub`.
 * @returns {{ name: string, path: string }} `name` is the package name
 *   (two segments for scoped packages); `path` is `'.'` for the package root
 *   or `'./<rest>'` for a subpath.
 */
function parsePackageImport(packageImport) {
  const segments = packageImport.split('/')
  // A scoped name ("@scope/pkg") spans two segments, a plain one just one.
  const nameLength = packageImport.startsWith('@') ? 2 : 1

  const name = segments.slice(0, nameLength).join('/')
  const rest = segments.slice(nameLength).join('/')

  // The root is always reported as '.', for scoped packages too. They used
  // to yield './', which matched neither an exports key nor the `'.'` check
  // in resolveModulePath.
  return { name, path: rest === '' ? '.' : `./${rest}` }
}
21
+
22
/**
 * Locates the installation directory of the package named in an import
 * specifier, resolving from `execPath` the way `require` would.
 *
 * @param {string} execPath - File path the resolution starts from.
 * @param {string} packageImport - Package import specifier.
 * @returns {string} Directory containing the package's `package.json`.
 * @throws If `<name>/package.json` cannot be resolved from `execPath`.
 */
function findPackageDir(execPath, packageImport) {
  const packageName = parsePackageImport(packageImport).name
  const manifestPath = createRequire(execPath).resolve(`${packageName}/package.json`)
  return dirname(manifestPath)
}
28
+
29
/**
 * Resolves one `exports` entry to a target string by following its `sail`
 * condition.
 *
 * @param {*} entry - A target string, or an object that may carry an own
 *   `sail` property (itself a string or a further nested object).
 * @returns {string|null} The target string, or `null` when the entry is not
 *   a string and offers no `sail` chain ending in a string.
 */
function resolveConditionEntry(entry) {
  let current = entry

  // Walk down nested `sail` conditions until a string target shows up.
  while (typeof current !== 'string') {
    if (current === null || typeof current !== 'object') return null
    if (!Object.prototype.hasOwnProperty.call(current, 'sail')) return null
    current = current.sail
  }

  return current
}
45
+
46
/**
 * Looks up the `sail` target for `subpath` in a package's `exports` field.
 *
 * @param {*} exports - The `exports` value from `package.json`: a string, a
 *   subpath map (keys starting with `.`), or a conditions object.
 * @param {string} subpath - `'.'` for the package root or `'./<rest>'`.
 * @returns {string|null} The target for the subpath, or `null` when the
 *   package does not expose one.
 */
function resolveExportsCondition(exports, subpath) {
  if (!exports) return null

  if (typeof exports === 'string') {
    return subpath === '.' ? exports : null
  }

  const keys = Object.keys(exports)
  const isSubpathMap = keys.length > 0 && keys[0].startsWith('.')

  if (!isSubpathMap) {
    // A bare conditions object is shorthand for `{ ".": exports }`, so it
    // only describes the package root, exactly like the string form above.
    // It used to be applied to every subpath.
    return subpath === '.' ? resolveConditionEntry(exports) : null
  }

  const entry = exports[subpath]
  if (entry == null) return null

  return resolveConditionEntry(entry)
}
66
+
67
/**
 * Resolves a package import relative to `execPath`.
 *
 * When the package's `exports` field offers a `sail` target for the
 * requested subpath, that file is read and its text is returned. Otherwise
 * the import is resolved like a regular `require` and described as a
 * JavaScript file.
 *
 * NOTE(review): the two outcomes have different shapes — file *text* for
 * Sail targets, a `{ type, path }` descriptor for JavaScript — unlike
 * `resolveFilePath`, which always returns a descriptor. Confirm that callers
 * expect this.
 *
 * @param {string} execPath - File path the resolution starts from.
 * @param {string} packageImport - Package import specifier.
 * @returns {Promise<string | { type: 'js', path: string }>} Sail source text,
 *   or a descriptor of the resolved JavaScript entry.
 */
export async function resolveModulePath(execPath, packageImport) {
  const { name, path: subpath } = parsePackageImport(packageImport)

  const packageDir = findPackageDir(execPath, packageImport)
  const manifestText = await readFile(join(packageDir, 'package.json'), 'utf8')
  const manifest = JSON.parse(manifestText)

  const sailTarget = resolveExportsCondition(manifest.exports, subpath)
  if (!sailTarget) {
    // No Sail target: fall back to ordinary module resolution.
    const importId = subpath === '.' ? name : `${name}/${subpath.slice(2)}`
    return { type: 'js', path: createRequire(execPath).resolve(importId) }
  }

  return readFile(join(packageDir, sailTarget), 'utf8')
}
82
+
83
/**
 * Resolves a relative file import against the directory of `execPath`.
 *
 * Paths that already end in `.sail` or `.js` are resolved as given, without
 * checking that the file exists. For any other path, `<path>.sail` is probed
 * first and `<path>.js` second.
 *
 * @param {string} execPath - Path of the importing file.
 * @param {string} filePath - Import path as written in the source.
 * @returns {Promise<{ type: 'sail' | 'js', path: string }>} Kind and absolute
 *   path of the resolved file.
 * @throws {Error} When neither probed candidate is accessible.
 */
export async function resolveFilePath(execPath, filePath) {
  const base = dirname(execPath)
  const ext = extname(filePath)

  if (ext === '.sail' || ext === '.js') {
    return { type: ext.slice(1), path: resolve(base, filePath) }
  }

  for (const type of ['sail', 'js']) {
    const candidate = resolve(base, `${filePath}.${type}`)
    try {
      await access(candidate)
      return { type, path: candidate }
    } catch {
      // Not accessible: fall through to the next candidate.
    }
  }

  throw new Error(`Cannot find file "${filePath}" from "${base}"`)
}
package/lib/map.js ADDED
@@ -0,0 +1,52 @@
1
+ import { Query } from 'web-tree-sitter'
2
+ import { capture, fieldText, field, childrenOfType } from './utils.js'
3
+ import { mapDef, fieldDef, comment } from './tokens.js'
4
+ import { litType, groupType, mapType, moduleMapType } from './types.js'
5
+
6
/**
 * Finds every `map` node under `rootNode`, parses it, and indexes the
 * results by map name.
 *
 * @param {object} rootNode - Syntax-tree node to run the query against.
 * @param {object} language - Language handle used to compile the query.
 * @returns {{ maps: object }} Parsed maps keyed by name; a later map with
 *   the same name replaces an earlier one.
 */
export function getMapNodes(rootNode, language) {
  const mapQuery = new Query(language, `(map) @map`)
  const maps = {}

  for (const { captures } of mapQuery.matches(rootNode)) {
    const mapNode = capture(captures, 'map')
    // Matches that yield no `map` capture are dropped.
    if (!mapNode) continue

    const parsed = parseMap(mapNode)
    maps[parsed.name] = parsed
  }

  return { maps }
}
20
+
21
/**
 * Converts a `map` syntax node into a plain record.
 *
 * @param {object} node - The `map` node.
 * @returns {object} `{ name, doc, fields, startPosition, endPosition }`,
 *   where `fields` holds the parsed `field` children.
 */
function parseMap(node) {
  const name = mapDef(field(node, 'def'))
  const doc = comment(field(node, 'doc'))
  const fields = childrenOfType(node, 'field').map(parseMapField)
  const { startPosition, endPosition } = node

  return { name, doc, fields, startPosition, endPosition }
}
30
+
31
/**
 * Converts a map `field` syntax node into a plain record.
 *
 * @param {object} node - The `field` node.
 * @returns {object} `{ name, type, doc, startPosition, endPosition }`, with
 *   `type` described by `patseFieldType`.
 */
function parseMapField(node) {
  const name = fieldDef(field(node, 'key'))
  const type = patseFieldType(field(node, 'type'))
  const doc = comment(field(node, 'doc'))
  const { startPosition, endPosition } = node

  return { name, type, doc, startPosition, endPosition }
}
40
+
41
/**
 * Describes the type node of a map field, dispatching on the node's kind.
 *
 * The name is a misspelling of "parseFieldType"; it is kept because
 * `parseMapField` calls it under this name.
 *
 * @param {object|null} node - The field's type node, if any.
 * @returns {*} The result of `groupType`, `mapType` or `moduleMapType` for
 *   the matching node kinds, of `litType` for every other kind, or `null`
 *   when there is no type node.
 */
function patseFieldType(node) {
  // A field parsed from broken source may lack its type node. Reading
  // `node.type` would then throw and abort the whole map.
  if (!node) return null

  switch (node.type) {
    case 'group_type':
      return groupType(node)
    case 'map_ref':
      return mapType(node)
    case 'module_map_ref':
      return moduleMapType(node)
    default:
      return litType(node)
  }
}
package/lib/sailTokenizer.js ADDED
@@ -0,0 +1,9 @@
1
+ import { Parser, Language } from 'web-tree-sitter'
2
+
3
/**
 * Initialises the tree-sitter runtime and returns a parser configured with
 * the Sail grammar.
 *
 * @returns {Promise<{ tokenizer: object, language: object }>} The configured
 *   parser together with the loaded language handle.
 */
export async function getSailTokenizer() {
  // NOTE(review): this is a relative path, so it presumably resolves against
  // the process working directory rather than this package — confirm.
  const grammarPath = 'node_modules/@algosail/tree-sitter/tree-sitter-sail.wasm'

  await Parser.init()

  const sailParser = new Parser()
  const sailLanguage = await Language.load(grammarPath)
  sailParser.setLanguage(sailLanguage)

  return { tokenizer: sailParser, language: sailLanguage }
}
package/lib/symbolTable.js ADDED
@@ -0,0 +1,95 @@
1
+ import { readFile } from 'node:fs/promises'
2
+
3
+ import { getImportNodes } from './imports.js'
4
+ import { getTagNodes } from './group.js'
5
+ import { getMapNodes } from './map.js'
6
+ import { getWordNodes } from './word.js'
7
+ import { resolveModulePath, resolveFilePath } from './load.js'
8
+
9
/**
 * Parses Sail source text and collects its imports, groups, tags, maps,
 * words and syntax errors, loading imported modules along the way.
 *
 * @param {string} uri - Identifier of the file being parsed; imports are
 *   resolved relative to it.
 * @param {string} text - Sail source text.
 * @param {object} parser - Facade from `createParser`; `sailTokenizer` must
 *   be `{ tokenizer, language }`.
 * @returns {Promise<object>} `{ modules, groups, tags, maps, words, errors }`.
 */
export async function buildSymbolTable(uri, text, parser) {
  // The tokenizer and language live on `parser.sailTokenizer`.
  // `parser.parseSail` is the parse *function* and carries neither.
  const { tokenizer, language } = parser.sailTokenizer

  // `parse` yields a tree; queries and the error walk need its root node.
  const { rootNode } = tokenizer.parse(text)

  const imports = getImportNodes(rootNode, language)
  const { groups, tags } = getTagNodes(rootNode, language)
  const { maps } = getMapNodes(rootNode, language)
  const { words } = getWordNodes(rootNode, language)
  const errors = getErrors(rootNode)

  const modules = await getModules(imports, uri, parser)

  return { modules, groups, tags, maps, words, errors }
}
21
+
22
/**
 * Collects syntax problems from the descendants of `rootNode`.
 *
 * Nodes of type `ERROR` are reported as `'error'` with their text; nodes
 * flagged `isMissing` are reported as `'missing'` with their node type.
 * Only subtrees flagged `hasError` are descended into, and `rootNode`
 * itself is not inspected.
 *
 * @param {object} rootNode - Root of the syntax tree.
 * @returns {Array<object>} Records shaped
 *   `{ type, text, startPosition, endPosition }`.
 */
function getErrors(rootNode) {
  const report = (node, type, text) => ({
    type,
    text,
    startPosition: node.startPosition,
    endPosition: node.endPosition,
  })

  const found = []
  // Explicit work list instead of recursion; nodes are taken from the end.
  const pending = Array.from(rootNode.children)

  while (pending.length !== 0) {
    const current = pending.pop()

    if (current.type === 'ERROR') {
      found.push(report(current, 'error', current.text))
    } else if (current.isMissing) {
      found.push(report(current, 'missing', current.type))
    }

    if (current.hasError) {
      pending.push(...current.children)
    }
  }

  return found
}
52
+
53
/**
 * Loads and parses every module imported by a Sail file.
 *
 * @param {Array<object>} imports - Import records from `getImportNodes`
 *   (`type`, `path`, `module`, `startPosition`, `endPosition`).
 * @param {string} uri - Path of the importing file; imports are resolved
 *   relative to it.
 * @param {object} parser - Facade exposing `parseSail` and `parseJs`.
 * @returns {Promise<object>} Parse results keyed by module alias, each
 *   extended with the module's `uri` and the position of its import
 *   statement. Imports that are neither `'package'` nor `'file'` are skipped.
 */
async function getModules(imports, uri, parser) {
  const modules = {}

  for (const imp of imports) {
    const loaded = await loadImportedModule(imp, uri, parser)
    if (!loaded) continue

    modules[imp.module] = {
      ...loaded.res,
      uri: loaded.uri,
      startPosition: imp.startPosition,
      endPosition: imp.endPosition,
    }
  }

  return modules
}

/**
 * Resolves, reads and parses a single import.
 *
 * @returns {Promise<{ uri: string, res: object } | null>} The module's
 *   identifier and parse result, or `null` when the import cannot be
 *   classified.
 */
async function loadImportedModule(imp, uri, parser) {
  // Both resolvers take the importing file first and the specifier second.
  // The calls used to pass them the other way round, and the file branch
  // referenced an undefined `execPath`.
  let resolved
  if (imp.type === 'package') {
    resolved = await resolveModulePath(uri, imp.path)
  } else if (imp.type === 'file') {
    resolved = await resolveFilePath(uri, imp.path)
  } else {
    return null
  }

  if (typeof resolved === 'string') {
    // resolveModulePath hands back the Sail source text itself when the
    // package exposes a `sail` export target, so there is nothing to read.
    // No file path is known in that case; the import specifier stands in as
    // the module's identifier.
    // NOTE(review): relative imports inside such a module will then resolve
    // against the specifier, not the real file location — confirm.
    return { uri: imp.path, res: await parser.parseSail(imp.path, resolved) }
  }

  const { type, path: moduleUri } = resolved
  if (type !== 'sail' && type !== 'js') return null

  const content = await readFile(moduleUri, 'utf8')
  const res =
    type === 'sail'
      ? await parser.parseSail(moduleUri, content)
      : await parser.parseJs(moduleUri, content)

  return { uri: moduleUri, res }
}
package/lib/tokens.js ADDED
@@ -0,0 +1,79 @@
1
+ import { firstOfType, childrenOfField } from './utils.js'
2
// Shared reader for the tokens below: the node's text without its first
// character, or null when the node or its text is absent.
const textAfterFirstChar = (node) => node?.text?.slice(1) ?? null

/** Import path token without its leading character. */
export const pathDef = (node) => textAfterFirstChar(node)

/** Module definition token without its leading character. */
export const moduleDef = (node) => textAfterFirstChar(node)
/** Module reference token without its leading character. */
export const moduleRef = (node) => textAfterFirstChar(node)
/** Group definition token without its leading character. */
export const groupDef = (node) => textAfterFirstChar(node)
/** Group reference token without its leading character. */
export const groupRef = (node) => textAfterFirstChar(node)
/** Tag definition token without its leading character. */
export const tagDef = (node) => textAfterFirstChar(node)
/** Tag reference token without its leading character. */
export const tagRef = (node) => textAfterFirstChar(node)
/** Tag pattern token without its leading character. */
export const tagPattern = (node) => textAfterFirstChar(node)
/** Map definition token without its leading character. */
export const mapDef = (node) => textAfterFirstChar(node)
/** Map reference token without its leading character. */
export const mapRef = (node) => textAfterFirstChar(node)
/** Field definition token without its leading character. */
export const fieldDef = (node) => textAfterFirstChar(node)
14
+
15
/**
 * Splits a field reference token of the form `<map>.<field>`.
 *
 * @param {object|null} node - The field reference node.
 * @returns {{ map: string, field: string|undefined } | null} The map name
 *   without its leading character and the field name, or `null` when there
 *   is no node.
 */
export const fieldRef = (node) => {
  if (!node) return null
  const [map, field] = node.text.split('.')
  // `map` is a plain string here. It was previously passed to the
  // node-based `mapRef`, which looks for a `.text` property and therefore
  // always produced null.
  return { map: map.slice(1), field }
}
20
+
21
/** Word definition token without its leading character; null when absent. */
export const wordDef = (node) => {
  const name = node?.text?.slice(1)
  return name ?? null
}

/** Word reference token without its leading character; null when absent. */
export const wordRef = (node) => {
  const name = node?.text?.slice(1)
  return name ?? null
}
23
+
24
/**
 * Splits a module-qualified group reference at `&`.
 *
 * @param {object|null} node - The reference node.
 * @returns {{ module: string, group: string|undefined } | null} The module
 *   part without its leading character and the part after `&`; `null` when
 *   there is no node.
 */
export const moduleGroupRef = (node) => {
  if (node) {
    const parts = node.text.split('&')
    return { module: parts[0].slice(1), group: parts[1] }
  }
  return null
}
29
+
30
/**
 * Splits a module-qualified tag reference at `#`.
 *
 * @param {object|null} node - The reference node.
 * @returns {{ module: string, tag: string|undefined } | null} The module
 *   part without its leading character and the part after `#`; `null` when
 *   there is no node.
 */
export const moduleTagRef = (node) => {
  if (node) {
    const parts = node.text.split('#')
    return { module: parts[0].slice(1), tag: parts[1] }
  }
  return null
}
35
+
36
/**
 * Splits a module-qualified tag pattern at `_`.
 *
 * NOTE(review): the split happens at the first `_`, so a module name that
 * itself contains `_` would be cut short — confirm against the grammar.
 *
 * @param {object|null} node - The pattern node.
 * @returns {{ module: string, tag: string|undefined } | null} The module
 *   part without its leading character and the part after `_`; `null` when
 *   there is no node.
 */
export const moduleTagPattern = (node) => {
  if (node) {
    const parts = node.text.split('_')
    return { module: parts[0].slice(1), tag: parts[1] }
  }
  return null
}
41
+
42
/**
 * Splits a module-qualified map reference at `$`.
 *
 * @param {object|null} node - The reference node.
 * @returns {{ module: string, map: string|undefined } | null} The module
 *   part without its leading character and the part after `$`; `null` when
 *   there is no node.
 */
export const moduleMapRef = (node) => {
  if (node) {
    const parts = node.text.split('$')
    return { module: parts[0].slice(1), map: parts[1] }
  }
  return null
}
47
+
48
/**
 * Splits a module-qualified field reference of the form
 * `<module>$<map>.<field>`.
 *
 * @param {object|null} node - The reference node.
 * @returns {{ module: string, map: string|undefined,
 *   field: string|undefined } | null} The module part without its leading
 *   character plus the map and field names; parts missing from the text are
 *   `undefined`. `null` when there is no node.
 */
export const moduleFieldRef = (node) => {
  if (!node) return null
  const [module, mapAndField] = node.text.split('$')
  // Text without a `$` part (e.g. from a partially parsed reference) used to
  // throw here; it now yields undefined map/field, like the sibling helpers.
  const [map, field] = mapAndField?.split('.') ?? []
  return { module: module.slice(1), map, field }
}
54
+
55
/**
 * Splits a module-qualified word reference at `/`.
 *
 * @param {object|null} node - The reference node.
 * @returns {{ module: string, word: string|undefined } | null} The module
 *   part without its leading character and the part after the first `/`;
 *   `null` when there is no node.
 */
export const moduleWordRef = (node) => {
  if (node) {
    const parts = node.text.split('/')
    return { module: parts[0].slice(1), word: parts[1] }
  }
  return null
}
60
+
61
// Slice of the node's text, or null when the node or its text is absent.
// The `?.` on `text` matches the other token readers in this module; the
// previous `node?.text.slice(...)` form threw on a node without text.
const sliceNodeText = (node, start, end) => node?.text?.slice(start, end) ?? null

/** Full text of a type node; null when absent. */
export const type = (node) => node?.text ?? null
/** Full text of a type-variable node; null when absent. */
export const typeVar = (node) => node?.text ?? null
/** Spread token without its first two characters; null when absent. */
export const spread = (node) => sliceNodeText(node, 2)

/** Effect-add token without its leading character; null when absent. */
export const effectAdd = (node) => sliceNodeText(node, 1)
/** Effect-remove token without its leading character; null when absent. */
export const effectRemove = (node) => sliceNodeText(node, 1)

/** Slot-write token without its leading character; null when absent. */
export const slotWrite = (node) => sliceNodeText(node, 1)
/** Slot-read token without its leading character; null when absent. */
export const slotRead = (node) => sliceNodeText(node, 1)

/** Raw string token without its first and last character; null when absent. */
export const rawString = (node) => sliceNodeText(node, 1, -1)
/** Full text of a raw value node; null when absent. */
export const rawValue = (node) => node?.text ?? null
73
+
74
/**
 * Reads the trimmed text of a comment node's `comment_content` child.
 *
 * @param {object|null} commentNode - The comment node, if any.
 * @returns {string|null} The trimmed content, or `null` when the node or
 *   its content child is missing.
 */
export const comment = (commentNode) => {
  const content = commentNode ? firstOfType(commentNode, 'comment_content') : null
  return content ? content.text.trim() : null
}