@atproto/lex-builder 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/filter.d.ts +7 -0
  2. package/dist/filter.d.ts.map +1 -0
  3. package/dist/filter.js +30 -0
  4. package/dist/filter.js.map +1 -0
  5. package/dist/filtered-indexer.d.ts +2123 -0
  6. package/dist/filtered-indexer.d.ts.map +1 -0
  7. package/dist/filtered-indexer.js +56 -0
  8. package/dist/filtered-indexer.js.map +1 -0
  9. package/dist/formatter.d.ts +13 -0
  10. package/dist/formatter.d.ts.map +1 -0
  11. package/dist/formatter.js +34 -0
  12. package/dist/formatter.js.map +1 -0
  13. package/dist/index.d.ts +6 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +13 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/lex-builder.d.ts +20 -0
  18. package/dist/lex-builder.d.ts.map +1 -0
  19. package/dist/lex-builder.js +123 -0
  20. package/dist/lex-builder.js.map +1 -0
  21. package/dist/lex-def-builder.d.ts +66 -0
  22. package/dist/lex-def-builder.d.ts.map +1 -0
  23. package/dist/lex-def-builder.js +682 -0
  24. package/dist/lex-def-builder.js.map +1 -0
  25. package/dist/lexicon-directory-indexer.d.ts +11 -0
  26. package/dist/lexicon-directory-indexer.d.ts.map +1 -0
  27. package/dist/lexicon-directory-indexer.js +51 -0
  28. package/dist/lexicon-directory-indexer.js.map +1 -0
  29. package/dist/ref-resolver.d.ts +48 -0
  30. package/dist/ref-resolver.d.ts.map +1 -0
  31. package/dist/ref-resolver.js +246 -0
  32. package/dist/ref-resolver.js.map +1 -0
  33. package/dist/ts-lang.d.ts +3 -0
  34. package/dist/ts-lang.d.ts.map +1 -0
  35. package/dist/ts-lang.js +138 -0
  36. package/dist/ts-lang.js.map +1 -0
  37. package/dist/util.d.ts +11 -0
  38. package/dist/util.d.ts.map +1 -0
  39. package/dist/util.js +67 -0
  40. package/dist/util.js.map +1 -0
  41. package/package.json +49 -0
  42. package/src/filter.ts +41 -0
  43. package/src/filtered-indexer.test.ts +83 -0
  44. package/src/filtered-indexer.ts +60 -0
  45. package/src/formatter.ts +42 -0
  46. package/src/index.ts +17 -0
  47. package/src/lex-builder.ts +149 -0
  48. package/src/lex-def-builder.ts +912 -0
  49. package/src/lexicon-directory-indexer.ts +57 -0
  50. package/src/ref-resolver.ts +301 -0
  51. package/src/ts-lang.ts +134 -0
  52. package/src/util.ts +67 -0
package/src/lexicon-directory-indexer.ts ADDED
@@ -0,0 +1,57 @@
1
+ import { readFile, readdir } from 'node:fs/promises'
2
+ import { join } from 'node:path'
3
+ import {
4
+ LexiconDocument,
5
+ LexiconIterableIndexer,
6
+ lexiconDocumentSchema,
7
+ } from '@atproto/lex-document'
8
+
9
/**
 * Options accepted by {@link LexiconDirectoryIndexer}. They are the same
 * options the directory reader takes, and are handed over to it untouched.
 */
export type LexiconDirectoryIndexerOptions = ReadLexiconsOptions

/**
 * Lexicon indexer whose documents come from the `.json` files found
 * (recursively) under one or more directories.
 */
export class LexiconDirectoryIndexer extends LexiconIterableIndexer {
  /**
   * @param options - where to look for lexicon files, and how to react to files
   * that cannot be loaded.
   */
  constructor(options: LexiconDirectoryIndexerOptions) {
    // The generator is lazy: nothing is read from disk until it is iterated.
    const documents = readLexicons(options)
    super(documents)
  }
}
16
+
17
/** Settings controlling how lexicon documents are loaded from disk. */
type ReadLexiconsOptions = {
  /** Directory, or list of directories, scanned recursively for `.json` files. */
  lexicons: string | string[]
  /**
   * When truthy, a file that cannot be read, parsed or validated is reported
   * through `console.error` and skipped. Otherwise the first failure throws.
   */
  ignoreErrors?: boolean
}

/**
 * Lazily loads the lexicon documents stored under the configured directories.
 *
 * Every file whose path ends with `.json` is read as UTF-8, parsed as JSON and
 * validated with `lexiconDocumentSchema`. Other files are ignored.
 *
 * @param options - directories to scan and error handling policy.
 * @yields each successfully validated lexicon document.
 * @throws {Error} "Error parsing lexicon document <path>" (with the underlying
 * failure as `cause`) when a file cannot be loaded and `ignoreErrors` is not
 * set.
 */
async function* readLexicons(
  options: ReadLexiconsOptions,
): AsyncGenerator<LexiconDocument, void, unknown> {
  const dirs = Array.isArray(options.lexicons)
    ? options.lexicons
    : [options.lexicons]

  for (const dir of dirs) {
    for await (const filePath of listFiles(dir)) {
      if (!filePath.endsWith('.json')) continue

      let doc: LexiconDocument
      try {
        const data = await readFile(filePath, 'utf8')
        doc = lexiconDocumentSchema.parse(JSON.parse(data))
      } catch (cause) {
        const message = `Error parsing lexicon document ${filePath}`
        if (!options.ignoreErrors) throw new Error(message, { cause })
        console.error(`${message}:`, cause)
        continue
      }

      // Yield *outside* of the try block: an exception raised into the
      // generator while it is suspended here (e.g. through `.throw()`) is not
      // a parsing failure, and must neither be re-labelled as one nor be
      // swallowed by `ignoreErrors`.
      yield doc
    }
  }
}
43
+
44
/**
 * Recursively walks `dir`, yielding the path (`dir` joined with the entry
 * names) of every regular file and symbolic link encountered. Sub-directories
 * are descended into as they are met.
 *
 * A directory that does not exist (`ENOENT`) yields nothing; any other error
 * raised while listing a directory is propagated.
 */
async function* listFiles(dir: string): AsyncGenerator<string> {
  let entries
  try {
    entries = await readdir(dir, { withFileTypes: true })
  } catch (err) {
    // A missing directory is handled like an empty one
    if ((err as any)?.code === 'ENOENT') return
    throw err
  }

  for (const entry of entries) {
    const entryPath = join(dir, entry.name)
    if (entry.isDirectory()) {
      yield* listFiles(entryPath)
      continue
    }
    if (entry.isFile() || entry.isSymbolicLink()) yield entryPath
  }
}
package/src/ref-resolver.ts ADDED
@@ -0,0 +1,301 @@
1
+ import assert from 'node:assert'
2
+ import { join } from 'node:path'
3
+ import { SourceFile } from 'ts-morph'
4
+ import { LexiconDocument, LexiconIndexer } from '@atproto/lex-document'
5
+ import { isReservedWord, isSafeIdentifier } from './ts-lang.js'
6
+ import {
7
+ asRelativePath,
8
+ memoize,
9
+ toCamelCase,
10
+ toPascalCase,
11
+ ucFirst,
12
+ } from './util.js'
13
+
14
/**
 * Pair of names under which a lexicon definition can be referenced from
 * TypeScript code.
 */
export type ResolvedRef = {
  /**
   * Name (or member access expression, such as `Ns.foo` / `Ns["foo-bar"]`)
   * designating the definition's variable.
   */
  varName: string
  /** Name (possibly namespace-qualified, such as `Ns.Foo`) of the definition's type. */
  typeName: string
}
18
+
19
/**
 * Maps lexicon references onto the TypeScript identifiers to use in a given
 * source file, adding namespace "import" declarations to that file whenever a
 * reference targets another lexicon document.
 */
export class RefResolver {
  /** Next numeric suffix to hand out, per definition base identifier. */
  #defCounters = new Map<string, number>()

  /** Next numeric suffix to hand out, per namespace base identifier. */
  #nsIdentifiersCounters = new Map<string, number>()

  /**
   * @param doc - lexicon document the references are resolved from.
   * @param file - source file that receives the import declarations.
   * @param indexer - used to look up the lexicon documents being referenced.
   */
  constructor(
    private doc: LexiconDocument,
    private file: SourceFile,
    private indexer: LexiconIndexer,
  ) {}

  /**
   * Resolves a `[<nsid>][#<hash>]` reference. A missing `#<hash>` part means
   * `#main`. An empty NSID, or the NSID of the current document, designates a
   * local definition. Results are memoized per `ref` string.
   */
  public readonly resolve = memoize(
    async (ref: string): Promise<ResolvedRef> => {
      const [nsid, hash = 'main'] = ref.split('#')

      const isLocal = nsid === '' || nsid === this.doc.id
      if (isLocal) return this.resolveLocal(hash)

      // @NOTE: Always pass the normalized form (explicit "#main" fragment) so
      // that equivalent references share a single memoized result.
      return this.resolveExternal(`${nsid}#${hash}`)
    },
  )

  /**
   * Returns `<safeIdentifier>$<n>`, where `n` increases at every call made
   * with the same `safeIdentifier`.
   */
  private nextSafeDefinitionIdentifier(safeIdentifier: string) {
    const index = this.#defCounters.get(safeIdentifier) ?? 0
    this.#defCounters.set(safeIdentifier, index + 1)
    // @NOTE There is no need to check against the declarations of the file
    // here: the naming scheme should guarantee that nothing else uses the
    // <safeIdentifier>$<number> format.
    return `${safeIdentifier}$${index}`
  }

  /**
   * Resolves a definition of the current document (memoized per `hash`).
   *
   * @note The returned `typeName` and `varName` are *both* guaranteed to be
   * valid TypeScript identifiers.
   * @throws {Error} if the definition does not exist, or if its public type
   * name collides with the one of another definition of the document.
   */
  public readonly resolveLocal = memoize(
    async (hash: string): Promise<ResolvedRef> => {
      const hashes = Object.keys(this.doc.defs)

      if (!hashes.includes(hash)) {
        throw new Error(`Definition ${hash} not found in ${this.doc.id}`)
      }

      // Public type names are predictable, which means that two definitions
      // of a same document could end up with the same one.
      //
      // @NOTE Generating non-conflicting names for every public (type)
      // identifier of a project would be possible, but would make code
      // generation a lot more complex for a situation that is very unlikely
      // in practice. Conflicts are thus simply reported as errors.
      const { typeName: publicTypeName } = getPublicIdentifiers(hash)
      const clashingHash = hashes.find(
        (otherHash) =>
          otherHash !== hash &&
          getPublicIdentifiers(otherHash).typeName === publicTypeName,
      )
      if (clashingHash !== undefined) {
        throw new Error(
          `Conflicting type names for definitions #${hash} and #${clashingHash} in ${this.doc.id}`,
        )
      }

      // Try to keep an identifier that resembles the original hash
      const safeIdentifier = asSafeDefinitionIdentifier(hash)

      let varName: string
      if (!safeIdentifier) {
        // The hash cannot be turned into a safe identifier: generate one
        varName = this.nextSafeDefinitionIdentifier('def')
      } else if (
        hashes.some(
          (otherHash) =>
            otherHash !== hash &&
            asSafeDefinitionIdentifier(otherHash) === safeIdentifier,
        )
      ) {
        // Another definition maps to the same identifier. To keep names
        // stable, use the safe identifier as base and append a counter.
        varName = this.nextSafeDefinitionIdentifier(safeIdentifier)
      } else {
        // No other definition maps to it: use the safe identifier as-is
        varName = safeIdentifier
      }

      const typeName = ucFirst(varName)
      assert(varName !== typeName, 'Variable and type name should be different')

      return { varName, typeName }
    },
  )

  /**
   * Resolves a definition that lives in another lexicon document, importing
   * that document's module as a namespace in the file.
   *
   * @note This function is memoized and names local variables, so it must not
   * be given different spellings of a same reference (eg. with and without
   * the "#main" fragment). Callers normalize first.
   * @see {@link resolve}
   * @throws {Error} if the referenced document has no such definition.
   */
  private readonly resolveExternal = memoize(
    async (fullRef: string): Promise<ResolvedRef> => {
      const [nsid, hash] = fullRef.split('#')

      const modulePath = asRelativePath(
        this.file.getDirectoryPath(),
        join('/', ...nsid.split('.')),
      )
      const moduleSpecifier = `${modulePath}.defs.js`

      // Make sure the referenced definition exists before importing anything
      const srcDoc = await this.indexer.get(nsid)
      const srcDef = Object.hasOwn(srcDoc.defs, hash) ? srcDoc.defs[hash] : null
      if (!srcDef) {
        throw new Error(
          `Missing def "${hash}" in "${nsid}" (referenced from ${this.doc.id})`,
        )
      }

      // import * as <nsIdentifier> from './<moduleSpecifier>'
      const nsIdentifier = this.getNsIdentifier(nsid, moduleSpecifier)

      const { varName, typeName } = getPublicIdentifiers(hash)

      // Variable names that are not valid identifiers need string indexing
      const varAccess = isSafeIdentifier(varName)
        ? `.${varName}`
        : `[${JSON.stringify(varName)}]`

      return {
        varName: `${nsIdentifier}${varAccess}`,
        typeName: `${nsIdentifier}.${typeName}`,
      }
    },
  )

  /**
   * Returns the local name of the (non type-only) namespace import of
   * `moduleSpecifier`, adding that import to the file if there is none yet.
   */
  private getNsIdentifier(nsid: string, moduleSpecifier: string) {
    const existing = this.file.getImportDeclaration(
      (imp) =>
        !imp.isTypeOnly() &&
        imp.getModuleSpecifierValue() === moduleSpecifier &&
        imp.getNamespaceImport() != null,
    )

    const declaration =
      existing ??
      this.file.addImportDeclaration({
        moduleSpecifier,
        namespaceImport: this.computeSafeNamespaceIdentifierFor(nsid),
      })

    return declaration.getNamespaceImport()!.getText()
  }

  /**
   * Picks a namespace identifier for `nsid` that does not conflict with
   * anything else in the file, appending `$$<n>` to the base name until one is
   * free.
   */
  private computeSafeNamespaceIdentifierFor(nsid: string) {
    const baseName = nsidToIdentifier(nsid) || 'NS'

    let candidate = baseName
    while (this.isConflictingIdentifier(candidate)) {
      const suffix = this.#nsIdentifiersCounters.get(baseName) ?? 0
      this.#nsIdentifiersCounters.set(baseName, suffix + 1)
      candidate = `${baseName}$$${suffix}`
    }

    return candidate
  }

  /** Whether `name` is unusable as the local name of a new import. */
  private isConflictingIdentifier(name: string) {
    const checks: Array<(candidate: string) => boolean> = [
      (candidate) => this.conflictsWithKeywords(candidate),
      (candidate) => this.conflictsWithUtils(candidate),
      (candidate) => this.conflictsWithLocalDefs(candidate),
      (candidate) => this.conflictsWithLocalDeclarations(candidate),
      (candidate) => this.conflictsWithImports(candidate),
    ]
    return checks.some((check) => check(name))
  }

  /** Whether `name` is one of the reserved words. */
  private conflictsWithKeywords(name: string) {
    return isReservedWord(name)
  }

  /** Whether `name` is one of the names set aside for generated exports. */
  private conflictsWithUtils(name: string) {
    return (
      // "Main" has a special meaning in the context of lexicon definitions,
      // and cannot be used as imported ns identifier.
      name === 'Main' ||
      // When "useRecordExport" returns true, an export named "Record" is used
      // in addition to the hash named export, so that name is unavailable too.
      name === 'Record' ||
      // Utility functions generated for lexicon schemas are prefixed with "$"
      name.startsWith('$')
    )
  }

  /**
   * Whether `name` matches the camelCase variable name, or the corresponding
   * type name, of a definition of the current document (with or without a
   * leading underscore).
   */
  private conflictsWithLocalDefs(name: string) {
    for (const hash of Object.keys(this.doc.defs)) {
      const identifier = toCamelCase(hash)

      // A safe identifier will be generated, no risk of conflict.
      if (!identifier) continue

      const typeName = ucFirst(identifier)
      const taken = [identifier, `_${identifier}`, typeName, `_${typeName}`]
      if (taken.includes(name)) return true
    }

    return false
  }

  /**
   * Whether the file already contains a variable, type alias, interface,
   * class, function or enum named `name`.
   */
  private conflictsWithLocalDeclarations(name: string) {
    const { file } = this
    const isNamed = (node: { getName(): string | undefined }) =>
      node.getName() === name

    if (file.getVariableDeclarations().some(isNamed)) return true
    if (
      file
        .getVariableStatements()
        .some((statement) => statement.getDeclarations().some(isNamed))
    ) {
      return true
    }
    if (file.getTypeAliases().some(isNamed)) return true
    if (file.getInterfaces().some(isNamed)) return true
    if (file.getClasses().some(isNamed)) return true
    if (file.getFunctions().some(isNamed)) return true
    return file.getEnums().some(isNamed)
  }

  /** Whether one of the file's imports already binds `name` locally. */
  private conflictsWithImports(name: string) {
    for (const imp of this.file.getImportDeclarations()) {
      // import name from '...'
      if (imp.getDefaultImport()?.getText() === name) return true

      // import * as name from '...'
      if (imp.getNamespaceImport()?.getText() === name) return true

      for (const named of imp.getNamedImports()) {
        // import { name } from '...'
        // import { foo as name } from '...'
        const localName = named.getAliasNode()?.getText() ?? named.getName()
        if (localName === name) return true
      }
    }

    return false
  }
}
258
+
259
/**
 * Derives a PascalCase identifier from an NSID, to be used as the local name
 * of a namespace import.
 *
 * Only the last two segments of the NSID are used as contextual information by
 * default. If those do not form a safe identifier (typically because they
 * start with a digit), one more leading segment is included at a time, up to
 * and including the full NSID.
 *
 * @returns the first safe identifier found, or `undefined` if there is none
 * (which is always the case for inputs with fewer than two segments).
 * @see {@link https://atproto.com/specs/nsid NSID syntax spec}
 */
function nsidToIdentifier(nsid: string) {
  const parts = nsid.split('.')

  // `<=` (and not `<`) so that the last attempt is made with *every* segment.
  // Otherwise the full NSID is never tried, and a three segment NSID only
  // ever gets a single attempt.
  for (let i = 2; i <= parts.length; i++) {
    const identifier = toPascalCase(parts.slice(-i).join('.'))
    if (isSafeIdentifier(identifier)) return identifier
  }

  return undefined
}
276
+
277
/**
 * Computes the predictable, public, names of the definition identified by
 * `hash`.
 *
 * @note `typeName` is always a valid TypeScript identifier. `varName` is the
 * hash itself, so it may not be one (eg. if the hash contains unsafe
 * characters) and may need to be accessed using string indexing.
 */
export function getPublicIdentifiers(hash: string): ResolvedRef {
  const pascal = toPascalCase(hash)

  // @NOTE Unlike variables, exported types cannot be reached through string
  // indexing, so the type name *must* be a valid identifier. It must also
  // differ from the variable name. Whenever the PascalCase form of the hash
  // does not qualify, it gets a "Def" prefix.
  const usableAsIs =
    pascal !== '' && pascal !== hash && isSafeIdentifier(pascal)

  return { varName: hash, typeName: usableAsIs ? pascal : `Def${pascal}` }
}
295
+
296
/**
 * Returns an identifier resembling `name` that is safe to use both as-is and
 * with its first letter upper-cased: `name` itself when it qualifies, its
 * camelCase form otherwise.
 *
 * @returns the identifier, or `undefined` if neither form qualifies.
 */
function asSafeDefinitionIdentifier(name: string) {
  const isUsable = (candidate: string) =>
    isSafeIdentifier(candidate) && isSafeIdentifier(ucFirst(candidate))

  if (isUsable(name)) return name

  const camel = toCamelCase(name)
  return isUsable(camel) ? camel : undefined
}
package/src/ts-lang.ts ADDED
@@ -0,0 +1,134 @@
1
/**
 * Words that must not be used as identifiers: language keywords, as well as
 * the names of well-known globals. Each string below holds several
 * space-separated words.
 */
const RESERVED_WORDS: Set<string> = new Set(
  [
    // JavaScript keywords
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar
    'abstract arguments as async await boolean break byte case catch char class',
    'const continue debugger default delete do double else enum eval export',
    'extends false final finally float for from function get goto if implements',
    'import in instanceof int interface let long native new null of package',
    'private protected public return set short static super switch synchronized',
    'this throw throws transient true try typeof undefined using var void',
    'volatile while with yield',
    // Constructors and globals
    'Array Boolean Buffer Date Error Function Infinity JSON Map Math NaN Number',
    'Object Set String Symbol console document global globalThis window',
    // Test globals
    'afterAll afterEach assert beforeAll beforeEach describe expect it test',
    // CommonJS globals
    '__dirname __filename require module exports',
    // TypeScript
    'Record any declare never number object string symbol unknown',
    // Future reserved words
    'constructor meta',
  ].flatMap((group) => group.split(' ')),
)
128
/**
 * Tells whether `word` is one of the reserved words (case-sensitive, exact
 * match).
 */
export function isReservedWord(word: string): boolean {
  const reserved = RESERVED_WORDS.has(word)
  return reserved
}
131
+
132
/**
 * Tells whether `name` can be used as an identifier: it must not be a reserved
 * word, must start with an ASCII letter, `_` or `$`, and must otherwise only
 * contain ASCII letters, digits, `_` and `$`.
 */
export function isSafeIdentifier(name: string): boolean {
  if (isReservedWord(name)) return false
  return /^[a-zA-Z_$][a-zA-Z0-9_$]*$/.test(name)
}
package/src/util.ts ADDED
@@ -0,0 +1,67 @@
1
+ import { relative } from 'node:path'
2
+
3
/**
 * Wraps a single string argument function so that it runs at most once per
 * distinct argument: later calls return the value obtained the first time.
 *
 * @param fn - function to wrap. It must never return `undefined`, as that
 * value is what marks an argument as "not computed yet".
 * @returns a function with the same signature as `fn`.
 */
export function memoize<T extends (arg: string) => NonNullable<unknown> | null>(
  fn: T,
): T {
  const results = new Map<string, NonNullable<unknown> | null>()

  const memoized = (arg: string) => {
    const known = results.get(arg)
    if (known === undefined) {
      const computed = fn(arg)
      results.set(arg, computed)
      return computed
    }
    return known
  }

  return memoized as T
}
15
+
16
/** Returns `str` with its first character converted to upper case. */
export function ucFirst(str: string) {
  const head = str.slice(0, 1)
  const tail = str.slice(1)
  return head.toUpperCase() + tail
}
19
+
20
/** Returns `str` with its first character converted to lower case. */
export function lcFirst(str: string) {
  const head = str.slice(0, 1)
  const tail = str.slice(1)
  return head.toLowerCase() + tail
}
23
+
24
/**
 * Converts `str` to PascalCase: its words are lower-cased, capitalized, then
 * glued together. Yields an empty string when `str` contains no word.
 */
export function toPascalCase(str: string): string {
  let pascal = ''
  for (const word of extractWords(str)) {
    pascal += ucFirst(toLowerCase(word))
  }
  return pascal
}
27
+
28
/** Converts `str` to camelCase, i.e. PascalCase with a lower case first letter. */
export function toCamelCase(str: string): string {
  const pascal = toPascalCase(str)
  return lcFirst(pascal)
}
31
+
32
/**
 * Converts `str` to CONSTANT_CASE: its words are upper-cased, then joined with
 * underscores.
 */
export function toConstantCase(str: string): string {
  const words = extractWords(str)
  const shouted = words.map((word) => toUpperCase(word))
  return shouted.join('_')
}
35
+
36
/**
 * Function form of `String.prototype.toLowerCase` (convenient as a mapping
 * callback).
 */
export function toLowerCase(str: string): string {
  const lowered = str.toLowerCase()
  return lowered
}
39
+
40
/**
 * Function form of `String.prototype.toUpperCase` (convenient as a mapping
 * callback).
 */
export function toUpperCase(str: string): string {
  const raised = str.toUpperCase()
  return raised
}
43
+
44
/**
 * Splits `str` into words. Word boundaries are camelCase transitions, the end
 * of an upper case run followed by a capitalized word, a digit followed by a
 * letter, and any run of characters other than ASCII letters and digits.
 *
 * @returns the words, with their original casing. Empty when `str` does not
 * contain any letter or digit.
 */
function extractWords(str: string): string[] {
  // Applied in order, each rule marks a kind of boundary with a space
  const boundaryRules: ReadonlyArray<[RegExp, string]> = [
    [/([a-z0-9])([A-Z])/g, '$1 $2'], // split camelCase
    [/([A-Z])([A-Z][a-z])/g, '$1 $2'], // split ALLCAPSWords
    [/([0-9])([A-Za-z])/g, '$1 $2'], // split number followed by letter
    [/[^a-zA-Z0-9]+/g, ' '], // replace non-alphanumeric with space
  ]

  let spaced = str
  for (const [pattern, replacement] of boundaryRules) {
    spaced = spaced.replace(pattern, replacement)
  }
  spaced = spaced.trim()

  // Splitting an empty string would return [''] instead of []
  if (spaced === '') return []

  return spaced.split(/\s+/)
}
56
+
57
/**
 * Computes the path leading from `from` to `to`, always written in an
 * explicitly relative form: the result of `path.relative()` is prefixed with
 * `./` unless it already starts with `./` or `../`.
 */
export function asRelativePath(from: string, to: string) {
  const relPath = relative(from, to)
  const isExplicit = ['./', '../'].some((prefix) => relPath.startsWith(prefix))
  if (isExplicit) return relPath
  return `./${relPath}`
}
63
+
64
/**
 * Tells whether the first character of `str` is an ASCII digit (`0` to `9`).
 * An empty string yields `false`.
 */
export function startsWithDigit(str: string) {
  return /^[0-9]/.test(str)
}