npm - @atproto/lex-builder - Versions diffs - 0.0.0 - Mend

@atproto/lex-builder 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/dist/filter.d.ts +7 -0
package/dist/filter.d.ts.map +1 -0
package/dist/filter.js +30 -0
package/dist/filter.js.map +1 -0
package/dist/filtered-indexer.d.ts +2123 -0
package/dist/filtered-indexer.d.ts.map +1 -0
package/dist/filtered-indexer.js +56 -0
package/dist/filtered-indexer.js.map +1 -0
package/dist/formatter.d.ts +13 -0
package/dist/formatter.d.ts.map +1 -0
package/dist/formatter.js +34 -0
package/dist/formatter.js.map +1 -0
package/dist/index.d.ts +6 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +13 -0
package/dist/index.js.map +1 -0
package/dist/lex-builder.d.ts +20 -0
package/dist/lex-builder.d.ts.map +1 -0
package/dist/lex-builder.js +123 -0
package/dist/lex-builder.js.map +1 -0
package/dist/lex-def-builder.d.ts +66 -0
package/dist/lex-def-builder.d.ts.map +1 -0
package/dist/lex-def-builder.js +682 -0
package/dist/lex-def-builder.js.map +1 -0
package/dist/lexicon-directory-indexer.d.ts +11 -0
package/dist/lexicon-directory-indexer.d.ts.map +1 -0
package/dist/lexicon-directory-indexer.js +51 -0
package/dist/lexicon-directory-indexer.js.map +1 -0
package/dist/ref-resolver.d.ts +48 -0
package/dist/ref-resolver.d.ts.map +1 -0
package/dist/ref-resolver.js +246 -0
package/dist/ref-resolver.js.map +1 -0
package/dist/ts-lang.d.ts +3 -0
package/dist/ts-lang.d.ts.map +1 -0
package/dist/ts-lang.js +138 -0
package/dist/ts-lang.js.map +1 -0
package/dist/util.d.ts +11 -0
package/dist/util.d.ts.map +1 -0
package/dist/util.js +67 -0
package/dist/util.js.map +1 -0
package/package.json +49 -0
package/src/filter.ts +41 -0
package/src/filtered-indexer.test.ts +83 -0
package/src/filtered-indexer.ts +60 -0
package/src/formatter.ts +42 -0
package/src/index.ts +17 -0
package/src/lex-builder.ts +149 -0
package/src/lex-def-builder.ts +912 -0
package/src/lexicon-directory-indexer.ts +57 -0
package/src/ref-resolver.ts +301 -0
package/src/ts-lang.ts +134 -0
package/src/util.ts +67 -0

package/src/lexicon-directory-indexer.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import { readFile, readdir } from 'node:fs/promises'
+import { join } from 'node:path'
+import {
+  LexiconDocument,
+  LexiconIterableIndexer,
+  lexiconDocumentSchema,
+} from '@atproto/lex-document'
+/**
+ * Options accepted by the {@link LexiconDirectoryIndexer} constructor. This is
+ * a public alias of the options used to read lexicons from disk.
+ */
+export type LexiconDirectoryIndexerOptions = ReadLexiconsOptions
+/**
+ * Lexicon indexer whose documents come from `.json` files found in one or more
+ * directories. The async generator that reads those files is handed to the
+ * {@link LexiconIterableIndexer} base class.
+ */
+export class LexiconDirectoryIndexer extends LexiconIterableIndexer {
+  /**
+   * @param options - Directories to scan and the error handling policy.
+   */
+  constructor(options: LexiconDirectoryIndexerOptions) {
+    super(readLexicons(options))
+  }
+}
+/**
+ * Options controlling how lexicon documents are read from the file system.
+ */
+type ReadLexiconsOptions = {
+  /** Directory, or list of directories, that are scanned recursively. */
+  lexicons: string | string[]
+  /**
+   * When truthy, a file that cannot be read or parsed is reported through
+   * `console.error` and skipped. Otherwise the failure is thrown.
+   */
+  ignoreErrors?: boolean
+}
+/**
+ * Walks every configured directory and yields each `.json` file after passing
+ * its parsed JSON content through `lexiconDocumentSchema.parse`.
+ *
+ * @param options - Directories to scan, and whether failures are fatal.
+ * @throws Error (with the original failure as `cause`) when a file cannot be
+ * read or parsed, unless `options.ignoreErrors` is truthy, in which case the
+ * failure is logged with `console.error` and the file is skipped.
+ */
+async function* readLexicons(
+  options: ReadLexiconsOptions,
+): AsyncGenerator<LexiconDocument, void, unknown> {
+  const roots = Array.isArray(options.lexicons)
+    ? options.lexicons
+    : [options.lexicons]
+  for (const root of roots) {
+    for await (const candidate of listFiles(root)) {
+      // Only JSON files are considered lexicon documents
+      if (!candidate.endsWith('.json')) continue
+      try {
+        const contents = await readFile(candidate, 'utf8')
+        // @NOTE The yield is intentionally kept inside the try block so that
+        // the error handling below applies to the whole statement.
+        yield lexiconDocumentSchema.parse(JSON.parse(contents))
+      } catch (cause) {
+        const message = `Error parsing lexicon document ${candidate}`
+        if (!options.ignoreErrors) throw new Error(message, { cause })
+        console.error(`${message}:`, cause)
+      }
+    }
+  }
+}
+/**
+ * Recursively yields the path of every file (or symbolic link) found under
+ * `dir`. Sub-directories are traversed depth-first, in the order returned by
+ * `readdir`.
+ *
+ * @param dir - Directory to list. A directory that does not exist (`ENOENT`)
+ * yields nothing; any other `readdir` failure is re-thrown.
+ */
+async function* listFiles(dir: string): AsyncGenerator<string> {
+  const readEntries = async () => {
+    try {
+      return await readdir(dir, { withFileTypes: true })
+    } catch (err) {
+      // A missing directory is treated the same as an empty one
+      if ((err as { code?: unknown } | null | undefined)?.code === 'ENOENT') {
+        return []
+      }
+      throw err
+    }
+  }
+  for (const entry of await readEntries()) {
+    const entryPath = join(dir, entry.name)
+    if (entry.isDirectory()) {
+      yield* listFiles(entryPath)
+      continue
+    }
+    if (entry.isFile() || entry.isSymbolicLink()) yield entryPath
+  }
+}

package/src/ref-resolver.ts ADDED Viewed

@@ -0,0 +1,301 @@
+import assert from 'node:assert'
+import { join } from 'node:path'
+import { SourceFile } from 'ts-morph'
+import { LexiconDocument, LexiconIndexer } from '@atproto/lex-document'
+import { isReservedWord, isSafeIdentifier } from './ts-lang.js'
+import {
+  asRelativePath,
+  memoize,
+  toCamelCase,
+  toPascalCase,
+  ucFirst,
+} from './util.js'
+/**
+ * Result of resolving a lexicon reference to TypeScript code.
+ */
+export type ResolvedRef = {
+  /**
+   * Text used to reference the definition's value. Depending on how the
+   * reference was resolved, this is either a bare identifier or a member
+   * access on a namespace import.
+   */
+  varName: string
+  /** Text used to reference the definition's type. */
+  typeName: string
+}
+/**
+ * Utility class to resolve lexicon references to TypeScript identifiers,
+ * generating "import" statements as needed.
+ *
+ * An instance is bound to one lexicon document and to the source file being
+ * generated for it. References to other documents are looked up through the
+ * provided indexer.
+ */
+export class RefResolver {
+  /**
+   * @param doc - Lexicon document the references are resolved from.
+   * @param file - Source file that receives the generated import declarations.
+   * @param indexer - Used to load documents referenced by external refs.
+   */
+  constructor(
+    private doc: LexiconDocument,
+    private file: SourceFile,
+    private indexer: LexiconIndexer,
+  ) {}
+  /**
+   * Resolves a `<nsid>#<hash>` reference. The hash defaults to `main` when the
+   * reference has no fragment. A reference with an empty NSID, or with the
+   * NSID of the current document, is resolved locally; anything else is
+   * resolved as an external reference.
+   *
+   * Results are memoized per `ref` string.
+   */
+  public readonly resolve = memoize(
+    async (ref: string): Promise<ResolvedRef> => {
+      const [nsid, hash = 'main'] = ref.split('#')
+      if (nsid === '' || nsid === this.doc.id) {
+        return this.resolveLocal(hash)
+      } else {
+        // @NOTE: Normalize (#main fragment) to ensure proper memoization
+        const fullRef = `${nsid}#${hash}`
+        return this.resolveExternal(fullRef)
+      }
+    },
+  )
+  // Number of identifiers already generated for each base identifier
+  #defCounters = new Map<string, number>()
+  /**
+   * Returns `<safeIdentifier>$<n>`, where `n` starts at 0 and increases each
+   * time the same base identifier is requested.
+   */
+  private nextSafeDefinitionIdentifier(safeIdentifier: string) {
+    const count = this.#defCounters.get(safeIdentifier) ?? 0
+    this.#defCounters.set(safeIdentifier, count + 1)
+    // @NOTE We don't need to check against local declarations in the file here
+    // since we are using a naming system that should guarantee no other
+    // identifier has a <safeIdentifier>$<number> format.
+    return `${safeIdentifier}$${count}`
+  }
+  /**
+   * Resolves a definition of the current document to local identifiers.
+   * Results are memoized per `hash`.
+   *
+   * Throws if the definition does not exist in the document, or if another
+   * definition of the document has the same public type name.
+   *
+   * @note The returned `typeName` and `varName` are *both* guaranteed to be
+   * valid TypeScript identifiers.
+   */
+  public readonly resolveLocal = memoize(
+    async (hash: string): Promise<ResolvedRef> => {
+      const hashes = Object.keys(this.doc.defs)
+      if (!hashes.includes(hash)) {
+        throw new Error(`Definition ${hash} not found in ${this.doc.id}`)
+      }
+      // Because we are using predictable "public" identifiers for type names,
+      // we need to ensure there are no conflicts between different definitions
+      // in the same lexicon document.
+      //
+      // @NOTE It should be possible to implement a way to generate
+      // non-conflicting type names for all public (type) identifiers in a
+      // project. However, this would add a lot of complexity to the code
+      // generation process, and the likelihood of such conflicts happening in
+      // practice is very low, so we opt for a simpler approach of just throwing
+      // an error if a conflict is detected.
+      const pub = getPublicIdentifiers(hash)
+      for (const otherHash of hashes) {
+        if (otherHash === hash) continue
+        const otherPub = getPublicIdentifiers(otherHash)
+        if (otherPub.typeName === pub.typeName) {
+          throw new Error(
+            `Conflicting type names for definitions #${hash} and #${otherHash} in ${this.doc.id}`,
+          )
+        }
+      }
+      // Try to keep an identifier that resembles the original hash
+      const safeIdentifier = asSafeDefinitionIdentifier(hash)
+      // If the safe identifier is not conflicting with other definition names,
+      // or reserved words, we can use it as-is. Otherwise, we need to generate
+      // a unique safe identifier.
+      const varName = safeIdentifier
+        ? !hashes.some((otherHash) => {
+            if (otherHash === hash) return false
+            const otherIdentifier = asSafeDefinitionIdentifier(otherHash)
+            return otherIdentifier === safeIdentifier
+          })
+          ? // Safe identifier can be used as-is as it does not conflict with
+            // other definition names
+            safeIdentifier
+          : // In order to keep identifiers stable, we use the safe identifier
+            // as base, and append a counter to avoid conflicts
+            this.nextSafeDefinitionIdentifier(safeIdentifier)
+        : // hash only contained unsafe characters, generate a safe one
+          this.nextSafeDefinitionIdentifier('def')
+      // The local type name is the variable name with an upper-cased first
+      // character, so both must differ for the two declarations to coexist.
+      const typeName = ucFirst(varName)
+      assert(varName !== typeName, 'Variable and type name should be different')
+      return { varName, typeName }
+    },
+  )
+  /**
+   * Resolves a reference to a definition of another lexicon document. The
+   * result is expressed as member accesses on a namespace import of the
+   * generated `.defs.js` module of that document, which is added to the file
+   * if needed.
+   *
+   * Throws if the referenced definition does not exist in the document
+   * returned by the indexer.
+   *
+   * @note Since this is a memoized function, and is used to generate the name
+   * of local variables, we should avoid returning different results for
+   * similar, but non strictly equal, inputs (eg. normalized / non-normalized).
+   * @see {@link resolve}
+   */
+  private readonly resolveExternal = memoize(
+    async (fullRef: string): Promise<ResolvedRef> => {
+      const [nsid, hash] = fullRef.split('#')
+      // Path of the module, relative to the directory of the current file.
+      // Each NSID segment is used as a path segment.
+      const moduleSpecifier = `${asRelativePath(
+        this.file.getDirectoryPath(),
+        join('/', ...nsid.split('.')),
+      )}.defs.js`
+      // Let's first make sure the referenced lexicon exists
+      const srcDoc = await this.indexer.get(nsid)
+      const srcDef = Object.hasOwn(srcDoc.defs, hash) ? srcDoc.defs[hash] : null
+      if (!srcDef) {
+        throw new Error(
+          `Missing def "${hash}" in "${nsid}" (referenced from ${this.doc.id})`,
+        )
+      }
+      // import * as <nsIdentifier> from './<moduleSpecifier>'
+      const nsIdentifier = this.getNsIdentifier(nsid, moduleSpecifier)
+      const publicIds = getPublicIdentifiers(hash)
+      return {
+        // Fall back to string indexing when the exported variable name is not
+        // a safe identifier
+        varName: isSafeIdentifier(publicIds.varName)
+          ? `${nsIdentifier}.${publicIds.varName}`
+          : `${nsIdentifier}[${JSON.stringify(publicIds.varName)}]`,
+        typeName: `${nsIdentifier}.${publicIds.typeName}`,
+      }
+    },
+  )
+  /**
+   * Returns the identifier of the namespace import for `moduleSpecifier`. An
+   * existing (non type-only) namespace import of that module is reused when
+   * the file already has one; otherwise a new import declaration is added.
+   */
+  private getNsIdentifier(nsid: string, moduleSpecifier: string) {
+    const namespaceImportDeclaration =
+      this.file.getImportDeclaration(
+        (imp) =>
+          !imp.isTypeOnly() &&
+          imp.getModuleSpecifierValue() === moduleSpecifier &&
+          imp.getNamespaceImport() != null,
+      ) ||
+      this.file.addImportDeclaration({
+        moduleSpecifier,
+        namespaceImport: this.computeSafeNamespaceIdentifierFor(nsid),
+      })
+    return namespaceImportDeclaration.getNamespaceImport()!.getText()
+  }
+  // Number of suffixed identifiers already generated for each base name
+  #nsIdentifiersCounters = new Map<string, number>()
+  /**
+   * Computes a namespace import identifier for `nsid` that does not conflict
+   * with anything checked by {@link isConflictingIdentifier}. The base name is
+   * derived from the NSID (or is `NS` when none can be derived), and a
+   * `$$<n>` suffix is appended for as long as the name conflicts.
+   */
+  private computeSafeNamespaceIdentifierFor(nsid: string) {
+    const baseName = nsidToIdentifier(nsid) || 'NS'
+    let name = baseName
+    while (this.isConflictingIdentifier(name)) {
+      const count = this.#nsIdentifiersCounters.get(baseName) ?? 0
+      this.#nsIdentifiersCounters.set(baseName, count + 1)
+      name = `${baseName}$$${count}`
+    }
+    return name
+  }
+  /**
+   * Whether `name` cannot be used as a new namespace import identifier in the
+   * current file.
+   */
+  private isConflictingIdentifier(name: string) {
+    return (
+      this.conflictsWithKeywords(name) ||
+      this.conflictsWithUtils(name) ||
+      this.conflictsWithLocalDefs(name) ||
+      this.conflictsWithLocalDeclarations(name) ||
+      this.conflictsWithImports(name)
+    )
+  }
+  /** Whether `name` is one of the reserved words. */
+  private conflictsWithKeywords(name: string) {
+    return isReservedWord(name)
+  }
+  /** Whether `name` collides with names reserved for generated helpers. */
+  private conflictsWithUtils(name: string) {
+    // Do not allow "Main" as imported ns identifier since it has a special
+    // meaning in the context of lexicon definitions.
+    if (name === 'Main') return true
+    // When "useRecordExport" returns true, an export named "Record" will be
+    // used in addition to the hash named export. So we need to make sure both
+    // names are not conflicting with local variables.
+    if (name === 'Record') return true
+    // Utility functions generated for lexicon schemas are prefixed with "$"
+    return name.startsWith('$')
+  }
+  /**
+   * Whether `name` matches the camelCase identifier of a definition of the
+   * current document, its upper-cased-first variant, or either of those
+   * prefixed with an underscore.
+   */
+  private conflictsWithLocalDefs(name: string) {
+    return Object.keys(this.doc.defs).some((hash) => {
+      const identifier = toCamelCase(hash)
+      // A safe identifier will be generated, no risk of conflict.
+      if (!identifier) return false
+      // The imported name conflicts with a local definition name
+      if (identifier === name || `_${identifier}` === name) return true
+      // The imported name conflicts with the type name of a local definition
+      const typeName = ucFirst(identifier)
+      if (typeName === name || `_${typeName}` === name) return true
+      return false
+    })
+  }
+  /**
+   * Whether the file already declares a variable, type alias, interface,
+   * class, function or enum named `name`.
+   */
+  private conflictsWithLocalDeclarations(name: string) {
+    return (
+      this.file.getVariableDeclarations().some((v) => v.getName() === name) ||
+      this.file
+        .getVariableStatements()
+        .some((vs) => vs.getDeclarations().some((d) => d.getName() === name)) ||
+      this.file.getTypeAliases().some((t) => t.getName() === name) ||
+      this.file.getInterfaces().some((i) => i.getName() === name) ||
+      this.file.getClasses().some((c) => c.getName() === name) ||
+      this.file.getFunctions().some((f) => f.getName() === name) ||
+      this.file.getEnums().some((e) => e.getName() === name)
+    )
+  }
+  /**
+   * Whether an import declaration of the file already binds `name`, be it as
+   * default import, namespace import or (possibly aliased) named import.
+   */
+  private conflictsWithImports(name: string) {
+    return this.file.getImportDeclarations().some(
+      (imp) =>
+        // import name from '...'
+        imp.getDefaultImport()?.getText() === name ||
+        // import * as name from '...'
+        imp.getNamespaceImport()?.getText() === name ||
+        imp.getNamedImports().some(
+          (named) =>
+            // import { name } from '...'
+            // import { foo as name } from '...'
+            (named.getAliasNode()?.getText() ?? named.getName()) === name,
+        ),
+    )
+  }
+}
+/**
+ * Derives a PascalCase identifier from an NSID, for use as the name of a
+ * namespace import.
+ *
+ * @param nsid - Dot separated namespaced identifier.
+ * @returns The identifier built from the shortest usable suffix of the NSID
+ * (starting with its last two segments), or `undefined` if no suffix, up to
+ * and including the full NSID, forms a safe identifier.
+ * @see {@link https://atproto.com/specs/nsid NSID syntax spec}
+ */
+function nsidToIdentifier(nsid: string) {
+  const parts = nsid.split('.')
+  // By default, try to keep only to the last two segments of the NSID as
+  // contextual information. If those do not form a safe identifier (typically
+  // because they start with a digit), try with more segments until we reach the
+  // full NSID.
+  //
+  // @NOTE The bound is inclusive so that the candidate made of *all* the
+  // segments is also tried (e.g. "com.4chan.foo" yields "Com4ChanFoo").
+  for (let i = 2; i <= parts.length; i++) {
+    const identifier = toPascalCase(parts.slice(-i).join('.'))
+    if (isSafeIdentifier(identifier)) return identifier
+  }
+  return undefined
+}
+/**
+ * Computes the predictable, publicly exported names of a definition from its
+ * hash.
+ *
+ * @param hash - Name of the definition within its lexicon document.
+ * @returns `varName` is the hash itself, and `typeName` its PascalCase form
+ * (prefixed with `Def` when that form is empty, identical to the hash, or not
+ * a safe identifier).
+ *
+ * @note The returned `typeName` is guaranteed to be a valid TypeScript
+ * identifier. `varName` may not be a valid identifier (eg. if the hash contains
+ * unsafe characters), and may need to be accessed using string indexing.
+ */
+export function getPublicIdentifiers(hash: string): ResolvedRef {
+  // @NOTE Type names *must* be valid TypeScript identifiers (this is because,
+  // unlike variable names, we cannot use string indexing to access exported
+  // types).
+  const pascal = toPascalCase(hash)
+  const usableAsIs =
+    pascal !== '' && pascal !== hash && isSafeIdentifier(pascal)
+  return {
+    varName: hash,
+    typeName: usableAsIs ? pascal : `Def${pascal}`,
+  }
+}
+/**
+ * Returns an identifier resembling `name` that can be used both as a variable
+ * name and, once its first character is upper-cased, as a type name.
+ *
+ * @returns `name` itself when usable, otherwise its camelCase form when that
+ * one is usable, otherwise `undefined`.
+ */
+function asSafeDefinitionIdentifier(name: string) {
+  // Both the identifier and its type name counterpart must be safe
+  const isUsable = (candidate: string) =>
+    isSafeIdentifier(candidate) && isSafeIdentifier(ucFirst(candidate))
+  if (isUsable(name)) return name
+  const camel = toCamelCase(name)
+  return isUsable(camel) ? camel : undefined
+}

package/src/ts-lang.ts ADDED Viewed

@@ -0,0 +1,134 @@
+/**
+ * Words that must not be used as generated identifiers. Besides language
+ * keywords, the list contains the names of common globals, grouped by origin
+ * in the sections below.
+ */
+const RESERVED_WORDS = new Set([
+  // JavaScript keywords
+  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar
+  'abstract',
+  'arguments',
+  'as',
+  'async',
+  'await',
+  'boolean',
+  'break',
+  'byte',
+  'case',
+  'catch',
+  'char',
+  'class',
+  'const',
+  'continue',
+  'debugger',
+  'default',
+  'delete',
+  'do',
+  'double',
+  'else',
+  'enum',
+  'eval',
+  'export',
+  'extends',
+  'false',
+  'final',
+  'finally',
+  'float',
+  'for',
+  'from',
+  'function',
+  'get',
+  'goto',
+  'if',
+  'implements',
+  'import',
+  'in',
+  'instanceof',
+  'int',
+  'interface',
+  'let',
+  'long',
+  'native',
+  'new',
+  'null',
+  'of',
+  'package',
+  'private',
+  'protected',
+  'public',
+  'return',
+  'set',
+  'short',
+  'static',
+  'super',
+  'switch',
+  'synchronized',
+  'this',
+  'throw',
+  'throws',
+  'transient',
+  'true',
+  'try',
+  'typeof',
+  'undefined',
+  'using',
+  'var',
+  'void',
+  'volatile',
+  'while',
+  'with',
+  'yield',
+  // Constructors and globals
+  'Array',
+  'Boolean',
+  'Buffer',
+  'Date',
+  'Error',
+  'Function',
+  'Infinity',
+  'JSON',
+  'Map',
+  'Math',
+  'NaN',
+  'Number',
+  'Object',
+  'Set',
+  'String',
+  'Symbol',
+  'console',
+  'document',
+  'global',
+  'globalThis',
+  'window',
+  // Test globals
+  'afterAll',
+  'afterEach',
+  'assert',
+  'beforeAll',
+  'beforeEach',
+  'describe',
+  'expect',
+  'it',
+  'test',
+  // CommonJS globals
+  '__dirname',
+  '__filename',
+  'require',
+  'module',
+  'exports',
+  // TypeScript
+  'Record',
+  'any',
+  'declare',
+  'never',
+  'number',
+  'object',
+  'string',
+  'symbol',
+  'unknown',
+  // Future reserved words
+  'constructor',
+  'meta',
+])
+/**
+ * Tells whether `word` is listed in the reserved words set. The lookup is
+ * case-sensitive.
+ */
+export function isReservedWord(word: string) {
+  return RESERVED_WORDS.has(word)
+}
+/**
+ * Tells whether `name` can be used as-is as an identifier in generated code:
+ * it must not be a reserved word, and must only contain ASCII letters, digits,
+ * `_` and `$`, without starting with a digit.
+ */
+export function isSafeIdentifier(name: string) {
+  if (isReservedWord(name)) return false
+  return /^[a-zA-Z_$][a-zA-Z0-9_$]*$/.test(name)
+}

package/src/util.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import { relative } from 'node:path'
+/**
+ * Wraps a single string argument function so that its result is computed only
+ * once per distinct argument. Subsequent calls return the stored result.
+ *
+ * @param fn - Function to wrap. It must never return `undefined`, since that
+ * value is used to detect arguments that were not seen yet.
+ * @returns A function with the same signature as `fn`.
+ */
+export function memoize<T extends (arg: string) => NonNullable<unknown> | null>(
+  fn: T,
+): T {
+  const results = new Map<string, NonNullable<unknown> | null>()
+  const memoized = (key: string) => {
+    const hit = results.get(key)
+    if (hit === undefined) {
+      // First call for this key: compute and remember the result
+      const computed = fn(key)
+      results.set(key, computed)
+      return computed
+    }
+    return hit
+  }
+  return memoized as T
+}
+/**
+ * Upper-cases the first character of `str`, leaving the rest untouched.
+ */
+export function ucFirst(str: string) {
+  const head = str.slice(0, 1)
+  const tail = str.slice(1)
+  return `${head.toUpperCase()}${tail}`
+}
+/**
+ * Lower-cases the first character of `str`, leaving the rest untouched.
+ */
+export function lcFirst(str: string) {
+  const head = str.slice(0, 1)
+  const tail = str.slice(1)
+  return `${head.toLowerCase()}${tail}`
+}
+/**
+ * Converts `str` to PascalCase: every word found in the input is lower-cased,
+ * gets its first character upper-cased, and the words are then concatenated.
+ * An input without any word results in an empty string.
+ */
+export function toPascalCase(str: string): string {
+  let result = ''
+  for (const word of extractWords(str)) {
+    result += ucFirst(toLowerCase(word))
+  }
+  return result
+}
+/**
+ * Converts `str` to camelCase, i.e. its PascalCase form with a lower-cased
+ * first character.
+ */
+export function toCamelCase(str: string): string {
+  const pascal = toPascalCase(str)
+  return lcFirst(pascal)
+}
+/**
+ * Converts `str` to CONSTANT_CASE: the words found in the input are
+ * upper-cased and joined with underscores.
+ */
+export function toConstantCase(str: string): string {
+  const words = extractWords(str)
+  const shouted = words.map((word) => toUpperCase(word))
+  return shouted.join('_')
+}
+/**
+ * Function form of `String.prototype.toLowerCase`, convenient as a callback
+ * (eg. with `Array.prototype.map`).
+ */
+export function toLowerCase(str: string): string {
+  return str.toLowerCase()
+}
+/**
+ * Function form of `String.prototype.toUpperCase`, convenient as a callback
+ * (eg. with `Array.prototype.map`).
+ */
+export function toUpperCase(str: string): string {
+  return str.toUpperCase()
+}
+/**
+ * Splits `str` into its constituent words. Word boundaries are camelCase
+ * transitions, the end of an upper-case run followed by a capitalized word, a
+ * digit followed by a letter, and any run of non-alphanumeric characters.
+ *
+ * @returns The words in order of appearance, with their original casing. An
+ * input without any alphanumeric character results in an empty array.
+ */
+function extractWords(str: string): string[] {
+  let spaced = str
+  // split camelCase
+  spaced = spaced.replace(/([a-z0-9])([A-Z])/g, '$1 $2')
+  // split ALLCAPSWords
+  spaced = spaced.replace(/([A-Z])([A-Z][a-z])/g, '$1 $2')
+  // split number followed by letter
+  spaced = spaced.replace(/([0-9])([A-Za-z])/g, '$1 $2')
+  // replace non-alphanumeric with space
+  spaced = spaced.replace(/[^a-zA-Z0-9]+/g, ' ')
+  const normalized = spaced.trim()
+  // Avoid returning [''] for empty strings
+  if (normalized === '') return []
+  return normalized.split(/\s+/)
+}
+/**
+ * Computes the path leading from the `from` directory to `to`, making sure the
+ * result is explicitly relative (i.e. starts with `./` or `../`).
+ */
+export function asRelativePath(from: string, to: string) {
+  const relPath = relative(from, to)
+  const isExplicit = ['./', '../'].some((prefix) => relPath.startsWith(prefix))
+  if (isExplicit) return relPath
+  return `./${relPath}`
+}
+/**
+ * Tells whether the first character of `str` is an ASCII digit (`0` to `9`).
+ * An empty string results in `false`.
+ */
+export function startsWithDigit(str: string) {
+  const first = str.charAt(0)
+  return first >= '0' && first <= '9'
+}