@polyglot-sql/sdk 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -462,9 +462,37 @@ const cartesian = validateWithSchema(
462
462
  );
463
463
  ```
464
464
 
465
+ ## Tokenize
466
+
467
+ Access the raw SQL token stream with full source position spans. Useful for syntax highlighting, custom linters, or editor integrations.
468
+
469
+ ```typescript
470
+ import { tokenize, Dialect } from '@polyglot-sql/sdk';
471
+
472
+ const result = tokenize('SELECT a, b FROM t', Dialect.Generic);
473
+ if (result.success) {
474
+ for (const token of result.tokens!) {
475
+ console.log(token.tokenType, token.text, token.span);
476
+ // "Select" "SELECT" { start: 0, end: 6, line: 1, column: 1 }
477
+ // "Var" "a" { start: 7, end: 8, line: 1, column: 8 }
478
+ // ...
479
+ }
480
+ }
481
+ ```
482
+
483
+ Each token includes:
484
+
485
+ | Field | Type | Description |
486
+ |-------|------|-------------|
487
+ | `tokenType` | `string` | Token type name (e.g. `"Select"`, `"Var"`, `"Comma"`) |
488
+ | `text` | `string` | Raw source text of the token |
489
+ | `span` | `SpanInfo` | Source position: `start`/`end` byte offsets (0-based, `end` exclusive) and `line`/`column` (1-based) |
490
+ | `comments` | `string[]` | Leading comments attached to this token |
491
+ | `trailingComments` | `string[]` | Trailing comments attached to this token |
492
+
465
493
  ## Error Reporting
466
494
 
467
- Parse and transpile errors include source position information, making it easy to highlight errors in editors or show precise error messages.
495
+ Parse, transpile, and tokenize errors include source position information with both line/column and byte offset ranges, making it easy to highlight errors in editors or show precise error messages.
468
496
 
469
497
  ```typescript
470
498
  import { parse, transpile, Dialect } from '@polyglot-sql/sdk';
@@ -474,15 +502,19 @@ if (!result.success) {
474
502
  console.log(result.error); // "Parse error at line 1, column 11: ..."
475
503
  console.log(result.errorLine); // 1
476
504
  console.log(result.errorColumn); // 11
505
+ console.log(result.errorStart); // 10 (byte offset)
506
+ console.log(result.errorEnd); // 11 (byte offset, exclusive)
477
507
  }
478
508
  ```
479
509
 
480
- Both `ParseResult` and `TranspileResult` include optional position fields:
510
+ `ParseResult`, `TranspileResult`, and `TokenizeResult` include optional position fields:
481
511
 
482
512
  | Field | Type | Description |
483
513
  |-------|------|-------------|
484
514
  | `errorLine` | `number \| undefined` | 1-based line number where the error occurred |
485
515
  | `errorColumn` | `number \| undefined` | 1-based column number where the error occurred |
516
+ | `errorStart` | `number \| undefined` | Start byte offset of the error range (0-based) |
517
+ | `errorEnd` | `number \| undefined` | End byte offset of the error range (exclusive) |
486
518
 
487
519
  These fields are only present when `success` is `false`. On success, they are `undefined`.
488
520
 
@@ -500,7 +532,7 @@ if (!result.success) {
500
532
  Trace how columns flow through SQL queries, from source tables to the result set.
501
533
 
502
534
  ```typescript
503
- import { lineage, getSourceTables } from '@polyglot-sql/sdk';
535
+ import { lineage, lineageWithSchema, getSourceTables } from '@polyglot-sql/sdk';
504
536
 
505
537
  // Trace a column through joins, CTEs, and subqueries
506
538
  const result = lineage('total', 'SELECT o.total FROM orders o JOIN users u ON o.user_id = u.id');
@@ -509,6 +541,19 @@ if (result.success) {
509
541
  console.log(result.lineage.downstream); // source nodes
510
542
  }
511
543
 
544
+ // Schema-aware lineage (same schema format as validateWithSchema)
545
+ const schema = {
546
+ tables: [
547
+ { name: 'users', columns: [{ name: 'id', type: 'INT' }] },
548
+ { name: 'orders', columns: [{ name: 'user_id', type: 'INT' }] },
549
+ ],
550
+ };
551
+ const schemaLineage = lineageWithSchema(
552
+ 'id',
553
+ 'SELECT id FROM users u JOIN orders o ON u.id = o.user_id',
554
+ schema,
555
+ );
556
+
512
557
  // Get all source tables that contribute to a column
513
558
  const tables = getSourceTables('total', 'SELECT o.total FROM orders o JOIN users u ON o.user_id = u.id');
514
559
  if (tables.success) {
@@ -582,6 +627,7 @@ const formattedSafe = pg.formatWithOptions('SELECT a,b FROM t', Dialect.Generic,
582
627
  | `generate(ast, dialect?)` | Generate SQL from AST |
583
628
  | `format(sql, dialect?)` | Pretty-print SQL |
584
629
  | `formatWithOptions(sql, dialect?, options?)` | Pretty-print SQL with guard overrides |
630
+ | `tokenize(sql, dialect?)` | Tokenize SQL into a token stream with source spans |
585
631
  | `validate(sql, dialect?, options?)` | Validate SQL syntax/semantics |
586
632
  | `validateWithSchema(sql, schema, dialect?, options?)` | Validate against a database schema |
587
633
  | `getDialects()` | List supported dialect names |
@@ -592,6 +638,7 @@ const formattedSafe = pg.formatWithOptions('SELECT a,b FROM t', Dialect.Generic,
592
638
  | Function | Description |
593
639
  |----------|-------------|
594
640
  | `lineage(column, sql, dialect?, trimSelects?)` | Trace column lineage through a query |
641
+ | `lineageWithSchema(column, sql, schema, dialect?, trimSelects?)` | Trace lineage with schema-based qualification |
595
642
  | `getSourceTables(column, sql, dialect?)` | Get source tables for a column |
596
643
  | `diff(source, target, dialect?, options?)` | Diff two SQL statements |
597
644
  | `hasChanges(edits)` | Check if diff has non-keep edits |
@@ -733,6 +780,44 @@ For browser use without a bundler:
733
780
  </script>
734
781
  ```
735
782
 
783
+ ## CommonJS (CJS) Usage
784
+
785
+ For Node.js projects using `require()`, the SDK ships a CJS build. Since WASM cannot be loaded synchronously, you must call `init()` and await the promise it returns before using any other function:
786
+
787
+ ```javascript
788
+ const { init, transpile, parse, select, col, lit, isInitialized } = require('@polyglot-sql/sdk');
789
+
790
+ async function main() {
791
+ await init();
792
+
793
+ // Now all functions work
794
+ const result = transpile('SELECT IFNULL(a, b)', 'mysql', 'postgresql');
795
+ console.log(result.sql[0]); // SELECT COALESCE(a, b)
796
+
797
+ const parsed = parse('SELECT 1', 'generic');
798
+ console.log(parsed.success); // true
799
+
800
+ const sql = select('id', 'name').from('users')
801
+ .where(col('id').eq(lit(1)))
802
+ .toSql();
803
+ console.log(sql); // SELECT id, name FROM users WHERE id = 1
804
+ }
805
+
806
+ main();
807
+ ```
808
+
809
+ You can check initialization status with `isInitialized()`:
810
+
811
+ ```javascript
812
+ const { init, isInitialized } = require('@polyglot-sql/sdk');
813
+
814
+ console.log(isInitialized()); // false
815
+ await init(); // must run inside an async function — CommonJS modules do not support top-level await
816
+ console.log(isInitialized()); // true
817
+ ```
818
+
819
+ > **Note:** The ESM build (`import`) auto-initializes via top-level `await`, so `init()` is not required there. The CJS build requires it because `require()` is synchronous.
820
+
736
821
  ## License
737
822
 
738
823
  [MIT](../../LICENSE)