latticesql 4.0.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -1
- package/dist/cli.js +7015 -2417
- package/dist/index.cjs +47576 -42575
- package/dist/index.d.cts +1874 -7
- package/dist/index.d.ts +1874 -7
- package/dist/index.js +47461 -42542
- package/docs/api-reference.md +67 -4
- package/docs/architecture.md +24 -0
- package/docs/assistant.md +23 -0
- package/docs/examples/dashboard.html +284 -0
- package/docs/importing.md +118 -0
- package/docs/retrieval.md +202 -0
- package/package.json +10 -4
package/dist/index.d.cts
CHANGED
|
@@ -666,6 +666,224 @@ declare class InMemoryStateStore implements WritebackStateStore {
|
|
|
666
666
|
*/
|
|
667
667
|
declare function createSQLiteStateStore(db: Database.Database): WritebackStateStore;
|
|
668
668
|
|
|
669
|
+
/**
|
|
670
|
+
* Reranking — an optional second-stage scorer applied to the top candidates of
|
|
671
|
+
* a first-stage retrieval (vector / FTS / hybrid). A cross-encoder reranker
|
|
672
|
+
* typically lifts precision@k meaningfully over bi-encoder similarity, at the
|
|
673
|
+
* cost of one model call over the (small) candidate set.
|
|
674
|
+
*
|
|
675
|
+
* Bring your own reranker: Lattice never calls a model. The reranker is given
|
|
676
|
+
* the query and the candidate texts and returns a score per candidate; higher =
|
|
677
|
+
* more relevant. If it throws or returns nothing usable, retrieval falls back to
|
|
678
|
+
* the first-stage order — a reranker is an enhancement, never a hard dependency.
|
|
679
|
+
*/
|
|
680
|
+
/** A candidate handed to a reranker: an id and the text to score against the query. */
|
|
681
|
+
interface RerankCandidate {
|
|
682
|
+
id: string;
|
|
683
|
+
content: string;
|
|
684
|
+
}
|
|
685
|
+
/** A reranker's verdict for one candidate. */
|
|
686
|
+
interface RerankScore {
|
|
687
|
+
id: string;
|
|
688
|
+
score: number;
|
|
689
|
+
}
|
|
690
|
+
/**
|
|
691
|
+
* Rerank `candidates` for `query`, returning a score per id (higher = better).
|
|
692
|
+
* May be sync or async. Ids absent from the result keep their prior order after
|
|
693
|
+
* any that were scored.
|
|
694
|
+
*/
|
|
695
|
+
type RerankerFn = (query: string, candidates: RerankCandidate[]) => Promise<RerankScore[]> | RerankScore[];
|
|
696
|
+
/**
|
|
697
|
+
* Apply a reranker to an ordered list of items, returning a new order. Each item
|
|
698
|
+
* supplies its `id` and the `content` to rerank on. On any reranker failure (or
|
|
699
|
+
* an empty/garbage result) the original order is returned unchanged — reranking
|
|
700
|
+
* never breaks retrieval.
|
|
701
|
+
*
|
|
702
|
+
* Returns `{ order, applied, scores }`: `order` is the reordered items, `applied` is true
|
|
703
|
+
* only when the reranker actually contributed scores. Items the reranker didn't
|
|
704
|
+
* score retain their relative first-stage order, after the scored ones.
|
|
705
|
+
*/
|
|
706
|
+
declare function applyReranker<T extends {
|
|
707
|
+
id: string;
|
|
708
|
+
content: string;
|
|
709
|
+
}>(query: string, items: T[], reranker: RerankerFn): Promise<{
|
|
710
|
+
order: T[];
|
|
711
|
+
applied: boolean;
|
|
712
|
+
scores: Map<string, number>;
|
|
713
|
+
}>;
|
|
714
|
+
|
|
715
|
+
/**
|
|
716
|
+
* Text chunking for embedding.
|
|
717
|
+
*
|
|
718
|
+
* Embedding a whole row as one vector blurs many topics into a single point,
|
|
719
|
+
* which loses precision and forces a retriever to send large units to the model.
|
|
720
|
+
* Splitting text into smaller, semantically coherent chunks — each embedded
|
|
721
|
+
* separately — raises precision@k and lets a retriever return a few small,
|
|
722
|
+
* on-point units instead of one big one (fewer tokens to a correct answer).
|
|
723
|
+
*
|
|
724
|
+
* The default `semanticChunker` is dependency-free and boundary-aware: it packs
|
|
725
|
+
* text up to a character budget, preferring to break at paragraph, then
|
|
726
|
+
* sentence, then word boundaries, with optional overlap so context that
|
|
727
|
+
* straddles a boundary is not lost. Bring your own {@link ChunkerFn} (e.g. a
|
|
728
|
+
* token-aware or code-aware splitter) when the default heuristic isn't enough.
|
|
729
|
+
*/
|
|
730
|
+
/** A single chunk of source text with its character offsets in the original. */
|
|
731
|
+
interface TextChunk {
|
|
732
|
+
/** 0-based position of this chunk in the sequence. */
|
|
733
|
+
index: number;
|
|
734
|
+
/** The chunk text (already includes any leading overlap). */
|
|
735
|
+
content: string;
|
|
736
|
+
/** Inclusive start offset in the source string. */
|
|
737
|
+
start: number;
|
|
738
|
+
/** Exclusive end offset in the source string. */
|
|
739
|
+
end: number;
|
|
740
|
+
}
|
|
741
|
+
/** Splits source text into ordered chunks. */
|
|
742
|
+
type ChunkerFn = (text: string) => TextChunk[];
|
|
743
|
+
interface SemanticChunkerOptions {
|
|
744
|
+
/** Target maximum characters per chunk. Default 1000. */
|
|
745
|
+
maxChars?: number;
|
|
746
|
+
/**
|
|
747
|
+
* Characters of trailing context to repeat at the start of the next chunk.
|
|
748
|
+
* Default 0. Must be < maxChars.
|
|
749
|
+
*/
|
|
750
|
+
overlap?: number;
|
|
751
|
+
/**
|
|
752
|
+
* Minimum chunk size — a trailing remainder smaller than this is merged into
|
|
753
|
+
* the previous chunk rather than emitted on its own. Default 0.
|
|
754
|
+
*/
|
|
755
|
+
minChars?: number;
|
|
756
|
+
}
|
|
757
|
+
/**
|
|
758
|
+
* Create a boundary-aware chunker. Empty/whitespace-only input yields no chunks.
|
|
759
|
+
* Text shorter than `maxChars` yields a single chunk.
|
|
760
|
+
*/
|
|
761
|
+
declare function semanticChunker(opts?: SemanticChunkerOptions): ChunkerFn;
|
|
762
|
+
/**
|
|
763
|
+
* Apply chunking to a piece of text using the table's config. When no chunker is
|
|
764
|
+
* configured the whole text is one chunk (index 0) — the historical behavior.
|
|
765
|
+
* A `contextPrefix` (e.g. a title or breadcrumb) is prepended to every chunk's
|
|
766
|
+
* embedded content so each chunk carries enough context to be retrieved well.
|
|
767
|
+
*/
|
|
768
|
+
declare function chunkText(text: string, chunker?: ChunkerFn, contextPrefix?: string): {
|
|
769
|
+
chunkIndex: number;
|
|
770
|
+
content: string;
|
|
771
|
+
}[];
|
|
772
|
+
|
|
773
|
+
/**
|
|
774
|
+
* Declarative computed columns + materialized rollups.
|
|
775
|
+
*
|
|
776
|
+
* **Computed columns** are stored columns derived from other columns on the same
|
|
777
|
+
* row by a pure function. They are recomputed on every write (and on a full
|
|
778
|
+
* `refreshComputedColumns`), so a consumer can index / filter / sort on a derived
|
|
779
|
+
* value without recomputing it per query, and an external edit to the rendered
|
|
780
|
+
* file can't desync it — the next write recomputes it.
|
|
781
|
+
*
|
|
782
|
+
* **Materialized rollups** are stored aggregates over a child table (e.g.
|
|
783
|
+
* `post.comment_count`). They are recomputed incrementally when the child table
|
|
784
|
+
* changes and in full via `refreshMaterializedRollups`.
|
|
785
|
+
*
|
|
786
|
+
* Both are opt-in per table and inert otherwise.
|
|
787
|
+
*/
|
|
788
|
+
|
|
789
|
+
interface ComputedColumnSpec {
|
|
790
|
+
/** Columns this value is derived from. Drives recompute-on-change + cycle check. */
|
|
791
|
+
deps: string[];
|
|
792
|
+
/** Pure derivation from the row. Receives the full (merged) row. */
|
|
793
|
+
compute: (row: Row) => unknown;
|
|
794
|
+
/** SQL column type. Default `TEXT`. */
|
|
795
|
+
type?: string;
|
|
796
|
+
}
|
|
797
|
+
type RollupFunction = 'count' | 'sum' | 'avg' | 'min' | 'max';
|
|
798
|
+
interface MaterializedRollupSpec {
|
|
799
|
+
/** Child table to aggregate. */
|
|
800
|
+
sourceTable: string;
|
|
801
|
+
/** Column on the child table that references this table's primary key. */
|
|
802
|
+
foreignKey: string;
|
|
803
|
+
/** Aggregate function. */
|
|
804
|
+
fn: RollupFunction;
|
|
805
|
+
/** Child column to aggregate (omit for `count`). */
|
|
806
|
+
column?: string;
|
|
807
|
+
/** SQL column type for the stored rollup. Default `REAL`. */
|
|
808
|
+
type?: string;
|
|
809
|
+
}
|
|
810
|
+
/**
|
|
811
|
+
* Thrown when computed columns form a dependency cycle (A→B→A), which would make
|
|
812
|
+
* the recompute order undefined. Detected once at init.
|
|
813
|
+
*/
|
|
814
|
+
declare class ComputedColumnCycleError extends Error {
|
|
815
|
+
readonly table: string;
|
|
816
|
+
readonly cycle: string[];
|
|
817
|
+
constructor(table: string, cycle: string[]);
|
|
818
|
+
}
|
|
819
|
+
/**
|
|
820
|
+
* Validate that computed columns have no dependency cycle and return a safe
|
|
821
|
+
* recompute order (dependencies before dependents). Only deps that are
|
|
822
|
+
* themselves computed columns participate in ordering; deps on plain columns are
|
|
823
|
+
* leaves. Throws {@link ComputedColumnCycleError} on a cycle.
|
|
824
|
+
*/
|
|
825
|
+
declare function computedColumnOrder(table: string, computed: Record<string, ComputedColumnSpec>): string[];
|
|
826
|
+
/**
|
|
827
|
+
* Compute the values for `computed` columns from a (full) row, in dependency
|
|
828
|
+
* order, mutating the working row so later computed columns can read earlier
|
|
829
|
+
* ones. Returns a map of computed column → value.
|
|
830
|
+
*/
|
|
831
|
+
declare function computeColumns(computed: Record<string, ComputedColumnSpec>, order: string[], row: Row): Record<string, unknown>;
|
|
832
|
+
/** The DDL column spec map computed columns contribute. */
|
|
833
|
+
declare function computedColumnDdl(computed: Record<string, ComputedColumnSpec>): Record<string, string>;
|
|
834
|
+
/** The DDL column spec map materialized rollups contribute. */
|
|
835
|
+
declare function rollupColumnDdl(rollups: Record<string, MaterializedRollupSpec>): Record<string, string>;
|
|
836
|
+
/** The set of all dep columns across computed specs (for change detection). */
|
|
837
|
+
declare function allComputedDeps(computed: Record<string, ComputedColumnSpec>): Set<string>;
|
|
838
|
+
|
|
839
|
+
/**
|
|
840
|
+
* Data governance primitives — provenance and trust.
|
|
841
|
+
*
|
|
842
|
+
* **Provenance** records, immutably, where a row came from: how it was ingested,
|
|
843
|
+
* the source URI, and when. Stamped at creation and frozen — an update that tries
|
|
844
|
+
* to change a provenance column fails loudly, so the lineage a risk/compliance
|
|
845
|
+
* reviewer signs off on can't be quietly rewritten.
|
|
846
|
+
*
|
|
847
|
+
* **Trust** gates untrusted ingest: a table opted into trust gives every new row a
|
|
848
|
+
* `_trust_state` (default `unverified`), and a verification workflow
|
|
849
|
+
* (`markRowForReview` / `verifyRow`) moves rows to `needs_review` / `verified`.
|
|
850
|
+
* Downstream consumers can filter to verified rows only.
|
|
851
|
+
*
|
|
852
|
+
* Both are opt-in per table and add no overhead to tables that don't use them.
|
|
853
|
+
*/
|
|
854
|
+
type ProvenanceField = 'ingested_via' | 'source_uri' | 'ingested_at';
|
|
855
|
+
interface ProvenanceConfig {
|
|
856
|
+
/**
|
|
857
|
+
* Which immutable provenance columns to add and stamp. Defaults to all three
|
|
858
|
+
* (`ingested_via`, `source_uri`, `ingested_at`). `ingested_at` is auto-stamped
|
|
859
|
+
* on insert when not supplied.
|
|
860
|
+
*/
|
|
861
|
+
fields?: ProvenanceField[];
|
|
862
|
+
}
|
|
863
|
+
type TrustState = 'unverified' | 'needs_review' | 'verified';
|
|
864
|
+
interface TrustConfig {
|
|
865
|
+
/** State assigned to a row on insert. Default `'unverified'`. */
|
|
866
|
+
defaultState?: TrustState;
|
|
867
|
+
}
|
|
868
|
+
declare const ALL_PROVENANCE_FIELDS: readonly ProvenanceField[];
|
|
869
|
+
/** Trust bookkeeping columns (underscore-prefixed, e.g. `_trust_state`; opt-in per table). */
|
|
870
|
+
declare const TRUST_COLUMNS: Record<string, string>;
|
|
871
|
+
/** Resolve a `provenance` config (boolean | object) to its column list. */
|
|
872
|
+
declare function resolveProvenanceFields(config: boolean | ProvenanceConfig | undefined): ProvenanceField[];
|
|
873
|
+
/** The DDL column spec map a provenance config contributes. */
|
|
874
|
+
declare function provenanceColumns(config: boolean | ProvenanceConfig | undefined): Record<string, string>;
|
|
875
|
+
/** Resolve a `trust` config (boolean | object) to its default state. */
|
|
876
|
+
declare function resolveTrustDefault(config: boolean | TrustConfig | undefined): TrustState | null;
|
|
877
|
+
/**
|
|
878
|
+
* Thrown when an update tries to change an immutable provenance column. Lineage
|
|
879
|
+
* is creation-time only; surfacing this loudly prevents silent provenance drift.
|
|
880
|
+
*/
|
|
881
|
+
declare class ProvenanceImmutableError extends Error {
|
|
882
|
+
readonly table: string;
|
|
883
|
+
readonly column: string;
|
|
884
|
+
constructor(table: string, column: string);
|
|
885
|
+
}
|
|
886
|
+
|
|
669
887
|
type Row = Record<string, unknown>;
|
|
670
888
|
|
|
671
889
|
interface LatticeOptions {
|
|
@@ -711,6 +929,14 @@ interface LatticeOptions {
|
|
|
711
929
|
* ...")` on violation, so callers can catch it.
|
|
712
930
|
*/
|
|
713
931
|
maxRowBytes?: number;
|
|
932
|
+
/**
|
|
933
|
+
* Default bounded-read cap for `query()` (4.1+). When set, a `query()` with no
|
|
934
|
+
* explicit `limit` and no per-call `maxRows` reads at most this many rows and
|
|
935
|
+
* throws `BoundedReadError` if more match — a guardrail against an accidental
|
|
936
|
+
* unbounded full-table load on a hot path. A per-call `maxRows` or an explicit
|
|
937
|
+
* `limit` overrides it. Off by default (unbounded, preserving prior behavior).
|
|
938
|
+
*/
|
|
939
|
+
defaultMaxRows?: number;
|
|
714
940
|
}
|
|
715
941
|
/**
|
|
716
942
|
* Retention policy for the change log.
|
|
@@ -855,7 +1081,41 @@ interface Filter {
|
|
|
855
1081
|
* For `in`, must be an array.
|
|
856
1082
|
*/
|
|
857
1083
|
val?: unknown;
|
|
1084
|
+
/**
|
|
1085
|
+
* Extract a value from a JSON/JSONB column before comparing. A string like
|
|
1086
|
+
* `'a.b'` or an array `['a', 'b']` addresses a nested key. Compiles to
|
|
1087
|
+
* SQLite `json_extract(col, '$.a.b')` and Postgres `col #>> '{a,b}'`.
|
|
1088
|
+
*
|
|
1089
|
+
* @example
|
|
1090
|
+
* ```ts
|
|
1091
|
+
* { col: 'metadata_json', jsonPath: 'priority', op: 'gte', val: 3 }
|
|
1092
|
+
* { col: 'data', jsonPath: ['address', 'city'], op: 'eq', val: 'NYC' }
|
|
1093
|
+
* ```
|
|
1094
|
+
*/
|
|
1095
|
+
jsonPath?: string | string[];
|
|
1096
|
+
}
|
|
1097
|
+
/** An OR group of filter expressions (any may match). */
|
|
1098
|
+
interface FilterOr {
|
|
1099
|
+
or: FilterExpr[];
|
|
1100
|
+
}
|
|
1101
|
+
/** An AND group of filter expressions (all must match). */
|
|
1102
|
+
interface FilterAnd {
|
|
1103
|
+
and: FilterExpr[];
|
|
858
1104
|
}
|
|
1105
|
+
/**
|
|
1106
|
+
* A filter expression: a single {@link Filter} clause, or a recursive `or` / `and`
|
|
1107
|
+
* group of expressions. A bare `Filter` (the pre-4.1 shape) is still a valid
|
|
1108
|
+
* `FilterExpr`, so existing `filters: Filter[]` usage is unchanged.
|
|
1109
|
+
*
|
|
1110
|
+
* @example
|
|
1111
|
+
* ```ts
|
|
1112
|
+
* filters: [
|
|
1113
|
+
* { col: 'status', op: 'eq', val: 'open' },
|
|
1114
|
+
* { or: [ { col: 'priority', op: 'gte', val: 3 }, { col: 'pinned', op: 'eq', val: true } ] },
|
|
1115
|
+
* ]
|
|
1116
|
+
* ```
|
|
1117
|
+
*/
|
|
1118
|
+
type FilterExpr = Filter | FilterOr | FilterAnd;
|
|
859
1119
|
/**
|
|
860
1120
|
* Names of the four built-in render templates.
|
|
861
1121
|
*
|
|
@@ -1128,6 +1388,36 @@ interface TableDefinition {
|
|
|
1128
1388
|
encrypted?: boolean | {
|
|
1129
1389
|
columns: string[];
|
|
1130
1390
|
};
|
|
1391
|
+
/**
|
|
1392
|
+
* Record immutable provenance for each row (4.1+). `true` adds three columns —
|
|
1393
|
+
* `ingested_via`, `source_uri`, `ingested_at` — or pass a {@link ProvenanceConfig}
|
|
1394
|
+
* to choose a subset. `ingested_at` is auto-stamped on insert; an `update()`
|
|
1395
|
+
* that tries to change any provenance column throws `ProvenanceImmutableError`,
|
|
1396
|
+
* so lineage can't be silently rewritten. Tables without this config are
|
|
1397
|
+
* unaffected.
|
|
1398
|
+
*/
|
|
1399
|
+
provenance?: boolean | ProvenanceConfig;
|
|
1400
|
+
/**
|
|
1401
|
+
* Gate untrusted ingest with a per-row trust state (4.1+). `true` (or a
|
|
1402
|
+
* {@link TrustConfig}) adds `_trust_state` (default `'unverified'`) plus
|
|
1403
|
+
* `_verified_by` / `_verified_at` / `_review_reason`, and enables the
|
|
1404
|
+
* verification workflow (`markRowForReview` / `verifyRow` / `rowsNeedingReview`
|
|
1405
|
+
* / `verifiedRows`). Tables without this config are unaffected.
|
|
1406
|
+
*/
|
|
1407
|
+
trust?: boolean | TrustConfig;
|
|
1408
|
+
/**
|
|
1409
|
+
* Declarative computed columns (4.1+) — stored columns derived from other
|
|
1410
|
+
* columns on the same row by a pure function, recomputed on every write (and
|
|
1411
|
+
* via `refreshComputedColumns`). Lets you index / filter / sort on a derived
|
|
1412
|
+
* value. A dependency cycle is rejected at init. See {@link ComputedColumnSpec}.
|
|
1413
|
+
*/
|
|
1414
|
+
computed?: Record<string, ComputedColumnSpec>;
|
|
1415
|
+
/**
|
|
1416
|
+
* Materialized rollups (4.1+) — stored aggregates over a child table (e.g.
|
|
1417
|
+
* `comment_count`), recomputed when the child changes and via
|
|
1418
|
+
* `refreshMaterializedRollups`. See {@link MaterializedRollupSpec}.
|
|
1419
|
+
*/
|
|
1420
|
+
materializedRollups?: Record<string, MaterializedRollupSpec>;
|
|
1131
1421
|
}
|
|
1132
1422
|
interface MultiTableDefinition {
|
|
1133
1423
|
/** Returns the "anchor" entities — one output file is produced per anchor */
|
|
@@ -1157,6 +1447,37 @@ interface EmbeddingsConfig {
|
|
|
1157
1447
|
* Bring your own model — Lattice does not bundle an embedding provider.
|
|
1158
1448
|
*/
|
|
1159
1449
|
embed: (text: string) => Promise<number[]>;
|
|
1450
|
+
/**
|
|
1451
|
+
* Optional text splitter. When set, each row's concatenated text is split
|
|
1452
|
+
* into chunks and every chunk is embedded separately, so semantic search
|
|
1453
|
+
* matches the most relevant *part* of a row rather than the blurred average
|
|
1454
|
+
* of the whole. Omit for the historical whole-row behavior (one vector/row).
|
|
1455
|
+
* See `semanticChunker` for a dependency-free boundary-aware default.
|
|
1456
|
+
*/
|
|
1457
|
+
chunker?: ChunkerFn;
|
|
1458
|
+
/**
|
|
1459
|
+
* Optional per-row context prefix prepended to every chunk before embedding
|
|
1460
|
+
* (e.g. a title or breadcrumb), so each chunk carries enough context to be
|
|
1461
|
+
* retrieved well on its own. Receives the full row.
|
|
1462
|
+
*/
|
|
1463
|
+
contextPrefix?: (row: Row) => string;
|
|
1464
|
+
/**
|
|
1465
|
+
* Optional identifier of the embedding model, stored alongside each vector.
|
|
1466
|
+
* Lets `refreshEmbeddings` detect and re-embed rows produced by a different
|
|
1467
|
+
* model, and lets the doctor report mixed-model coverage. Purely advisory —
|
|
1468
|
+
* Lattice never calls a model itself.
|
|
1469
|
+
*/
|
|
1470
|
+
modelId?: string;
|
|
1471
|
+
/**
|
|
1472
|
+
* Optional opt-in cap on the no-index fallback scan. When the in-process cosine
|
|
1473
|
+
* scan (used only when no native vector index exists for a table) would read
|
|
1474
|
+
* more than this many stored chunk vectors, `searchByEmbedding` throws
|
|
1475
|
+
* `EmbeddingScanTooLargeError` instead of loading them all into memory. OFF by
|
|
1476
|
+
* default (unbounded scan — historical behavior). Lattice never silently
|
|
1477
|
+
* truncates the scan: a partial cosine scan returns incomplete, wrong results,
|
|
1478
|
+
* so it fails loudly and tells you to add a pgvector index or raise the cap.
|
|
1479
|
+
*/
|
|
1480
|
+
maxScanChunks?: number;
|
|
1160
1481
|
}
|
|
1161
1482
|
/**
|
|
1162
1483
|
* Options for `Lattice.search()`.
|
|
@@ -1169,6 +1490,20 @@ interface SearchOptions {
|
|
|
1169
1490
|
* score are excluded. Default: 0.
|
|
1170
1491
|
*/
|
|
1171
1492
|
minScore?: number;
|
|
1493
|
+
/**
|
|
1494
|
+
* Optional second-stage reranker applied to the retrieved candidates before
|
|
1495
|
+
* the top-K is returned. A cross-encoder reranker typically lifts precision
|
|
1496
|
+
* over raw similarity. Bring your own — Lattice never calls a model. If it
|
|
1497
|
+
* throws or returns nothing usable, the original similarity order is kept
|
|
1498
|
+
* (graceful fallback). To rerank a larger pool than `topK`, set
|
|
1499
|
+
* `rerankPoolSize`.
|
|
1500
|
+
*/
|
|
1501
|
+
reranker?: RerankerFn;
|
|
1502
|
+
/**
|
|
1503
|
+
* Number of candidates to retrieve and hand to the `reranker` before slicing
|
|
1504
|
+
* to `topK`. Defaults to `max(topK * 4, 20)`. Ignored when no reranker is set.
|
|
1505
|
+
*/
|
|
1506
|
+
rerankPoolSize?: number;
|
|
1172
1507
|
}
|
|
1173
1508
|
/**
|
|
1174
1509
|
* A single search result returned by `Lattice.search()`.
|
|
@@ -1178,6 +1513,17 @@ interface SearchResult {
|
|
|
1178
1513
|
row: Row;
|
|
1179
1514
|
/** Cosine similarity score (0–1). */
|
|
1180
1515
|
score: number;
|
|
1516
|
+
/**
|
|
1517
|
+
* For a chunked embedding, the index of the chunk that produced the best
|
|
1518
|
+
* score for this row. Absent for whole-row (unchunked) embeddings.
|
|
1519
|
+
*/
|
|
1520
|
+
chunkIndex?: number;
|
|
1521
|
+
/**
|
|
1522
|
+
* For a chunked embedding, the text of the best-matching chunk — useful as a
|
|
1523
|
+
* precise, low-token snippet to hand to a model. Absent for whole-row
|
|
1524
|
+
* embeddings or when chunk content was not stored.
|
|
1525
|
+
*/
|
|
1526
|
+
matchedContent?: string;
|
|
1181
1527
|
}
|
|
1182
1528
|
/**
|
|
1183
1529
|
* Dimension scores passed to `Lattice.reward()`.
|
|
@@ -1264,6 +1610,19 @@ interface WritebackDefinition {
|
|
|
1264
1610
|
*/
|
|
1265
1611
|
onReject?: (entry: unknown, result: WritebackValidationResult) => void;
|
|
1266
1612
|
}
|
|
1613
|
+
/**
|
|
1614
|
+
* Column projection for a query — return only the columns you need, so wide
|
|
1615
|
+
* tables don't transfer (or decrypt) columns the caller will discard.
|
|
1616
|
+
*
|
|
1617
|
+
* - `string[]` — include exactly these columns.
|
|
1618
|
+
* - `{ include }` — include exactly these columns.
|
|
1619
|
+
* - `{ exclude }` — return all columns except these.
|
|
1620
|
+
*/
|
|
1621
|
+
type QueryProjection = string[] | {
|
|
1622
|
+
include: string[];
|
|
1623
|
+
} | {
|
|
1624
|
+
exclude: string[];
|
|
1625
|
+
};
|
|
1267
1626
|
interface QueryOptions {
|
|
1268
1627
|
/**
|
|
1269
1628
|
* Equality filters — shorthand for `filters: [{ col, op: 'eq', val }]`.
|
|
@@ -1271,30 +1630,123 @@ interface QueryOptions {
|
|
|
1271
1630
|
*/
|
|
1272
1631
|
where?: Record<string, unknown>;
|
|
1273
1632
|
/**
|
|
1274
|
-
* Advanced filter clauses with full operator support.
|
|
1275
|
-
*
|
|
1633
|
+
* Advanced filter clauses with full operator support. May include recursive
|
|
1634
|
+
* `or` / `and` groups (4.1+) and per-clause `jsonPath` extraction. Combined
|
|
1635
|
+
* with `where` using AND.
|
|
1276
1636
|
*
|
|
1277
1637
|
* @example
|
|
1278
1638
|
* ```ts
|
|
1279
1639
|
* filters: [
|
|
1280
1640
|
* { col: 'priority', op: 'gte', val: 3 },
|
|
1281
1641
|
* { col: 'deleted_at', op: 'isNull' },
|
|
1282
|
-
* { col: 'tag',
|
|
1642
|
+
* { or: [ { col: 'tag', op: 'eq', val: 'bug' }, { col: 'tag', op: 'eq', val: 'feature' } ] },
|
|
1283
1643
|
* ]
|
|
1284
1644
|
* ```
|
|
1285
1645
|
*/
|
|
1286
|
-
filters?:
|
|
1646
|
+
filters?: FilterExpr[];
|
|
1287
1647
|
orderBy?: string;
|
|
1288
1648
|
orderDir?: 'asc' | 'desc';
|
|
1289
1649
|
limit?: number;
|
|
1290
1650
|
offset?: number;
|
|
1651
|
+
/**
|
|
1652
|
+
* Return only these columns (4.1+). See {@link QueryProjection}. Omitted
|
|
1653
|
+
* columns are never transferred or decrypted.
|
|
1654
|
+
*/
|
|
1655
|
+
projection?: QueryProjection;
|
|
1656
|
+
/**
|
|
1657
|
+
* Bounded-read cap (4.1+). When set and no explicit `limit` is given, the
|
|
1658
|
+
* query reads at most `maxRows` rows and **throws `BoundedReadError`** if more
|
|
1659
|
+
* exist — forcing the caller to paginate rather than silently loading an
|
|
1660
|
+
* unbounded result set. Overrides `LatticeOptions.defaultMaxRows`. An explicit
|
|
1661
|
+
* `limit` opts out (the caller has bounded the read themselves).
|
|
1662
|
+
*/
|
|
1663
|
+
maxRows?: number;
|
|
1664
|
+
/**
|
|
1665
|
+
* Return one row per distinct value of these column(s) (4.1+). Compiles to
|
|
1666
|
+
* Postgres `DISTINCT ON (...)` and an emulated SQLite `ROW_NUMBER()` window.
|
|
1667
|
+
* Which row survives per group is determined by `orderBy`/`orderDir` (then the
|
|
1668
|
+
* primary key as a deterministic tiebreak).
|
|
1669
|
+
*/
|
|
1670
|
+
distinctOn?: string | string[];
|
|
1671
|
+
/**
|
|
1672
|
+
* Expand declared relations on each returned row (4.1+). Each name must be a
|
|
1673
|
+
* key of the table's `relations`. A `belongsTo` relation attaches the single
|
|
1674
|
+
* related row (or null); a `hasMany` relation attaches an array. Related rows
|
|
1675
|
+
* are fetched in ONE batched `IN (...)` query per relation — no N+1.
|
|
1676
|
+
*/
|
|
1677
|
+
include?: string[];
|
|
1678
|
+
}
|
|
1679
|
+
interface QueryPageOptions {
|
|
1680
|
+
/** Equality filters (same as QueryOptions.where). */
|
|
1681
|
+
where?: Record<string, unknown>;
|
|
1682
|
+
/** Advanced filters (same as QueryOptions.filters). */
|
|
1683
|
+
filters?: FilterExpr[];
|
|
1684
|
+
/** Sort column the cursor walks. Defaults to the primary key. */
|
|
1685
|
+
orderBy?: string;
|
|
1686
|
+
orderDir?: 'asc' | 'desc';
|
|
1687
|
+
/** Page size. Default 50. */
|
|
1688
|
+
limit?: number;
|
|
1689
|
+
/** Opaque cursor from a prior page's `nextCursor`. Omit for the first page. */
|
|
1690
|
+
cursor?: string;
|
|
1691
|
+
/** Return only these columns (see {@link QueryProjection}). */
|
|
1692
|
+
projection?: QueryProjection;
|
|
1693
|
+
}
|
|
1694
|
+
interface QueryPageResult {
|
|
1695
|
+
/** The page of rows. */
|
|
1696
|
+
rows: Row[];
|
|
1697
|
+
/** Opaque cursor for the next page, or null when this is the last page. */
|
|
1698
|
+
nextCursor: string | null;
|
|
1699
|
+
/** Whether more rows exist beyond this page. */
|
|
1700
|
+
hasMore: boolean;
|
|
1291
1701
|
}
|
|
1292
1702
|
interface CountOptions {
|
|
1293
1703
|
/** Equality filters (same as QueryOptions.where) */
|
|
1294
1704
|
where?: Record<string, unknown>;
|
|
1295
1705
|
/** Advanced filter clauses (same as QueryOptions.filters) */
|
|
1296
|
-
filters?:
|
|
1706
|
+
filters?: FilterExpr[];
|
|
1707
|
+
}
|
|
1708
|
+
/** SQL aggregate function. */
|
|
1709
|
+
type AggregateFunction = 'count' | 'sum' | 'avg' | 'min' | 'max';
|
|
1710
|
+
/** One aggregate column in an {@link AggregateOptions}. */
|
|
1711
|
+
interface AggregateSpec {
|
|
1712
|
+
/** The aggregate function to apply. */
|
|
1713
|
+
fn: AggregateFunction;
|
|
1714
|
+
/**
|
|
1715
|
+
* Column to aggregate. Omit for `count` to mean `COUNT(*)`. Required for
|
|
1716
|
+
* `sum`/`avg`/`min`/`max`.
|
|
1717
|
+
*/
|
|
1718
|
+
col?: string;
|
|
1719
|
+
/** Output key for this aggregate in each result row. */
|
|
1720
|
+
as: string;
|
|
1721
|
+
/** Apply `DISTINCT` inside the aggregate (e.g. `COUNT(DISTINCT col)`). */
|
|
1722
|
+
distinct?: boolean;
|
|
1723
|
+
}
|
|
1724
|
+
/** A HAVING clause on an aggregate output (post-grouping filter). */
|
|
1725
|
+
interface AggregateHaving {
|
|
1726
|
+
/** The `as` key of an aggregate in the same query. */
|
|
1727
|
+
aggregate: string;
|
|
1728
|
+
op: FilterOp;
|
|
1729
|
+
val?: unknown;
|
|
1297
1730
|
}
|
|
1731
|
+
interface AggregateOptions {
|
|
1732
|
+
/** Columns to GROUP BY. Omit for a single grand-total row. */
|
|
1733
|
+
groupBy?: string[];
|
|
1734
|
+
/** The aggregate columns to compute (at least one). */
|
|
1735
|
+
aggregates: AggregateSpec[];
|
|
1736
|
+
/** Row-level equality filters applied before grouping. */
|
|
1737
|
+
where?: Record<string, unknown>;
|
|
1738
|
+
/** Row-level advanced filters applied before grouping. */
|
|
1739
|
+
filters?: FilterExpr[];
|
|
1740
|
+
/** Post-grouping filters on aggregate outputs. */
|
|
1741
|
+
having?: AggregateHaving[];
|
|
1742
|
+
/** Order the grouped rows by a groupBy column or an aggregate `as` key. */
|
|
1743
|
+
orderBy?: string;
|
|
1744
|
+
orderDir?: 'asc' | 'desc';
|
|
1745
|
+
/** Max grouped rows to return. */
|
|
1746
|
+
limit?: number;
|
|
1747
|
+
}
|
|
1748
|
+
/** One row of {@link Lattice.aggregate} output: groupBy columns + aggregate keys. */
|
|
1749
|
+
type AggregateResult = Record<string, unknown>;
|
|
1298
1750
|
interface InitOptions {
|
|
1299
1751
|
migrations?: Migration[];
|
|
1300
1752
|
/**
|
|
@@ -2041,6 +2493,704 @@ declare class ProgressThrottle {
|
|
|
2041
2493
|
force(event: RenderProgress): void;
|
|
2042
2494
|
}
|
|
2043
2495
|
|
|
2496
|
+
/** Internal table that stores one embedding vector per (table, row, chunk). */
|
|
2497
|
+
declare const EMBEDDINGS_TABLE = "_lattice_embeddings";
|
|
2498
|
+
/**
|
|
2499
|
+
* Ensure the internal embeddings storage table exists with the chunk-aware
|
|
2500
|
+
* schema, migrating an older two-key (table_name, row_pk) layout forward.
|
|
2501
|
+
*
|
|
2502
|
+
* The embeddings table is a DERIVED cache — every vector can be recomputed from
|
|
2503
|
+
* its source row — so when an older schema is detected it is rebuilt rather than
|
|
2504
|
+
* preserved bit-for-bit. The migration is idempotent: once `chunk_index` exists
|
|
2505
|
+
* the function is a no-op.
|
|
2506
|
+
*/
|
|
2507
|
+
declare function ensureEmbeddingsTable(adapter: StorageAdapter): Promise<void>;
|
|
2508
|
+
/** Concatenate the configured fields of a row into a single embeddable string. */
|
|
2509
|
+
declare function concatRowText(row: Row, fields: string[]): string;
|
|
2510
|
+
/**
|
|
2511
|
+
* Compute and store the embedding(s) for a row. When the config supplies a
|
|
2512
|
+
* `chunker`, the row text is split and each chunk is embedded + stored under its
|
|
2513
|
+
* own `chunk_index`; otherwise the whole text is one chunk (index 0). The row's
|
|
2514
|
+
* prior chunks are replaced atomically-per-row (delete then insert).
|
|
2515
|
+
*/
|
|
2516
|
+
declare function storeEmbedding(adapter: StorageAdapter, table: string, pk: string, row: Row, config: EmbeddingsConfig): Promise<void>;
|
|
2517
|
+
/** Remove all stored embedding chunks for a row. */
|
|
2518
|
+
declare function removeEmbedding(adapter: StorageAdapter, table: string, pk: string): Promise<void>;
|
|
2519
|
+
/** Cosine similarity between two vectors. */
|
|
2520
|
+
declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
2521
|
+
/**
|
|
2522
|
+
* Error thrown when a stored vector's dimensionality does not match the query
|
|
2523
|
+
* vector's — almost always a sign the embedding model changed without a
|
|
2524
|
+
* re-embed. Surfaced loudly rather than silently scoring mismatched vectors.
|
|
2525
|
+
*/
|
|
2526
|
+
declare class EmbeddingDimensionMismatchError extends Error {
|
|
2527
|
+
readonly table: string;
|
|
2528
|
+
readonly expected: number;
|
|
2529
|
+
readonly found: number;
|
|
2530
|
+
constructor(table: string, expected: number, found: number);
|
|
2531
|
+
}
|
|
2532
|
+
/**
|
|
2533
|
+
* Thrown by `searchByEmbedding` when the no-index fallback cosine scan would read
|
|
2534
|
+
* more stored chunk vectors than the configured `maxScanChunks`. The scan is
|
|
2535
|
+
* never silently truncated (that would return incomplete, wrong results) — it
|
|
2536
|
+
* fails loudly so the caller adds a native vector index or raises the cap.
|
|
2537
|
+
*/
|
|
2538
|
+
declare class EmbeddingScanTooLargeError extends Error {
|
|
2539
|
+
readonly table: string;
|
|
2540
|
+
readonly found: number;
|
|
2541
|
+
readonly limit: number;
|
|
2542
|
+
constructor(table: string, found: number, limit: number);
|
|
2543
|
+
}
|
|
2544
|
+
/**
|
|
2545
|
+
* Search rows by semantic similarity. Uses a native vector index (pgvector) when
|
|
2546
|
+
* one exists for the table; otherwise an in-process cosine scan over the stored
|
|
2547
|
+
* chunk vectors. Either way results respect `deleted_at IS NULL` on the base
|
|
2548
|
+
* table and are de-duplicated to the best-scoring chunk per row.
|
|
2549
|
+
*/
|
|
2550
|
+
declare function searchByEmbedding(adapter: StorageAdapter, table: string, queryText: string, config: EmbeddingsConfig, topK: number, minScore: number, pkColumn?: string): Promise<SearchResult[]>;
|
|
2551
|
+
interface RefreshEmbeddingsOptions {
|
|
2552
|
+
/** Only re-embed rows whose stored model differs from `config.modelId`. */
|
|
2553
|
+
staleModelOnly?: boolean;
|
|
2554
|
+
/** Embed rows that have no stored embedding. Default true. */
|
|
2555
|
+
backfillMissing?: boolean;
|
|
2556
|
+
/** Re-embed rows whose source changed since `embedded_at` (caller decides via `changedSince`). */
|
|
2557
|
+
changedSince?: string;
|
|
2558
|
+
/** Page size for the base-table scan. Default 500. */
|
|
2559
|
+
batchSize?: number;
|
|
2560
|
+
}
|
|
2561
|
+
interface EmbeddingRefreshResult {
|
|
2562
|
+
/** Rows that were (re-)embedded. */
|
|
2563
|
+
embedded: number;
|
|
2564
|
+
/** Rows skipped because they were already current. */
|
|
2565
|
+
skipped: number;
|
|
2566
|
+
/** Orphaned embeddings removed (their source row no longer exists). */
|
|
2567
|
+
removed: number;
|
|
2568
|
+
}
|
|
2569
|
+
/**
|
|
2570
|
+
* Backfill / re-embed a table's vectors incrementally — embed only what's
|
|
2571
|
+
* missing or stale, rather than re-embedding everything. Honors `deleted_at`
|
|
2572
|
+
* and sweeps embeddings whose source row is gone.
|
|
2573
|
+
*/
|
|
2574
|
+
declare function refreshEmbeddings(adapter: StorageAdapter, table: string, config: EmbeddingsConfig, pkColumn?: string, opts?: RefreshEmbeddingsOptions): Promise<EmbeddingRefreshResult>;
|
|
2575
|
+
|
|
2576
|
+
/**
|
|
2577
|
+
* Ranking signals — lightweight, deterministic boosts applied to a retrieval
|
|
2578
|
+
* score from columns already on the row, no model required. Pure relevance
|
|
2579
|
+
* (vector/FTS/hybrid) ignores how fresh, how rewarded, or how referenced a row
|
|
2580
|
+
* is; these signals fold that business context back into the ranking.
|
|
2581
|
+
*
|
|
2582
|
+
* Each signal yields a value in [0, 1]; the combined boost is a weighted sum,
|
|
2583
|
+
* and the caller multiplies the base score by `(1 + boost)` so a strong signal
|
|
2584
|
+
* lifts an already-relevant row without drowning relevance entirely.
|
|
2585
|
+
*/
|
|
2586
|
+
|
|
2587
|
+
/** Exponential recency decay: 1.0 at age 0, 0.5 at one half-life, → 0 with age. */
|
|
2588
|
+
interface RecencySignal {
|
|
2589
|
+
/** Timestamp column (ISO-8601 string or epoch ms). */
|
|
2590
|
+
column: string;
|
|
2591
|
+
/** Half-life in days — the age at which the boost halves. */
|
|
2592
|
+
halfLifeDays: number;
|
|
2593
|
+
/** Weight of this signal in the combined boost. */
|
|
2594
|
+
weight: number;
|
|
2595
|
+
}
|
|
2596
|
+
/** Reward signal: saturating boost from a `_reward_total`-style column. */
|
|
2597
|
+
interface RewardSignal {
|
|
2598
|
+
/** Column holding the cumulative reward. Default `_reward_total`. */
|
|
2599
|
+
column?: string;
|
|
2600
|
+
/** Weight of this signal in the combined boost. */
|
|
2601
|
+
weight: number;
|
|
2602
|
+
}
|
|
2603
|
+
/**
|
|
2604
|
+
* Backlink signal: a saturating boost from an inbound-reference-count column. A
|
|
2605
|
+
* row referenced by many others is more central/authoritative, so it ranks higher.
|
|
2606
|
+
*/
|
|
2607
|
+
interface BacklinkSignal {
|
|
2608
|
+
/** Column holding the inbound reference count. Default `_backlink_count`. */
|
|
2609
|
+
column?: string;
|
|
2610
|
+
/** Weight of this signal in the combined boost. */
|
|
2611
|
+
weight: number;
|
|
2612
|
+
}
|
|
2613
|
+
/** A custom per-row signal returning a value in [0, 1]. */
|
|
2614
|
+
interface CustomSignal {
|
|
2615
|
+
fn: (row: Row) => number;
|
|
2616
|
+
weight: number;
|
|
2617
|
+
}
|
|
2618
|
+
interface RankingOptions {
|
|
2619
|
+
recency?: RecencySignal;
|
|
2620
|
+
reward?: RewardSignal;
|
|
2621
|
+
backlink?: BacklinkSignal;
|
|
2622
|
+
custom?: CustomSignal;
|
|
2623
|
+
/**
|
|
2624
|
+
* Reference time (epoch ms) for recency decay. Defaults to `Date.now()`.
|
|
2625
|
+
* Pass it for deterministic ranking/tests.
|
|
2626
|
+
*/
|
|
2627
|
+
now?: number;
|
|
2628
|
+
}
|
|
2629
|
+
/** Recency boost in [0, 1] for a row's timestamp column. */
|
|
2630
|
+
declare function recencyBoost(row: Row, signal: RecencySignal, nowMs: number): number;
|
|
2631
|
+
/** Saturating reward boost in [0, 1): r / (1 + r) for non-negative reward. */
|
|
2632
|
+
declare function rewardBoost(row: Row, signal: RewardSignal): number;
|
|
2633
|
+
/** Saturating backlink boost in [0, 1): b / (1 + b) for a non-negative count. */
|
|
2634
|
+
declare function backlinkBoost(row: Row, signal: BacklinkSignal): number;
|
|
2635
|
+
/**
|
|
2636
|
+
* Combined, weighted ranking boost for a row (≥ 0). Multiply a base relevance
|
|
2637
|
+
* score by `(1 + rankingBoost(...))` to apply it.
|
|
2638
|
+
*/
|
|
2639
|
+
declare function rankingBoost(row: Row, opts: RankingOptions): number;
|
|
2640
|
+
|
|
2641
|
+
/**
|
|
2642
|
+
* Hybrid search — fuse semantic (vector) retrieval with lexical (full-text)
|
|
2643
|
+
* retrieval so the result set has both the recall of embeddings and the
|
|
2644
|
+
* precision of exact-term matching. Neither arm alone is enough: vectors miss
|
|
2645
|
+
* rare exact tokens (names, ids, codes), keywords miss paraphrases.
|
|
2646
|
+
*
|
|
2647
|
+
* Fusion is Reciprocal Rank Fusion (RRF): a document's score is the sum over the
|
|
2648
|
+
* arms it appears in of `1 / (k + rank)`, with `k = 60` by default. RRF needs
|
|
2649
|
+
* only the per-arm *ranks*, so the two arms' incomparable score scales (cosine
|
|
2650
|
+
* similarity vs ts_rank/bm25) never have to be normalized against each other.
|
|
2651
|
+
*
|
|
2652
|
+
* Optional post-fusion stages: deterministic ranking signals (recency / reward /
|
|
2653
|
+
* backlink / custom) and a bring-your-own reranker. Results carry a full score breakdown
|
|
2654
|
+
* for `--explain`.
|
|
2655
|
+
*/
|
|
2656
|
+
|
|
2657
|
+
interface HybridSearchOptions {
|
|
2658
|
+
/** Final number of results. Default 10. */
|
|
2659
|
+
topK?: number;
|
|
2660
|
+
/** RRF constant — larger flattens the rank contribution. Default 60. */
|
|
2661
|
+
rrfK?: number;
|
|
2662
|
+
/** Candidates pulled from each arm before fusion. Default max(topK*4, 20). */
|
|
2663
|
+
poolSize?: number;
|
|
2664
|
+
/** Minimum cosine similarity for the vector arm. Default 0. */
|
|
2665
|
+
minVectorScore?: number;
|
|
2666
|
+
/** Embeddings config — enables the vector arm. Omit for FTS-only fusion. */
|
|
2667
|
+
embeddingsConfig?: EmbeddingsConfig;
|
|
2668
|
+
/** Primary-key column of the base table. Default 'id'. */
|
|
2669
|
+
pkColumn?: string;
|
|
2670
|
+
/** Deterministic post-fusion ranking signals. */
|
|
2671
|
+
ranking?: RankingOptions;
|
|
2672
|
+
/** Optional reranker over the fused top candidates (graceful fallback). */
|
|
2673
|
+
reranker?: RerankerFn;
|
|
2674
|
+
}
|
|
2675
|
+
/** Per-result score breakdown (the `--explain` payload). */
|
|
2676
|
+
interface HybridScoreBreakdown {
|
|
2677
|
+
/** Final score used for ordering. */
|
|
2678
|
+
final: number;
|
|
2679
|
+
/** Reciprocal-rank-fusion score before ranking/rerank. */
|
|
2680
|
+
rrf: number;
|
|
2681
|
+
/** 1-based rank in the vector arm, or null if absent. */
|
|
2682
|
+
vectorRank: number | null;
|
|
2683
|
+
/** Cosine similarity from the vector arm, or null. */
|
|
2684
|
+
vectorScore: number | null;
|
|
2685
|
+
/** 1-based rank in the FTS arm, or null if absent. */
|
|
2686
|
+
ftsRank: number | null;
|
|
2687
|
+
/** FTS relevance score (ts_rank / -bm25), or null. */
|
|
2688
|
+
ftsScore: number | null;
|
|
2689
|
+
/** Ranking boost, applied as a `(1 + boost)` multiplier (0 when no ranking signals). */
|
|
2690
|
+
rankingBoost: number;
|
|
2691
|
+
/** Reranker score, when a reranker actually scored this row. */
|
|
2692
|
+
rerankerScore?: number;
|
|
2693
|
+
}
|
|
2694
|
+
interface HybridSearchResult {
|
|
2695
|
+
row: Row;
|
|
2696
|
+
score: number;
|
|
2697
|
+
explain: HybridScoreBreakdown;
|
|
2698
|
+
/** Best-matching chunk text from the vector arm, when available. */
|
|
2699
|
+
matchedContent?: string;
|
|
2700
|
+
}
|
|
2701
|
+
/**
|
|
2702
|
+
* Run a hybrid (vector + full-text) search over one table and return fused,
|
|
2703
|
+
* optionally ranked + reranked results with a per-result score breakdown.
|
|
2704
|
+
* Soft-deleted rows are excluded (both arms honor `deleted_at`).
|
|
2705
|
+
*/
|
|
2706
|
+
declare function hybridSearch(adapter: StorageAdapter, table: string, query: string, opts?: HybridSearchOptions): Promise<HybridSearchResult[]>;
|
|
2707
|
+
|
|
2708
|
+
/**
|
|
2709
|
+
* Graph-augmented retrieval.
|
|
2710
|
+
*
|
|
2711
|
+
* Pure similarity retrieval treats every row as an island. Real knowledge is
|
|
2712
|
+
* relational: a document cites another, a task blocks a task, a person belongs to
|
|
2713
|
+
* a team. A typed-edge graph over the rows lets retrieval be *relationship-aware*
|
|
2714
|
+
* — traverse from an anchor entity to its neighborhood, and boost results that
|
|
2715
|
+
* are graph-connected to the things you already care about.
|
|
2716
|
+
*
|
|
2717
|
+
* Edges live in one internal `__lattice_edges` table (GUI-hidden by prefix).
|
|
2718
|
+
* They can be added explicitly, or extracted with **zero LLM** from existing
|
|
2719
|
+
* foreign-key columns. Traversal is a **bounded BFS** with hard caps on depth and
|
|
2720
|
+
* visited-node count, so a dense or cyclic graph can never blow up memory.
|
|
2721
|
+
*/
|
|
2722
|
+
|
|
2723
|
+
/** Absolute ceiling on traversal depth — a hard guard against runaway BFS. */
|
|
2724
|
+
declare const MAX_TRAVERSAL_DEPTH = 5;
|
|
2725
|
+
/** Default ceiling on visited nodes per traversal. */
|
|
2726
|
+
declare const DEFAULT_MAX_NODES = 10000;
|
|
2727
|
+
interface GraphNode {
|
|
2728
|
+
table: string;
|
|
2729
|
+
id: string;
|
|
2730
|
+
}
|
|
2731
|
+
interface GraphEdge {
|
|
2732
|
+
srcTable: string;
|
|
2733
|
+
srcId: string;
|
|
2734
|
+
dstTable: string;
|
|
2735
|
+
dstId: string;
|
|
2736
|
+
/** Edge type/label (e.g. 'cites', 'blocks', 'member_of'). */
|
|
2737
|
+
type: string;
|
|
2738
|
+
/** Edge weight (default 1). Higher = stronger relationship. */
|
|
2739
|
+
weight?: number;
|
|
2740
|
+
}
|
|
2741
|
+
type TraversalDirection = 'out' | 'in' | 'both';
|
|
2742
|
+
interface TraversalOptions {
|
|
2743
|
+
/** Max BFS depth (clamped to MAX_TRAVERSAL_DEPTH). Default 2. */
|
|
2744
|
+
maxDepth?: number;
|
|
2745
|
+
/** Follow out-edges, in-edges, or both. Default 'out'. */
|
|
2746
|
+
direction?: TraversalDirection;
|
|
2747
|
+
/** Restrict to these edge types. */
|
|
2748
|
+
edgeTypes?: string[];
|
|
2749
|
+
/** Stop after visiting this many nodes (cycle/blowup guard). Default 10000. */
|
|
2750
|
+
maxNodes?: number;
|
|
2751
|
+
}
|
|
2752
|
+
interface TraversalNode {
|
|
2753
|
+
node: GraphNode;
|
|
2754
|
+
/** BFS depth from the start node (start = 0). */
|
|
2755
|
+
depth: number;
|
|
2756
|
+
}
|
|
2757
|
+
interface GraphTraversalResult {
|
|
2758
|
+
start: GraphNode;
|
|
2759
|
+
nodes: TraversalNode[];
|
|
2760
|
+
edges: GraphEdge[];
|
|
2761
|
+
/** True if a cap (depth or node count) stopped the traversal early. */
|
|
2762
|
+
truncated: boolean;
|
|
2763
|
+
}
|
|
2764
|
+
/** Ensure the internal edges table exists (idempotent, GUI-hidden by prefix). */
|
|
2765
|
+
declare function ensureEdgesTable(adapter: StorageAdapter): Promise<void>;
|
|
2766
|
+
/** Add (upsert) one edge. */
|
|
2767
|
+
declare function addEdge(adapter: StorageAdapter, edge: GraphEdge): Promise<void>;
|
|
2768
|
+
/** Add many edges (each upserted). */
|
|
2769
|
+
declare function addEdges(adapter: StorageAdapter, edges: GraphEdge[]): Promise<void>;
|
|
2770
|
+
/** Remove one edge (all matching types when `type` omitted). */
|
|
2771
|
+
declare function removeEdge(adapter: StorageAdapter, edge: Omit<GraphEdge, 'weight' | 'type'> & {
|
|
2772
|
+
type?: string;
|
|
2773
|
+
}): Promise<void>;
|
|
2774
|
+
/** Direct neighbors of a node (one hop), in the given direction + type filter. */
|
|
2775
|
+
declare function neighbors(adapter: StorageAdapter, node: GraphNode, opts?: {
|
|
2776
|
+
direction?: TraversalDirection;
|
|
2777
|
+
edgeTypes?: string[];
|
|
2778
|
+
}): Promise<GraphEdge[]>;
|
|
2779
|
+
/**
|
|
2780
|
+
* Bounded breadth-first traversal from `start`. Hard caps: depth is clamped to
|
|
2781
|
+
* {@link MAX_TRAVERSAL_DEPTH}, and the visited set is capped at `maxNodes`. A
|
|
2782
|
+
* visited set prevents revisiting nodes in cyclic graphs.
|
|
2783
|
+
*/
|
|
2784
|
+
declare function traverse(adapter: StorageAdapter, start: GraphNode, opts?: TraversalOptions): Promise<GraphTraversalResult>;
|
|
2785
|
+
interface ExtractEdgesSpec {
|
|
2786
|
+
/** Source table holding the foreign key. */
|
|
2787
|
+
srcTable: string;
|
|
2788
|
+
/** FK column on the source table. */
|
|
2789
|
+
fkColumn: string;
|
|
2790
|
+
/** Table the FK points at. */
|
|
2791
|
+
dstTable: string;
|
|
2792
|
+
/** Edge type to label the extracted edges. Default `<fkColumn>`. */
|
|
2793
|
+
type?: string;
|
|
2794
|
+
/** Source-table primary key. Default 'id'. */
|
|
2795
|
+
pkColumn?: string;
|
|
2796
|
+
}
|
|
2797
|
+
/**
|
|
2798
|
+
* Zero-LLM edge extraction: derive `srcTable[pk] --type--> dstTable[fk]` edges
|
|
2799
|
+
* from a foreign-key column. Deterministic; no model. Returns the edge count.
|
|
2800
|
+
*/
|
|
2801
|
+
declare function extractEdgesFromColumn(adapter: StorageAdapter, spec: ExtractEdgesSpec): Promise<number>;
|
|
2802
|
+
/**
|
|
2803
|
+
* Adjacency boost — re-score retrieval results by their graph connectivity to a
|
|
2804
|
+
* set of anchor nodes (e.g. the entities in the user's current context). A
|
|
2805
|
+
* result adjacent (within `maxDepth`) to an anchor is boosted by the edge weight
|
|
2806
|
+
* decayed by hop distance, scaled by `weight`. This makes retrieval
|
|
2807
|
+
* relationship-aware: things related to what you already care about rank higher.
|
|
2808
|
+
*
|
|
2809
|
+
* Returns a new array sorted by the boosted score; pure (no DB writes).
|
|
2810
|
+
*/
|
|
2811
|
+
interface GraphBoostOptions {
|
|
2812
|
+
/** Anchor nodes whose neighborhood is preferred. */
|
|
2813
|
+
anchors: GraphNode[];
|
|
2814
|
+
/** Table the results belong to (results are `{ id, score }`). */
|
|
2815
|
+
resultTable: string;
|
|
2816
|
+
/** Boost weight applied to the adjacency signal. Default 0.5. */
|
|
2817
|
+
weight?: number;
|
|
2818
|
+
/** Hop radius from anchors to consider. Clamped to MAX_TRAVERSAL_DEPTH. Default 1. */
|
|
2819
|
+
maxDepth?: number;
|
|
2820
|
+
/** Edge direction from the anchor's perspective. Default 'both'. */
|
|
2821
|
+
direction?: TraversalDirection;
|
|
2822
|
+
edgeTypes?: string[];
|
|
2823
|
+
}
|
|
2824
|
+
interface GraphBoostResult<T> {
|
|
2825
|
+
item: T;
|
|
2826
|
+
baseScore: number;
|
|
2827
|
+
boostedScore: number;
|
|
2828
|
+
/** Min hop distance to an anchor (Infinity if unreachable). */
|
|
2829
|
+
hops: number;
|
|
2830
|
+
}
|
|
2831
|
+
declare function graphAdjacencyBoost<T extends {
|
|
2832
|
+
id: string;
|
|
2833
|
+
score: number;
|
|
2834
|
+
}>(adapter: StorageAdapter, results: T[], opts: GraphBoostOptions): Promise<GraphBoostResult<T>[]>;
|
|
2835
|
+
|
|
2836
|
+
/**
|
|
2837
|
+
* Seamless cloud file-byte access — an in-database presigned-URL broker.
|
|
2838
|
+
*
|
|
2839
|
+
* After joining a cloud via invite, a scoped member connects directly to the
|
|
2840
|
+
* cloud's Postgres as their own least-privilege role. They can SELECT a `files`
|
|
2841
|
+
* row they're allowed to see, but they hold no S3 credential, so fetching the
|
|
2842
|
+
* bytes would otherwise fail. This installs a `SECURITY DEFINER` function that,
|
|
2843
|
+
* **inside Postgres** (the only place the owner's key lives, away from members),
|
|
2844
|
+
* gates on the member's row-visibility and computes a short-lived AWS SigV4
|
|
2845
|
+
* presigned URL for exactly that object — so the member fetches/uploads bytes
|
|
2846
|
+
* with zero config and never holds a key.
|
|
2847
|
+
*
|
|
2848
|
+
* Why sign in plpgsql: the secret must never leave the database for a member's
|
|
2849
|
+
* process, so a Node-side presign is out — the signature is computed in-DB via
|
|
2850
|
+
* `pgcrypto` HMAC-SHA256. Correctness of the SigV4 chain is verified against
|
|
2851
|
+
* AWS's published test vectors (no real S3 needed).
|
|
2852
|
+
*
|
|
2853
|
+
* Postgres + a cloud only. SQLite is single-user with local bytes — no-op.
|
|
2854
|
+
*/
|
|
2855
|
+
|
|
2856
|
+
/** Owner-only table holding the least-privilege S3 key. Never member-granted. */
|
|
2857
|
+
declare const S3_SECRET_TABLE = "__lattice_cloud_s3_secret";
|
|
2858
|
+
/**
|
|
2859
|
+
* SQL that installs the presigner: `pgcrypto`, the owner-only secret table, the
|
|
2860
|
+
* SigV4 signer, and the visibility-gated `lattice_presign_file` wrapper. All
|
|
2861
|
+
* `SECURITY DEFINER` bodies are search_path-pinned by the caller.
|
|
2862
|
+
*
|
|
2863
|
+
* `lattice_aws_sigv4_presign(...)` is parameterized on the date/time so it can be
|
|
2864
|
+
* verified deterministically against AWS test vectors; `lattice_presign_file`
|
|
2865
|
+
* derives the date from `now()` at call time.
|
|
2866
|
+
*/
|
|
2867
|
+
declare function filePresignSql(): string;
|
|
2868
|
+
/**
|
|
2869
|
+
* Install the presigner into the current Postgres schema. No-op on SQLite.
|
|
2870
|
+
* `schema` is used to pin the `SECURITY DEFINER` search_path (reuse
|
|
2871
|
+
* {@link cloudSchema} to resolve it) — required to prevent a member from
|
|
2872
|
+
* shadowing `files` / the visibility helpers via `pg_temp`.
|
|
2873
|
+
*/
|
|
2874
|
+
declare function installFilePresigner(adapter: StorageAdapter, schema: string): Promise<void>;
|
|
2875
|
+
interface CloudS3Secret {
|
|
2876
|
+
bucket: string;
|
|
2877
|
+
region: string;
|
|
2878
|
+
accessKey: string;
|
|
2879
|
+
secretKey: string;
|
|
2880
|
+
/** Optional key prefix applied to every object. */
|
|
2881
|
+
prefix?: string;
|
|
2882
|
+
/** Optional host override for S3-compatible stores. */
|
|
2883
|
+
endpoint?: string;
|
|
2884
|
+
}
|
|
2885
|
+
/** Store/replace the owner's least-privilege S3 key (owner-only; never granted). */
|
|
2886
|
+
declare function setCloudS3Secret(adapter: StorageAdapter, secret: CloudS3Secret): Promise<void>;
|
|
2887
|
+
/**
|
|
2888
|
+
* Grant a member group EXECUTE on the presigner (so every current + future
|
|
2889
|
+
* member can presign their own visible files), WITHOUT granting any access to
|
|
2890
|
+
* the owner-only secret table. Idempotent.
|
|
2891
|
+
*/
|
|
2892
|
+
declare function grantPresignerToMemberGroup(adapter: StorageAdapter, memberGroup: string): Promise<void>;
|
|
2893
|
+
/** Whether the presigner function is installed in the current schema. */
|
|
2894
|
+
declare function hasFilePresigner(adapter: StorageAdapter): Promise<boolean>;
|
|
2895
|
+
|
|
2896
|
+
/**
|
|
2897
|
+
* Retrieval evaluation — measure the quality of any ranked-retrieval function
|
|
2898
|
+
* against a labeled query set, with the standard information-retrieval metrics:
|
|
2899
|
+
* Precision@k, Recall@k, Mean Reciprocal Rank (MRR), normalized Discounted
|
|
2900
|
+
* Cumulative Gain (nDCG@k), and Mean Average Precision (MAP).
|
|
2901
|
+
*
|
|
2902
|
+
* The evaluator is deliberately decoupled from any specific search
|
|
2903
|
+
* implementation: you hand it a {@link Retriever} — a function that maps a query
|
|
2904
|
+
* string to a best-first list of row ids — plus the ground-truth relevant ids
|
|
2905
|
+
* per query. That makes it usable to grade semantic search, full-text search, a
|
|
2906
|
+
* hybrid fusion, a graph-augmented retriever, or an external service, and to
|
|
2907
|
+
* regression-gate any of them in CI so an upgrade can't silently lower quality.
|
|
2908
|
+
*
|
|
2909
|
+
* All metric math is computed in-process from the ranked id lists; nothing here
|
|
2910
|
+
* touches the database, so it is dialect-agnostic and side-effect free.
|
|
2911
|
+
*/
|
|
2912
|
+
/**
|
|
2913
|
+
* A graded relevance label. `gain` is the graded usefulness of the row for the
|
|
2914
|
+
* query (used by nDCG); omit it for binary relevance (treated as gain 1).
|
|
2915
|
+
*/
|
|
2916
|
+
interface RelevanceLabel {
|
|
2917
|
+
id: string;
|
|
2918
|
+
/** Graded relevance gain (default 1). Higher = more useful. */
|
|
2919
|
+
gain?: number;
|
|
2920
|
+
}
|
|
2921
|
+
/**
|
|
2922
|
+
* A single labeled evaluation query: the query text plus the ground-truth set
|
|
2923
|
+
* of ids that *should* be retrieved. Order of `relevant` is irrelevant — it is
|
|
2924
|
+
* a set; ranking quality is judged against the order the retriever returns.
|
|
2925
|
+
*/
|
|
2926
|
+
interface EvalQuery {
|
|
2927
|
+
/** Stable identifier for per-query reporting. Defaults to the query text. */
|
|
2928
|
+
id?: string;
|
|
2929
|
+
/** Natural-language query text passed to the retriever. */
|
|
2930
|
+
query: string;
|
|
2931
|
+
/** Ground-truth relevant ids — bare ids (binary) or graded labels. */
|
|
2932
|
+
relevant: string[] | RelevanceLabel[];
|
|
2933
|
+
}
|
|
2934
|
+
/**
|
|
2935
|
+
* Maps a query to a ranked, best-first list of row ids. May be sync or async.
|
|
2936
|
+
* Returning more than `k` ids is fine — the evaluator applies the cutoff.
|
|
2937
|
+
*/
|
|
2938
|
+
type Retriever = (query: string) => Promise<string[]> | string[];
|
|
2939
|
+
interface RetrievalEvalOptions {
|
|
2940
|
+
/** Primary cutoff for P@k / Recall@k / nDCG@k. Default 10. */
|
|
2941
|
+
k?: number;
|
|
2942
|
+
/**
|
|
2943
|
+
* Additional cutoffs to also report (e.g. `[1, 3, 5, 10]`). Each appears in
|
|
2944
|
+
* {@link RetrievalEvalSummary.byK}. The primary `k` is always reported.
|
|
2945
|
+
*/
|
|
2946
|
+
ks?: number[];
|
|
2947
|
+
}
|
|
2948
|
+
/** Per-query metric breakdown. */
|
|
2949
|
+
interface PerQueryEval {
|
|
2950
|
+
id: string;
|
|
2951
|
+
query: string;
|
|
2952
|
+
/** Relevant ids found in the top-k, divided by k. */
|
|
2953
|
+
precisionAtK: number;
|
|
2954
|
+
/** Relevant ids found in the top-k, divided by total relevant. */
|
|
2955
|
+
recallAtK: number;
|
|
2956
|
+
/** 1 / (rank of the first relevant id), 0 if none were returned. */
|
|
2957
|
+
reciprocalRank: number;
|
|
2958
|
+
/** DCG@k / ideal-DCG@k, in [0, 1]. */
|
|
2959
|
+
ndcgAtK: number;
|
|
2960
|
+
/** Average precision over the full returned list (the AP that MAP averages). */
|
|
2961
|
+
averagePrecision: number;
|
|
2962
|
+
/** Number of ids the retriever returned. */
|
|
2963
|
+
retrieved: number;
|
|
2964
|
+
/** Number of ground-truth relevant ids. */
|
|
2965
|
+
relevantTotal: number;
|
|
2966
|
+
}
|
|
2967
|
+
/** Aggregate metrics across the whole query set, plus per-query detail. */
|
|
2968
|
+
interface RetrievalEvalSummary {
|
|
2969
|
+
/** The primary cutoff used for the top-level aggregate fields. */
|
|
2970
|
+
k: number;
|
|
2971
|
+
/** Number of queries evaluated. */
|
|
2972
|
+
queryCount: number;
|
|
2973
|
+
/** Mean Precision@k. */
|
|
2974
|
+
precisionAtK: number;
|
|
2975
|
+
/** Mean Recall@k. */
|
|
2976
|
+
recallAtK: number;
|
|
2977
|
+
/** Mean Reciprocal Rank. */
|
|
2978
|
+
mrr: number;
|
|
2979
|
+
/** Mean nDCG@k. */
|
|
2980
|
+
ndcgAtK: number;
|
|
2981
|
+
/** Mean Average Precision. */
|
|
2982
|
+
map: number;
|
|
2983
|
+
/** Per-cutoff means, present when `ks` is supplied (always includes `k`). */
|
|
2984
|
+
byK?: Record<number, {
|
|
2985
|
+
precisionAtK: number;
|
|
2986
|
+
recallAtK: number;
|
|
2987
|
+
ndcgAtK: number;
|
|
2988
|
+
}>;
|
|
2989
|
+
/** Per-query metrics, in input order. */
|
|
2990
|
+
perQuery: PerQueryEval[];
|
|
2991
|
+
}
|
|
2992
|
+
/**
|
|
2993
|
+
* Evaluate a retriever against a labeled query set.
|
|
2994
|
+
*
|
|
2995
|
+
* @throws Error if `queries` is empty — an eval over no queries would silently
|
|
2996
|
+
* report a meaningless "0 across the board", so it fails loudly instead.
|
|
2997
|
+
*/
|
|
2998
|
+
declare function evaluateRetrieval(queries: EvalQuery[], retriever: Retriever, opts?: RetrievalEvalOptions): Promise<RetrievalEvalSummary>;
|
|
2999
|
+
/**
|
|
3000
|
+
* Compare a candidate summary against a baseline and report regressions beyond
|
|
3001
|
+
* a tolerance. Designed to drive a CI gate: a retrieval change that lowers any
|
|
3002
|
+
* headline metric by more than `tolerance` fails the build.
|
|
3003
|
+
*/
|
|
3004
|
+
interface EvalRegression {
|
|
3005
|
+
metric: 'precisionAtK' | 'recallAtK' | 'mrr' | 'ndcgAtK' | 'map';
|
|
3006
|
+
baseline: number;
|
|
3007
|
+
candidate: number;
|
|
3008
|
+
delta: number;
|
|
3009
|
+
}
|
|
3010
|
+
/**
* Compare the `candidate` summary against the `baseline` summary and return
* the regressions found, as {@link EvalRegression} entries (each names one
* headline metric: precisionAtK, recallAtK, mrr, ndcgAtK or map).
* NOTE(review): the default `tolerance` and the sign convention of `delta`
* are not visible in this declaration — confirm against the implementation.
*/
declare function detectRetrievalRegressions(baseline: RetrievalEvalSummary, candidate: RetrievalEvalSummary, tolerance?: number): EvalRegression[];
|
|
3011
|
+
|
|
3012
|
+
/**
|
|
3013
|
+
* Retrieval health diagnostics — a read-only `doctor` for a Lattice database's
|
|
3014
|
+
* search surface. It reports, without mutating anything:
|
|
3015
|
+
*
|
|
3016
|
+
* - which retrieval extensions are available/installed (FTS5, pgvector,
|
|
3017
|
+
* sqlite-vec, pg_trgm),
|
|
3018
|
+
* - per-table full-text and embedding coverage (how many base rows are
|
|
3019
|
+
* actually indexed/embedded vs how many exist),
|
|
3020
|
+
* - staleness and gaps surfaced as severity-ranked issues.
|
|
3021
|
+
*
|
|
3022
|
+
* The point is proactive detection: index or embedding drift is otherwise
|
|
3023
|
+
* invisible until a user notices bad results. `diagnoseRetrieval` surfaces it
|
|
3024
|
+
* first, and `lattice doctor` puts it one command away. Everything here is a
|
|
3025
|
+
* `SELECT`/introspection — no DDL, no writes, both dialects.
|
|
3026
|
+
*/
|
|
3027
|
+
|
|
3028
|
+
type HealthSeverity = 'info' | 'warning' | 'error';
|
|
3029
|
+
type HealthIssueKind = 'fts_missing' | 'fts_stale' | 'fts_empty' | 'embedding_missing' | 'embedding_stale' | 'extension_missing' | 'dimension_mismatch' | 'index_stale';
|
|
3030
|
+
interface RetrievalHealthIssue {
|
|
3031
|
+
/** Table the issue concerns, or undefined for a global/extension issue. */
|
|
3032
|
+
table?: string;
|
|
3033
|
+
kind: HealthIssueKind;
|
|
3034
|
+
severity: HealthSeverity;
|
|
3035
|
+
message: string;
|
|
3036
|
+
/** Optional remediation hint. */
|
|
3037
|
+
hint?: string;
|
|
3038
|
+
}
|
|
3039
|
+
/** Availability of the retrieval extensions on this connection. */
|
|
3040
|
+
interface ExtensionAvailability {
|
|
3041
|
+
/** SQLite compiled with FTS5. */
|
|
3042
|
+
fts5?: boolean;
|
|
3043
|
+
/** SQLite sqlite-vec extension loaded (vec_version() resolves). */
|
|
3044
|
+
sqliteVec?: boolean;
|
|
3045
|
+
/** Postgres pgvector installed (CREATE EXTENSION vector done). */
|
|
3046
|
+
pgvectorInstalled?: boolean;
|
|
3047
|
+
/** Postgres pgvector available to install. */
|
|
3048
|
+
pgvectorAvailable?: boolean;
|
|
3049
|
+
/** Postgres pg_trgm installed. */
|
|
3050
|
+
pgTrgmInstalled?: boolean;
|
|
3051
|
+
}
|
|
3052
|
+
interface TableHealth {
|
|
3053
|
+
table: string;
|
|
3054
|
+
/** Non-deleted base rows. */
|
|
3055
|
+
rowCount: number;
|
|
3056
|
+
/** Rows present in the FTS index, when one exists. */
|
|
3057
|
+
ftsIndexed?: number;
|
|
3058
|
+
/** ftsIndexed / rowCount, in [0, 1]. */
|
|
3059
|
+
ftsCoverage?: number;
|
|
3060
|
+
/** Embeddings stored for this table. */
|
|
3061
|
+
embeddingCount?: number;
|
|
3062
|
+
/** embeddingCount / rowCount, in [0, 1]. */
|
|
3063
|
+
embeddingCoverage?: number;
|
|
3064
|
+
issues: RetrievalHealthIssue[];
|
|
3065
|
+
}
|
|
3066
|
+
interface RetrievalHealthReport {
|
|
3067
|
+
dialect: 'sqlite' | 'postgres';
|
|
3068
|
+
extensions: ExtensionAvailability;
|
|
3069
|
+
tables: TableHealth[];
|
|
3070
|
+
/** Global (non-table) issues, e.g. a missing extension. */
|
|
3071
|
+
issues: RetrievalHealthIssue[];
|
|
3072
|
+
/** True when no `error`-severity issue exists anywhere. */
|
|
3073
|
+
healthy: boolean;
|
|
3074
|
+
}
|
|
3075
|
+
/** What a table is expected to support, so gaps can be flagged as errors. */
|
|
3076
|
+
interface RetrievalHealthSpec {
|
|
3077
|
+
table: string;
|
|
3078
|
+
/** Table opted into full-text search. */
|
|
3079
|
+
expectFts?: boolean;
|
|
3080
|
+
/** Table opted into embeddings. */
|
|
3081
|
+
expectEmbeddings?: boolean;
|
|
3082
|
+
/** Known embedding dimension (reserved for dimension checks). */
|
|
3083
|
+
embeddingDim?: number;
|
|
3084
|
+
}
|
|
3085
|
+
interface DiagnoseOptions {
|
|
3086
|
+
/**
|
|
3087
|
+
* Tables to diagnose with their expected capabilities. When omitted, the
|
|
3088
|
+
* doctor reports extension availability and embedding coverage for every
|
|
3089
|
+
* table that already has stored embeddings, but cannot flag "missing"
|
|
3090
|
+
* (it has no expectations to compare against).
|
|
3091
|
+
*/
|
|
3092
|
+
tables?: RetrievalHealthSpec[];
|
|
3093
|
+
/**
|
|
3094
|
+
* Coverage below which a partially-indexed/embedded table is flagged stale.
|
|
3095
|
+
* Default 1 — anything short of full coverage is a `warning`.
|
|
3096
|
+
*/
|
|
3097
|
+
staleThreshold?: number;
|
|
3098
|
+
}
|
|
3099
|
+
/**
|
|
3100
|
+
* Produce a retrieval health report for the given connection.
|
|
3101
|
+
*
|
|
3102
|
+
* Read-only and dialect-agnostic. Pass `tables` (with expectations) to get
|
|
3103
|
+
* missing-index / missing-embedding errors; omit it for a capability + coverage
|
|
3104
|
+
* snapshot only.
|
|
3105
|
+
*/
|
|
3106
|
+
declare function diagnoseRetrieval(adapter: StorageAdapter, opts?: DiagnoseOptions): Promise<RetrievalHealthReport>;
|
|
3107
|
+
/** Render a report as a human-readable multi-line string (for `lattice doctor`). */
|
|
3108
|
+
declare function formatHealthReport(report: RetrievalHealthReport): string;
|
|
3109
|
+
|
|
3110
|
+
/**
|
|
3111
|
+
* Reproducible retrieval benchmark harness.
|
|
3112
|
+
*
|
|
3113
|
+
* Produces the speed-to-answer numbers a buyer would benchmark you on —
|
|
3114
|
+
* p50/p95/p99 latency for filtered query, full-text search, vector search, and
|
|
3115
|
+
* SQL aggregation — plus ingest throughput, all on synthetic data at a
|
|
3116
|
+
* configurable scale, on either dialect. It exercises the *real* code paths
|
|
3117
|
+
* (the same `fullTextSearch` / `searchByEmbedding` / SQL the library serves in
|
|
3118
|
+
* production), so the numbers are honest, and it ships in the package so the
|
|
3119
|
+
* same harness that gates regressions in CI can be re-run by anyone.
|
|
3120
|
+
*
|
|
3121
|
+
* Default scale is small so a CI SLO gate runs in seconds; override via the
|
|
3122
|
+
* `scale` option (or the `LATTICE_BENCH_ROWS` / `LATTICE_BENCH_QUERIES` /
|
|
3123
|
+
* `LATTICE_BENCH_DIM` env vars) to reproduce the large-n published numbers.
|
|
3124
|
+
*
|
|
3125
|
+
* The default embedder is a dependency-free deterministic token-hash vector —
|
|
3126
|
+
* it measures latency/throughput honestly without pulling in a model. Pass your
|
|
3127
|
+
* own `embed` to benchmark with real vectors.
|
|
3128
|
+
*/
|
|
3129
|
+
|
|
3130
|
+
interface LatencyStats {
|
|
3131
|
+
count: number;
|
|
3132
|
+
min: number;
|
|
3133
|
+
max: number;
|
|
3134
|
+
mean: number;
|
|
3135
|
+
p50: number;
|
|
3136
|
+
p95: number;
|
|
3137
|
+
p99: number;
|
|
3138
|
+
}
|
|
3139
|
+
interface BenchmarkScale {
|
|
3140
|
+
/** Rows inserted into the synthetic table. */
|
|
3141
|
+
rows: number;
|
|
3142
|
+
/** Distinct queries timed for each measured operation. */
|
|
3143
|
+
queries: number;
|
|
3144
|
+
/** Embedding dimensionality. */
|
|
3145
|
+
dim: number;
|
|
3146
|
+
}
|
|
3147
|
+
interface BenchmarkReport {
|
|
3148
|
+
dialect: 'sqlite' | 'postgres';
|
|
3149
|
+
scale: BenchmarkScale;
|
|
3150
|
+
ingest: {
|
|
3151
|
+
rows: number;
|
|
3152
|
+
ms: number;
|
|
3153
|
+
rowsPerSec: number;
|
|
3154
|
+
};
|
|
3155
|
+
query: LatencyStats;
|
|
3156
|
+
fts: LatencyStats;
|
|
3157
|
+
vector: LatencyStats;
|
|
3158
|
+
aggregate: LatencyStats;
|
|
3159
|
+
/** Peak resident-set bytes observed during the vector-search phase. */
|
|
3160
|
+
peakRssBytes: number;
|
|
3161
|
+
/**
|
|
3162
|
+
* Whether the `vector` numbers reflect the NATIVE index (pgvector/sqlite-vec)
|
|
3163
|
+
* or the in-process O(n) scan fallback. `false` means no extension was present,
|
|
3164
|
+
* so `vector.p95` is the scan baseline — not the indexed number. Surfaced so a
|
|
3165
|
+
* published benchmark can never present the scan as the index.
|
|
3166
|
+
*/
|
|
3167
|
+
vectorIndexed: boolean;
|
|
3168
|
+
}
|
|
3169
|
+
interface BenchmarkOptions {
|
|
3170
|
+
scale?: Partial<BenchmarkScale>;
|
|
3171
|
+
/** Embedder; defaults to a deterministic dependency-free token-hash vector. */
|
|
3172
|
+
embed?: (text: string) => Promise<number[]>;
|
|
3173
|
+
/** Table name for the synthetic data (dropped + recreated). Default `_lattice_bench`. */
|
|
3174
|
+
table?: string;
|
|
3175
|
+
}
|
|
3176
|
+
/**
* Percentile of a sample array that is already sorted in ascending order.
*
* @param sortedAsc Samples sorted ascending (the caller is expected to sort first).
* @param p Requested percentile. NOTE(review): the scale (0–1 vs 0–100) and the
* interpolation rule are not visible from this declaration — confirm.
* @returns The value at the requested percentile.
*/
declare function percentile(sortedAsc: number[], p: number): number;
|
|
3177
|
+
/**
* Summarize raw timing samples into a {@link LatencyStats} record
* (count, min, max, mean, p50 / p95 / p99).
*
* NOTE(review): units follow the samples passed in — presumably milliseconds,
* matching `RetrievalSlo.maxMs`; confirm. Behavior for an empty array is not
* visible from this declaration.
*/
declare function latencyStats(samples: number[]): LatencyStats;
|
|
3178
|
+
/**
|
|
3179
|
+
* Run the benchmark against an adapter. Creates a synthetic table, populates it,
|
|
3180
|
+
* builds an FTS index + embeddings, times each operation, and drops the table.
|
|
3181
|
+
*/
|
|
3182
|
+
declare function benchmarkRetrieval(adapter: StorageAdapter, opts?: BenchmarkOptions): Promise<BenchmarkReport>;
|
|
3183
|
+
/** A service-level objective for a single latency metric, in milliseconds. */
|
|
3184
|
+
interface RetrievalSlo {
|
|
3185
|
+
metric: 'query.p95' | 'fts.p95' | 'vector.p95' | 'aggregate.p95';
|
|
3186
|
+
maxMs: number;
|
|
3187
|
+
}
|
|
3188
|
+
interface SloViolation extends RetrievalSlo {
|
|
3189
|
+
observedMs: number;
|
|
3190
|
+
}
|
|
3191
|
+
/** Check a report against SLOs; returns the violations (empty = all pass). */
|
|
3192
|
+
declare function checkSlos(report: BenchmarkReport, slos: RetrievalSlo[]): SloViolation[];
|
|
3193
|
+
|
|
2044
3194
|
/**
|
|
2045
3195
|
* Thrown by Lattice.seed when onUnresolvedLink: 'throw' is set and one or more
|
|
2046
3196
|
* junction links could not be created because their target rows did not
|
|
@@ -2126,6 +3276,18 @@ declare class Lattice {
|
|
|
2126
3276
|
private readonly _writeHooks;
|
|
2127
3277
|
/** Optional cap on per-row payload bytes; see LatticeOptions.maxRowBytes. */
|
|
2128
3278
|
private _maxRowBytes;
|
|
3279
|
+
/** Optional default bounded-read cap; see LatticeOptions.defaultMaxRows. */
|
|
3280
|
+
private _defaultMaxRows;
|
|
3281
|
+
/** table → immutable provenance column names (governance: P-PROV). */
|
|
3282
|
+
private readonly _provenanceCols;
|
|
3283
|
+
/** table → default trust state for new rows (governance: P-TRUST). */
|
|
3284
|
+
private readonly _trustDefault;
|
|
3285
|
+
/** table → computed-column specs + recompute order + dep set (P-VIEW). */
|
|
3286
|
+
private readonly _computed;
|
|
3287
|
+
/** table → materialized rollup specs (P-VIEW). */
|
|
3288
|
+
private readonly _rollups;
|
|
3289
|
+
/** source table → parents whose rollup it feeds (for incremental recompute). */
|
|
3290
|
+
private readonly _rollupSources;
|
|
2129
3291
|
/**
|
|
2130
3292
|
* Reject the row if its payload exceeds `_maxRowBytes`. Cost is dominated
|
|
2131
3293
|
* by Buffer.byteLength() on string columns; we skip numbers/booleans
|
|
@@ -2350,6 +3512,12 @@ declare class Lattice {
|
|
|
2350
3512
|
* inside a GUC-scoped transaction.
|
|
2351
3513
|
*/
|
|
2352
3514
|
private _prepareInsert;
|
|
3515
|
+
/**
|
|
3516
|
+
* Stamp governance defaults at insert time: auto-set `ingested_at` for a
|
|
3517
|
+
* provenance table (when not supplied) and the default `_trust_state` for a
|
|
3518
|
+
* trust table. Returns a shallow copy; a no-op for tables without governance.
|
|
3519
|
+
*/
|
|
3520
|
+
private _applyGovernanceDefaults;
|
|
2353
3521
|
/** Post-insert side effects (changelog, audit, write hooks, embedding sync),
|
|
2354
3522
|
* identical for the plain and force-visibility insert paths. */
|
|
2355
3523
|
private _afterInsert;
|
|
@@ -2363,6 +3531,31 @@ declare class Lattice {
|
|
|
2363
3531
|
upsert(table: string, row: Row): Promise<string>;
|
|
2364
3532
|
upsertBy(table: string, col: string, val: unknown, row: Row): Promise<string>;
|
|
2365
3533
|
update(table: string, id: PkLookup, row: Partial<Row>, provenance?: ChangeProvenance): Promise<void>;
|
|
3534
|
+
/** Compute a table's computed columns from a (full) row, returning the merged row. */
|
|
3535
|
+
private _applyComputedColumns;
|
|
3536
|
+
/**
|
|
3537
|
+
* On update, if the changed columns include any computed-column dependency,
|
|
3538
|
+
* fetch + decrypt the current row, merge the changes, recompute the computed
|
|
3539
|
+
* columns, and fold them into the update payload. No-op otherwise.
|
|
3540
|
+
*/
|
|
3541
|
+
private _recomputeComputedOnUpdate;
|
|
3542
|
+
/** Recompute parent rollup(s) for the FK values carried on a source row. */
|
|
3543
|
+
private _propagateRollupsFromRow;
|
|
3544
|
+
/** Recompute the parent rollup(s) fed by a changed source row (fetched by id). */
|
|
3545
|
+
private _propagateRollups;
|
|
3546
|
+
/** Recompute a single rollup cell for one parent row. */
|
|
3547
|
+
private _recomputeRollupCell;
|
|
3548
|
+
/**
|
|
3549
|
+
* Recompute all computed columns for every row of a table (a full pass). Use
|
|
3550
|
+
* after a bulk import that bypassed the per-row recompute, or after changing a
|
|
3551
|
+
* computed definition. Requires `computed` config.
|
|
3552
|
+
*/
|
|
3553
|
+
refreshComputedColumns(table: string): Promise<number>;
|
|
3554
|
+
/**
|
|
3555
|
+
* Recompute all materialized rollups for every row of a table (a full,
|
|
3556
|
+
* authoritative pass). Requires `materializedRollups` config.
|
|
3557
|
+
*/
|
|
3558
|
+
refreshMaterializedRollups(table: string): Promise<number>;
|
|
2366
3559
|
/**
|
|
2367
3560
|
* Update a row and return the full updated row. Equivalent to `update()`
|
|
2368
3561
|
* followed by `get()`.
|
|
@@ -2485,8 +3678,147 @@ declare class Lattice {
|
|
|
2485
3678
|
* @returns Matching rows with similarity scores, sorted best-first.
|
|
2486
3679
|
*/
|
|
2487
3680
|
search(table: string, query: string, opts?: SearchOptions): Promise<SearchResult[]>;
|
|
3681
|
+
/**
|
|
3682
|
+
* Hybrid search — fuse semantic (vector) and full-text retrieval with
|
|
3683
|
+
* Reciprocal Rank Fusion, with optional deterministic ranking signals
|
|
3684
|
+
* (recency / reward / custom) and an optional reranker. Returns fused results
|
|
3685
|
+
* with a per-result score breakdown (`explain`). The vector arm is enabled
|
|
3686
|
+
* when the table has `embeddings` config; otherwise it is full-text only.
|
|
3687
|
+
*/
|
|
3688
|
+
hybridSearch(table: string, query: string, opts?: Omit<HybridSearchOptions, 'embeddingsConfig' | 'pkColumn'>): Promise<HybridSearchResult[]>;
|
|
3689
|
+
/**
|
|
3690
|
+
* Backfill / re-embed a table's vectors incrementally — embed only rows that
|
|
3691
|
+
* are missing, model-stale, or changed since a timestamp, sweeping embeddings
|
|
3692
|
+
* whose source row is gone. Honors `deleted_at`. Requires `embeddings` config.
|
|
3693
|
+
*/
|
|
3694
|
+
refreshEmbeddings(table: string, opts?: RefreshEmbeddingsOptions): Promise<EmbeddingRefreshResult>;
|
|
3695
|
+
/**
|
|
3696
|
+
* Build (or rebuild) a native vector index (pgvector / sqlite-vec) for `table`
|
|
3697
|
+
* from the stored embeddings, so semantic search uses an indexed
|
|
3698
|
+
* approximate-nearest-neighbor lookup instead of an in-process scan. Returns
|
|
3699
|
+
* the number of vectors indexed; a no-op (returns 0) when no native vector
|
|
3700
|
+
* extension is available, unless `requireExtension` is set. Requires
|
|
3701
|
+
* `embeddings` config (to determine the vector dimension).
|
|
3702
|
+
*/
|
|
3703
|
+
buildVectorIndex(table: string, requireExtension?: boolean): Promise<number>;
|
|
3704
|
+
/**
|
|
3705
|
+
* Evaluate a retriever against a labeled query set, returning the standard IR
|
|
3706
|
+
* metrics (P@k / Recall@k / MRR / nDCG@k / MAP). The retriever is any
|
|
3707
|
+
* `(query) => rankedRowIds` function, so this grades semantic search,
|
|
3708
|
+
* full-text search, a hybrid fusion, or an external service — and can gate
|
|
3709
|
+
* retrieval-quality regressions in CI.
|
|
3710
|
+
*/
|
|
3711
|
+
evaluateRetrieval(queries: EvalQuery[], retriever: Retriever, opts?: RetrievalEvalOptions): Promise<RetrievalEvalSummary>;
|
|
3712
|
+
/**
|
|
3713
|
+
* Diagnose the database's retrieval health: extension availability plus
|
|
3714
|
+
* per-table full-text and embedding coverage, with gaps/staleness surfaced as
|
|
3715
|
+
* severity-ranked issues. Read-only. When `tables` is omitted, the expectations
|
|
3716
|
+
* are derived from each registered table's `fts` / `embeddings` config.
|
|
3717
|
+
*/
|
|
3718
|
+
diagnoseRetrieval(opts?: {
|
|
3719
|
+
tables?: RetrievalHealthSpec[];
|
|
3720
|
+
}): Promise<RetrievalHealthReport>;
|
|
3721
|
+
/**
|
|
3722
|
+
* Run the reproducible retrieval benchmark against this connection and return
|
|
3723
|
+
* latency percentiles + ingest throughput. Default scale is small (CI-fast);
|
|
3724
|
+
* pass `scale` (or set LATTICE_BENCH_* env vars) to reproduce large-n numbers.
|
|
3725
|
+
*/
|
|
3726
|
+
benchmarkRetrieval(opts?: BenchmarkOptions): Promise<BenchmarkReport>;
|
|
2488
3727
|
query(table: string, opts?: QueryOptions): Promise<Row[]>;
|
|
3728
|
+
/**
|
|
3729
|
+
* Keyset (cursor) pagination — stable, index-friendly paging that stays fast
|
|
3730
|
+
* arbitrarily deep into a result set (unlike OFFSET, which scans-and-discards).
|
|
3731
|
+
* Returns a page plus an opaque `nextCursor` (null on the last page).
|
|
3732
|
+
*/
|
|
3733
|
+
queryPage(table: string, opts?: QueryPageOptions): Promise<QueryPageResult>;
|
|
3734
|
+
/**
|
|
3735
|
+
* Attach declared relations to each row in `rows` (mutates in place). Each
|
|
3736
|
+
* relation is fetched in ONE batched `IN (...)` query — no N+1. `belongsTo`
|
|
3737
|
+
* attaches a single row (or null); `hasMany` attaches an array.
|
|
3738
|
+
*/
|
|
3739
|
+
private _expandRelations;
|
|
2489
3740
|
count(table: string, opts?: CountOptions): Promise<number>;
|
|
3741
|
+
/**
|
|
3742
|
+
* SQL-side aggregation — `COUNT`/`SUM`/`AVG`/`MIN`/`MAX` with optional
|
|
3743
|
+
* `GROUP BY` and `HAVING`, computed in the database so only the grouped result
|
|
3744
|
+
* rows transfer (never the underlying rows). Returns one object per group with
|
|
3745
|
+
* the groupBy columns and each aggregate under its `as` key.
|
|
3746
|
+
*
|
|
3747
|
+
* @example
|
|
3748
|
+
* ```ts
|
|
3749
|
+
* await db.aggregate('orders', {
|
|
3750
|
+
* groupBy: ['status'],
|
|
3751
|
+
* aggregates: [{ fn: 'count', as: 'n' }, { fn: 'sum', col: 'total', as: 'revenue' }],
|
|
3752
|
+
* having: [{ aggregate: 'n', op: 'gt', val: 10 }],
|
|
3753
|
+
* orderBy: 'revenue', orderDir: 'desc',
|
|
3754
|
+
* });
|
|
3755
|
+
* ```
|
|
3756
|
+
*/
|
|
3757
|
+
aggregate(table: string, opts: AggregateOptions): Promise<AggregateResult[]>;
|
|
3758
|
+
/**
* Internal guard used by the trust API. NOTE(review): presumably rejects tables
* that have no `trust` config (the public trust methods all document
* "Requires `trust` config") — confirm against the implementation.
*/
private _assertTrust;
|
|
3759
|
+
/**
|
|
3760
|
+
* Mark a row `verified` — sets `_trust_state='verified'`, `_verified_by`, and
|
|
3761
|
+
* `_verified_at` (now). Requires `trust` config on the table.
|
|
3762
|
+
*/
|
|
3763
|
+
verifyRow(table: string, id: PkLookup, verifiedBy?: string): Promise<void>;
|
|
3764
|
+
/**
|
|
3765
|
+
* Flag a row for human review — sets `_trust_state='needs_review'` and an
|
|
3766
|
+
* optional `_review_reason`. Requires `trust` config on the table.
|
|
3767
|
+
*/
|
|
3768
|
+
markRowForReview(table: string, id: PkLookup, reason?: string): Promise<void>;
|
|
3769
|
+
/** Rows currently in `needs_review` state (non-deleted). Requires `trust` config. */
|
|
3770
|
+
rowsNeedingReview(table: string): Promise<Row[]>;
|
|
3771
|
+
/** Rows currently in `verified` state (non-deleted). Requires `trust` config. */
|
|
3772
|
+
verifiedRows(table: string): Promise<Row[]>;
|
|
3773
|
+
/** Add (upsert) a typed edge between two rows. */
|
|
3774
|
+
addEdge(edge: GraphEdge): Promise<void>;
|
|
3775
|
+
/** Add (upsert) many typed edges. */
|
|
3776
|
+
addEdges(edges: GraphEdge[]): Promise<void>;
|
|
3777
|
+
/** Remove an edge (all edge types between the pair when `type` is omitted). */
|
|
3778
|
+
removeEdge(edge: Omit<GraphEdge, 'weight' | 'type'> & {
|
|
3779
|
+
type?: string;
|
|
3780
|
+
}): Promise<void>;
|
|
3781
|
+
/** Direct neighbors (one hop) of a node. */
|
|
3782
|
+
neighbors(node: GraphNode, opts?: {
|
|
3783
|
+
direction?: TraversalDirection;
|
|
3784
|
+
edgeTypes?: string[];
|
|
3785
|
+
}): Promise<GraphEdge[]>;
|
|
3786
|
+
/** Bounded BFS from a node (depth ≤ 5, node-count capped). */
|
|
3787
|
+
traverseGraph(start: GraphNode, opts?: TraversalOptions): Promise<GraphTraversalResult>;
|
|
3788
|
+
/** Zero-LLM edge extraction from a foreign-key column. Returns the edge count. */
|
|
3789
|
+
extractEdges(spec: ExtractEdgesSpec): Promise<number>;
|
|
3790
|
+
/**
|
|
3791
|
+
* Graph-augmented hybrid search: run {@link hybridSearch}, then boost results
|
|
3792
|
+
* that are graph-adjacent to the `anchors` (e.g. the user's current-context
|
|
3793
|
+
* entities), so relationship-relevant rows rank higher. Returns the reranked
|
|
3794
|
+
* hybrid results (the graph boost is folded into each score).
|
|
3795
|
+
*/
|
|
3796
|
+
graphSearch(table: string, query: string, opts: Omit<HybridSearchOptions, 'embeddingsConfig' | 'pkColumn'> & {
|
|
3797
|
+
anchors: GraphNode[];
|
|
3798
|
+
graphWeight?: number;
|
|
3799
|
+
graphDepth?: number;
|
|
3800
|
+
graphDirection?: TraversalDirection;
|
|
3801
|
+
graphEdgeTypes?: string[];
|
|
3802
|
+
}): Promise<HybridSearchResult[]>;
|
|
3803
|
+
/**
|
|
3804
|
+
* Enable keyless access to cloud file bytes for the whole cloud (Postgres cloud only).
|
|
3805
|
+
* Installs the in-database SigV4 presigner + `pgcrypto`, stores the owner's
|
|
3806
|
+
* least-privilege S3 key in an **owner-only, member-unreadable** table, and
|
|
3807
|
+
* grants the cloud's member group EXECUTE on `lattice_presign_file` — so every
|
|
3808
|
+
* current + future member can presign GET/PUT URLs for the `files` rows they're
|
|
3809
|
+
* allowed to see, holding no key themselves. One owner action turns it on for
|
|
3810
|
+
* the whole cloud.
|
|
3811
|
+
*/
|
|
3812
|
+
enableCloudFilePresigning(secret: CloudS3Secret, opts?: {
|
|
3813
|
+
memberGroup?: string;
|
|
3814
|
+
}): Promise<void>;
|
|
3815
|
+
/**
|
|
3816
|
+
* Presign a GET/PUT URL for a `files` row, computed inside Postgres and gated
|
|
3817
|
+
* on the caller's row-visibility (the keyless-member path). Requires the
|
|
3818
|
+
* presigner to be installed + an S3 secret configured
|
|
3819
|
+
* ({@link enableCloudFilePresigning}). TTL is capped at 60s server-side.
|
|
3820
|
+
*/
|
|
3821
|
+
presignFile(fileId: string, method: 'GET' | 'PUT', ttlSeconds?: number): Promise<string>;
|
|
2490
3822
|
render(outputDir: string, opts?: RenderOptions): Promise<RenderResult>;
|
|
2491
3823
|
/**
|
|
2492
3824
|
* Render into `outputDir` through the shared single-flight guard, intended to
|
|
@@ -2510,7 +3842,10 @@ declare class Lattice {
|
|
|
2510
3842
|
* rendered context tree is read THROUGH the member's RLS connection + masking
|
|
2511
3843
|
* views — making the on-disk tree the viewer's own scoped projection. Owner /
|
|
2512
3844
|
* local SQLite leave it unset → identity → unchanged behavior. Set on the
|
|
2513
|
-
*
|
|
3845
|
+
* SchemaManager (the read layer), not per-render-call, so the opts-less
|
|
3846
|
+
* auto-render path masks too — AND so the reverse-sync engine, which reads the
|
|
3847
|
+
* same SchemaManager, writes a member's file edit back through the masked view
|
|
3848
|
+
* instead of the REVOKE'd base table. One resolver, every reader.
|
|
2514
3849
|
*/
|
|
2515
3850
|
setRenderReadRelation(fn: (table: string) => string): void;
|
|
2516
3851
|
/**
|
|
@@ -3008,6 +4343,17 @@ declare function parseConfigFile(configPath: string): ParsedConfig;
|
|
|
3008
4343
|
*/
|
|
3009
4344
|
declare function parseConfigString(yamlContent: string, configDir: string): ParsedConfig;
|
|
3010
4345
|
|
|
4346
|
+
/**
|
|
4347
|
+
* Thrown by a bounded read (`QueryOptions.maxRows` or `defaultMaxRows`) when more
|
|
4348
|
+
* rows match than the cap allows and no explicit `limit` was given — forcing the
|
|
4349
|
+
* caller to paginate instead of silently loading an unbounded result set.
|
|
4350
|
+
*/
|
|
4351
|
+
declare class BoundedReadError extends Error {
|
|
4352
|
+
readonly table: string;
|
|
4353
|
+
readonly maxRows: number;
|
|
4354
|
+
constructor(table: string, maxRows: number);
|
|
4355
|
+
}
|
|
4356
|
+
|
|
3011
4357
|
declare function contentHash(content: string): string;
|
|
3012
4358
|
|
|
3013
4359
|
/**
|
|
@@ -3552,6 +4898,123 @@ declare class PostgresAdapter implements StorageAdapter {
|
|
|
3552
4898
|
private _registerPolyfills;
|
|
3553
4899
|
}
|
|
3554
4900
|
|
|
4901
|
+
/**
|
|
4902
|
+
* Durable retry for transient database failures.
|
|
4903
|
+
*
|
|
4904
|
+
* Under contention a database returns retryable errors that a robust app should
|
|
4905
|
+
* simply re-attempt: SQLite `SQLITE_BUSY`, Postgres serialization failures
|
|
4906
|
+
* (`40001`) and deadlocks (`40P01` / `40P02`), and dropped connections. `withRetry`
|
|
4907
|
+
* re-runs an operation through a bounded, decorrelated-jitter backoff so these
|
|
4908
|
+
* blips don't surface as user-facing failures.
|
|
4909
|
+
*
|
|
4910
|
+
* CRITICAL: only wrap **idempotent** work — a whole bulk operation or a Lattice
|
|
4911
|
+
* API method that is safe to re-run, NOT a raw non-idempotent `adapter.run()`
|
|
4912
|
+
* (re-running an `INSERT` after a mid-statement connection drop could double-apply
|
|
4913
|
+
* it). A nested `withRetry` is detected and does NOT add a second retry layer, so
|
|
4914
|
+
* composing retry-wrapped helpers can't multiply the attempt count.
|
|
4915
|
+
*/
|
|
4916
|
+
interface RetryOptions {
|
|
4917
|
+
/** Maximum attempts including the first. Default 5. */
|
|
4918
|
+
maxAttempts?: number;
|
|
4919
|
+
/** Base backoff in ms. Default 50. */
|
|
4920
|
+
baseDelayMs?: number;
|
|
4921
|
+
/** Backoff ceiling in ms. Default 2000. */
|
|
4922
|
+
maxDelayMs?: number;
|
|
4923
|
+
/** Override the retryable-error classifier. */
|
|
4924
|
+
isRetryable?: (err: unknown) => boolean;
|
|
4925
|
+
/** Called before each retry (for logging/metrics). */
|
|
4926
|
+
onRetry?: (err: unknown, attempt: number, delayMs: number) => void;
|
|
4927
|
+
/** Injectable RNG in [0,1) for deterministic tests. Default Math.random. */
|
|
4928
|
+
random?: () => number;
|
|
4929
|
+
/** Injectable sleep for deterministic tests. Default real setTimeout. */
|
|
4930
|
+
sleep?: (ms: number) => Promise<void>;
|
|
4931
|
+
}
|
|
4932
|
+
/** Default classifier: SQLITE_BUSY / locked, retryable PG SQLSTATEs, dropped sockets. */
|
|
4933
|
+
declare function isRetryableDbError(err: unknown): boolean;
|
|
4934
|
+
/**
|
|
4935
|
+
* Run `fn`, retrying transient DB failures with decorrelated-jitter backoff.
|
|
4936
|
+
* A nested call (already inside a `withRetry`) runs `fn` directly without a
|
|
4937
|
+
* second retry layer.
|
|
4938
|
+
*
|
|
4939
|
+
* @throws the last error if every attempt fails, or immediately for a
|
|
4940
|
+
* non-retryable error.
|
|
4941
|
+
*/
|
|
4942
|
+
declare function withRetry<T>(fn: () => Promise<T>, opts?: RetryOptions): Promise<T>;
|
|
4943
|
+
|
|
4944
|
+
/**
|
|
4945
|
+
* Online, resumable migrations.
|
|
4946
|
+
*
|
|
4947
|
+
* A naive backfill that updates a whole table in one statement takes a long
|
|
4948
|
+
* table lock — a maintenance window — and, if killed partway, has to restart
|
|
4949
|
+
* from zero. A chunked migration instead walks the table's primary key in
|
|
4950
|
+
* batches, each its own short transaction, and records progress in a checkpoint
|
|
4951
|
+
* table after every batch. So it never holds a long lock, and a kill (deploy,
|
|
4952
|
+
* crash, OOM) is recoverable: `resumeMigration` picks up after the last
|
|
4953
|
+
* checkpointed key instead of redoing completed work.
|
|
4954
|
+
*
|
|
4955
|
+
* The per-batch `apply` callback MUST be idempotent — a batch can re-run if the
|
|
4956
|
+
* process dies between applying it and committing its checkpoint.
|
|
4957
|
+
*/
|
|
4958
|
+
|
|
4959
|
+
/**
* Lifecycle state stored on a migration checkpoint: `running` (started and not
* yet finished, so it can be resumed), `complete` (re-running is a no-op), or
* `reverted` (set by `revertMigration`).
*/
type MigrationStatus = 'running' | 'complete' | 'reverted';
|
|
4960
|
+
interface MigrationCheckpoint {
|
|
4961
|
+
id: string;
|
|
4962
|
+
table: string;
|
|
4963
|
+
lastPk: string | null;
|
|
4964
|
+
processed: number;
|
|
4965
|
+
status: MigrationStatus;
|
|
4966
|
+
startedAt: string;
|
|
4967
|
+
updatedAt: string;
|
|
4968
|
+
}
|
|
4969
|
+
interface ChunkedMigrationOptions {
|
|
4970
|
+
/** Unique, stable id for this migration (the resume key). */
|
|
4971
|
+
id: string;
|
|
4972
|
+
/** Table to walk. */
|
|
4973
|
+
table: string;
|
|
4974
|
+
/** Process one batch of rows. MUST be idempotent. */
|
|
4975
|
+
apply: (rows: Row[], adapter: StorageAdapter) => Promise<void>;
|
|
4976
|
+
/** Rows per batch. Default 500. */
|
|
4977
|
+
batchSize?: number;
|
|
4978
|
+
/** Primary-key column to walk (must be sortable). Default 'id'. */
|
|
4979
|
+
pkColumn?: string;
|
|
4980
|
+
/** Optional WHERE predicate (without the leading WHERE) to scope rows. */
|
|
4981
|
+
where?: string;
|
|
4982
|
+
/** Bound params for `where`. */
|
|
4983
|
+
whereParams?: unknown[];
|
|
4984
|
+
}
|
|
4985
|
+
interface ChunkedMigrationResult {
|
|
4986
|
+
id: string;
|
|
4987
|
+
processed: number;
|
|
4988
|
+
batches: number;
|
|
4989
|
+
status: MigrationStatus;
|
|
4990
|
+
}
|
|
4991
|
+
/** Ensure the checkpoint table exists (idempotent). */
|
|
4992
|
+
declare function ensureCheckpointTable(adapter: StorageAdapter): Promise<void>;
|
|
4993
|
+
/**
* Fetch the checkpoint recorded for migration `id`. Resolves to `null` —
* presumably when no checkpoint has been recorded for that id (confirm).
*/
declare function getMigrationCheckpoint(adapter: StorageAdapter, id: string): Promise<MigrationCheckpoint | null>;
|
|
4994
|
+
/**
* List the recorded migration checkpoints. NOTE(review): ordering, and whether
* `complete` / `reverted` migrations are included, are not visible from this
* declaration — confirm.
*/
declare function listMigrationCheckpoints(adapter: StorageAdapter): Promise<MigrationCheckpoint[]>;
|
|
4995
|
+
/**
|
|
4996
|
+
* Apply a chunked migration. Creates a checkpoint and walks the table in
|
|
4997
|
+
* batches, committing progress after each. If a checkpoint for `id` already
|
|
4998
|
+
* exists (a prior killed run), it resumes from there. Re-running a completed
|
|
4999
|
+
* migration is a no-op.
|
|
5000
|
+
*/
|
|
5001
|
+
declare function applyChunkedMigration(adapter: StorageAdapter, opts: ChunkedMigrationOptions): Promise<ChunkedMigrationResult>;
|
|
5002
|
+
/**
|
|
5003
|
+
* Resume a previously-started (and not completed) chunked migration. Throws if
|
|
5004
|
+
* no checkpoint exists for `id` — use {@link applyChunkedMigration} to start one.
|
|
5005
|
+
*/
|
|
5006
|
+
declare function resumeMigration(adapter: StorageAdapter, opts: ChunkedMigrationOptions): Promise<ChunkedMigrationResult>;
|
|
5007
|
+
/**
|
|
5008
|
+
* Revert a migration: walk the processed rows in batches applying `revert`, then
|
|
5009
|
+
* mark the checkpoint `reverted`. `revert` must be idempotent.
|
|
5010
|
+
*/
|
|
5011
|
+
declare function revertMigration(adapter: StorageAdapter, id: string, table: string, revert: (rows: Row[], adapter: StorageAdapter) => Promise<void>, opts?: {
|
|
5012
|
+
batchSize?: number;
|
|
5013
|
+
pkColumn?: string;
|
|
5014
|
+
where?: string;
|
|
5015
|
+
whereParams?: unknown[];
|
|
5016
|
+
}): Promise<ChunkedMigrationResult>;
|
|
5017
|
+
|
|
3555
5018
|
/**
|
|
3556
5019
|
* Framework-shipped tables that every Lattice can opt into via
|
|
3557
5020
|
* {@link registerNativeEntities}. These tables are intentionally generic —
|
|
@@ -4181,6 +5644,13 @@ declare function isPrivateIp(ip: string): boolean;
|
|
|
4181
5644
|
interface FtsHit {
|
|
4182
5645
|
id: string;
|
|
4183
5646
|
snippet: string;
|
|
5647
|
+
/**
|
|
5648
|
+
* Relevance score, higher = better. Populated by the indexed tier
|
|
5649
|
+
* (`ts_rank` on Postgres, `-bm25` on SQLite FTS5). Absent / 0 for the LIKE
|
|
5650
|
+
* fallback tier, which has no ranking model. Used to order hits and as the
|
|
5651
|
+
* FTS signal in hybrid fusion.
|
|
5652
|
+
*/
|
|
5653
|
+
score?: number;
|
|
4184
5654
|
}
|
|
4185
5655
|
interface FtsGroup {
|
|
4186
5656
|
table: string;
|
|
@@ -4225,6 +5695,66 @@ declare function hasFtsIndex(adapter: StorageAdapter, table: string): Promise<bo
|
|
|
4225
5695
|
*/
|
|
4226
5696
|
declare function fullTextSearch(adapter: StorageAdapter, tables: string[], opts: FtsOptions): Promise<FtsResult>;
|
|
4227
5697
|
|
|
5698
|
+
/**
|
|
5699
|
+
* Native indexed vector search.
|
|
5700
|
+
*
|
|
5701
|
+
* The in-process cosine scan in `embeddings.ts` loads and scores every stored
|
|
5702
|
+
* vector per query — O(n), fine at small n but a disqualifier at enterprise
|
|
5703
|
+
* scale. When the database provides an approximate-nearest-neighbor index this
|
|
5704
|
+
* module builds and queries it instead, turning vector search into an indexed
|
|
5705
|
+
* ~O(log n) lookup that stays close to flat as n grows.
|
|
5706
|
+
*
|
|
5707
|
+
* - **Postgres + pgvector:** a per-table `_lattice_vec_<table>` with a
|
|
5708
|
+
* `vector(dim)` column and an HNSW index on cosine distance (`<=>`).
|
|
5709
|
+
* - **SQLite + sqlite-vec:** a per-table `vec0` virtual table (used when the
|
|
5710
|
+
* extension has been loaded into the connection).
|
|
5711
|
+
*
|
|
5712
|
+
* The JSON store in `_lattice_embeddings` remains the portable source of record;
|
|
5713
|
+
* this index is a DERIVED accelerator built from it (mirroring how the FTS index
|
|
5714
|
+
* derives from the base table). When no extension is present, callers fall back
|
|
5715
|
+
* to the in-process scan, and `lattice doctor` reports the missing extension.
|
|
5716
|
+
*
|
|
5717
|
+
* The index is opt-in: `buildVectorIndex` populates it from the JSON store, and
|
|
5718
|
+
* search uses it only when `hasVectorIndex` is true — so the default behavior is
|
|
5719
|
+
* unchanged for users who never call it.
|
|
5720
|
+
*/
|
|
5721
|
+
|
|
5722
|
+
/**
|
|
5723
|
+
* Per-table native vector index name. The table is grammar-guarded here because
|
|
5724
|
+
* the returned name is interpolated into DDL (`CREATE TABLE "..."`, `DROP`,
|
|
5725
|
+
* `INSERT`); every build/drop/search path derives the index name through this
|
|
5726
|
+
* one function, so a single guard at the choke point covers them all.
|
|
5727
|
+
*/
|
|
5728
|
+
declare function vectorIndexName(table: string): string;
|
|
5729
|
+
interface VectorHit {
|
|
5730
|
+
pk: string;
|
|
5731
|
+
chunkIndex: number;
|
|
5732
|
+
content: string | null;
|
|
5733
|
+
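/** Cosine similarity (1 − cosine distance), higher = closer. Stated range is [0, 1] — NOTE(review): unclamped cosine similarity can be negative; confirm the lower bound. */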
|
|
5734
|
+
score: number;
|
|
5735
|
+
}
|
|
5736
|
+
/** Whether this connection has a usable native vector extension. */
|
|
5737
|
+
declare function vectorIndexAvailable(adapter: StorageAdapter): Promise<boolean>;
|
|
5738
|
+
/** Whether a native vector index table exists for `table`. */
|
|
5739
|
+
declare function hasVectorIndex(adapter: StorageAdapter, table: string): Promise<boolean>;
|
|
5740
|
+
/**
|
|
5741
|
+
* Build (or rebuild) the native vector index for `table` from the JSON store,
|
|
5742
|
+
* for vectors of dimension `dim`. No-op when no native extension is available.
|
|
5743
|
+
* Returns the number of vectors indexed.
|
|
5744
|
+
*
|
|
5745
|
+
* @throws when no native vector extension is available *and* `requireExtension` is
|
|
5746
|
+
* true — surfacing a misconfiguration loudly rather than silently doing
|
|
5747
|
+
* nothing. By default it is a reported no-op (returns 0).
|
|
5748
|
+
*/
|
|
5749
|
+
declare function buildVectorIndex(adapter: StorageAdapter, table: string, dim: number, requireExtension?: boolean): Promise<number>;
|
|
5750
|
+
/** Drop a table's native vector index. */
|
|
5751
|
+
declare function dropVectorIndex(adapter: StorageAdapter, table: string): Promise<void>;
|
|
5752
|
+
/**
|
|
5753
|
+
* Query the native vector index for the nearest chunks to `queryVector`.
|
|
5754
|
+
* Returns up to `limit` hits with cosine similarity scores ≥ `minScore`.
|
|
5755
|
+
*/
|
|
5756
|
+
declare function searchVectorIndex(adapter: StorageAdapter, table: string, queryVector: number[], limit: number, minScore: number): Promise<VectorHit[]>;
|
|
5757
|
+
|
|
4228
5758
|
/**
|
|
4229
5759
|
* Reference-ingestion API — parallel to {@link attachBlob}, but records a row
|
|
4230
5760
|
* that *indexes* data living elsewhere instead of copying bytes. The returned
|
|
@@ -5183,6 +6713,16 @@ interface GuiServerHandle {
|
|
|
5183
6713
|
port: number;
|
|
5184
6714
|
url: string;
|
|
5185
6715
|
close: () => Promise<void>;
|
|
6716
|
+
/**
|
|
6717
|
+
* Resolves when the active workspace's background owner-side convergence (cloud
|
|
6718
|
+
* RLS / member grants, incl. the `_lattice_gui_meta` read-grant) has settled.
|
|
6719
|
+
* Opening a cloud workspace returns immediately and convergence runs unawaited
|
|
6720
|
+
* (see {@link ActiveDb.converged}); a test that acts AS A MEMBER right after the
|
|
6721
|
+
* owner opens MUST await this, or the member render can race the grant and fail
|
|
6722
|
+
* with "permission denied for table _lattice_gui_meta". The GUI never needs it;
|
|
6723
|
+
* resolves immediately for a non-cloud / virgin workspace.
|
|
6724
|
+
*/
|
|
6725
|
+
whenConverged: () => Promise<void>;
|
|
5186
6726
|
}
|
|
5187
6727
|
declare function startGuiServer(options: StartGuiServerOptions): Promise<GuiServerHandle>;
|
|
5188
6728
|
|
|
@@ -5250,4 +6790,331 @@ declare class FileSourceKeyStore implements SourceKeyStore {
|
|
|
5250
6790
|
private encodeFile;
|
|
5251
6791
|
}
|
|
5252
6792
|
|
|
5253
|
-
|
|
6793
|
+
/**
|
|
6794
|
+
* Types for the structured-source importer: turning a parsed JSON source into a
|
|
6795
|
+
* proposed Lattice schema (entities, dimensions, linkages) that the user reviews
|
|
6796
|
+
* before anything is written. The inference step ({@link inferSchema}) returns a
|
|
6797
|
+
* {@link ProposedSchema}; the user approves (optionally trims) it; the
|
|
6798
|
+
* materialize step creates the tables, rows, and junctions from it.
|
|
6799
|
+
*
|
|
6800
|
+
* Derived / computed values are intentionally NOT modeled here — they belong to a
|
|
6801
|
+
* later dashboard→Lattice write-back feature, not source ingestion.
|
|
6802
|
+
*/
|
|
6803
|
+
/** Canonical column types we infer from JSON values. Maps onto Lattice field types. */
|
|
6804
|
+
type InferredType = 'integer' | 'real' | 'boolean' | 'date' | 'datetime' | 'text';
|
|
6805
|
+
interface InferredColumn {
|
|
6806
|
+
/** Normalized snake_case column name. */
|
|
6807
|
+
name: string;
|
|
6808
|
+
/** Original JSON key (may differ from {@link name}). */
|
|
6809
|
+
sourceKey: string;
|
|
6810
|
+
type: InferredType;
|
|
6811
|
+
}
|
|
6812
|
+
/**
|
|
6813
|
+
* A relationship inferred between two entities (or an entity and a normalized
|
|
6814
|
+
* dimension). Reported with match counts + a confidence so the user can judge it
|
|
6815
|
+
* before approving — links are never applied silently.
|
|
6816
|
+
*/
|
|
6817
|
+
interface InferredLinkage {
|
|
6818
|
+
kind: 'many-to-many' | 'many-to-one' | 'dimension';
|
|
6819
|
+
/** Entity table the reference lives on (normalized name). */
|
|
6820
|
+
fromEntity: string;
|
|
6821
|
+
/** Source JSON key on the entity holding the reference value(s). */
|
|
6822
|
+
fromField: string;
|
|
6823
|
+
/** Target entity/dimension table (normalized name). */
|
|
6824
|
+
toEntity: string;
|
|
6825
|
+
/** Natural-key column on the target used to resolve a reference value. */
|
|
6826
|
+
toKey: string;
|
|
6827
|
+
/** Junction table name (many-to-many + dimension links). */
|
|
6828
|
+
junction?: string;
|
|
6829
|
+
/** Distinct reference values that resolve to a target row. */
|
|
6830
|
+
matched: number;
|
|
6831
|
+
/** Distinct reference values that do NOT resolve (reported, never fatal). */
|
|
6832
|
+
unresolved: number;
|
|
6833
|
+
/** 0..1 — share of distinct reference values that resolved. */
|
|
6834
|
+
confidence: number;
|
|
6835
|
+
}
|
|
6836
|
+
interface InferredEntity {
|
|
6837
|
+
/** Normalized snake_case table name. */
|
|
6838
|
+
name: string;
|
|
6839
|
+
/** Original top-level JSON key. */
|
|
6840
|
+
sourceKey: string;
|
|
6841
|
+
/** Scalar columns only — array/linkage fields and dimension-extracted columns are excluded. */
|
|
6842
|
+
columns: InferredColumn[];
|
|
6843
|
+
/** Natural-key column name (normalized), or null when the entity is keyless (surrogate id + content-hash dedup). */
|
|
6844
|
+
naturalKey: string | null;
|
|
6845
|
+
/** Original JSON key for the natural key (un-normalized), or null. Used to read source records. */
|
|
6846
|
+
naturalKeySource: string | null;
|
|
6847
|
+
rowCount: number;
|
|
6848
|
+
/** True when reconstructed from `<key>` (array of arrays) + `<key>Cols` (column dictionary). */
|
|
6849
|
+
columnar: boolean;
|
|
6850
|
+
}
|
|
6851
|
+
interface InferredDimension {
|
|
6852
|
+
/** Dimension table name (e.g. `industry`). */
|
|
6853
|
+
name: string;
|
|
6854
|
+
/** Source column key the values come from. */
|
|
6855
|
+
sourceField: string;
|
|
6856
|
+
/** Entities that contribute values to this dimension. */
|
|
6857
|
+
fromEntities: string[];
|
|
6858
|
+
distinctValues: number;
|
|
6859
|
+
}
|
|
6860
|
+
interface ProposedSchema {
|
|
6861
|
+
entities: InferredEntity[];
|
|
6862
|
+
dimensions: InferredDimension[];
|
|
6863
|
+
linkages: InferredLinkage[];
|
|
6864
|
+
/** Top-level keys not imported (derived rollups, meta, scalars, column dictionaries). */
|
|
6865
|
+
skipped: {
|
|
6866
|
+
key: string;
|
|
6867
|
+
reason: string;
|
|
6868
|
+
}[];
|
|
6869
|
+
}
|
|
6870
|
+
/**
|
|
6871
|
+
* An entity recognized as a reconstructable projection of another (a "master")
|
|
6872
|
+
* table — its rows are contained in the master, filtered by one column. It is
|
|
6873
|
+
* materialized as a read-only DB VIEW (`master WHERE filterColumn = filterValue`)
|
|
6874
|
+
* rather than a duplicate table.
|
|
6875
|
+
*/
|
|
6876
|
+
interface DetectedView {
|
|
6877
|
+
/** The view's name (normalized) — the original tab/entity. */
|
|
6878
|
+
name: string;
|
|
6879
|
+
/** The master entity (normalized) this view projects from. */
|
|
6880
|
+
master: string;
|
|
6881
|
+
/** Master column the view filters on (normalized). */
|
|
6882
|
+
filterColumn: string;
|
|
6883
|
+
/** The value `filterColumn` equals for this view. */
|
|
6884
|
+
filterValue: string;
|
|
6885
|
+
/** Number of master rows that matched (the view's row count). */
|
|
6886
|
+
matchedRows: number;
|
|
6887
|
+
}
|
|
6888
|
+
|
|
6889
|
+
/**
|
|
6890
|
+
* Re-extract the raw source records for an entity from the parsed JSON — handling
|
|
6891
|
+
* the columnar case (`<key>` array-of-arrays + `<key>Cols` dictionary). Shared by
|
|
6892
|
+
* the inference step and the materialize step so both read records identically.
|
|
6893
|
+
*/
|
|
6894
|
+
declare function sourceRecords(data: Record<string, unknown>, entity: {
|
|
6895
|
+
sourceKey: string;
|
|
6896
|
+
columnar: boolean;
|
|
6897
|
+
}): Record<string, unknown>[];
|
|
6898
|
+
/** Lower-snake-case a JSON key into a safe SQL identifier. */
|
|
6899
|
+
declare function normalizeName(key: string): string;
|
|
6900
|
+
/** Infer a column type from a set of values (nulls ignored). Defaults to text. */
|
|
6901
|
+
declare function inferFieldType(values: unknown[]): InferredType;
|
|
6902
|
+
interface InferOptions {
|
|
6903
|
+
/** Override the inferred entity → table name (sourceKey → name). */
|
|
6904
|
+
rename?: Record<string, string>;
|
|
6905
|
+
}
|
|
6906
|
+
declare function inferSchema(data: Record<string, unknown>, opts?: InferOptions): ProposedSchema;
|
|
6907
|
+
|
|
6908
|
+
/**
|
|
6909
|
+
* Materialize an approved {@link ProposedSchema} into a Lattice workspace: create
|
|
6910
|
+
* the entity + dimension tables and the junctions, then load the rows (deduped)
|
|
6911
|
+
* and the links. Lattice becomes the system of record — the schema is persisted
|
|
6912
|
+
* to the workspace config so it survives a restart.
|
|
6913
|
+
*
|
|
6914
|
+
* Rows go through {@link Lattice.seed} (upsert by natural/content key → dedup +
|
|
6915
|
+
* idempotent re-apply). Junctions are written directly so the link column names
|
|
6916
|
+
* are fully controlled (`<entity>_id`).
|
|
6917
|
+
*/
|
|
6918
|
+
interface MaterializeCtx {
|
|
6919
|
+
db: Lattice;
|
|
6920
|
+
/** Workspace config path. When set, the schema is persisted here (canonical). */
|
|
6921
|
+
configPath?: string | null;
|
|
6922
|
+
}
|
|
6923
|
+
/**
|
|
6924
|
+
* What to materialize:
|
|
6925
|
+
* - `schema` — table structures + the deduped dimension values (the taxonomy /
|
|
6926
|
+
* "dictionary") + views. No entity rows, no links.
|
|
6927
|
+
* - `contents` — the entity rows + their links, into tables that already exist
|
|
6928
|
+
* (created idempotently if missing). No dimension values, no views.
|
|
6929
|
+
* - `both` — schema + contents (the default).
|
|
6930
|
+
*/
|
|
6931
|
+
type ImportMode = 'schema' | 'contents' | 'both';
|
|
6932
|
+
/** A live step in the import pipeline, for streaming progress to the UI. */
|
|
6933
|
+
interface ImportProgress {
|
|
6934
|
+
phase: 'parse' | 'infer' | 'detect' | 'entities' | 'dimensions' | 'links' | 'views' | 'done';
|
|
6935
|
+
message: string;
|
|
6936
|
+
table?: string;
|
|
6937
|
+
count?: number;
|
|
6938
|
+
}
|
|
6939
|
+
interface MaterializeOptions {
|
|
6940
|
+
/** Which parts to materialize (default `both`). */
|
|
6941
|
+
mode?: ImportMode;
|
|
6942
|
+
/**
|
|
6943
|
+
* Called as each step completes — drives the live pipeline view. May return a
|
|
6944
|
+
* promise; it is awaited, so a streaming caller can yield to the event loop
|
|
6945
|
+
* (flushing the socket) between steps. Without that yield, a synchronous DB
|
|
6946
|
+
* (SQLite) runs the whole import in one tick and the progress updates arrive in a single batch.
|
|
6947
|
+
*/
|
|
6948
|
+
onProgress?: (p: ImportProgress) => void | Promise<void>;
|
|
6949
|
+
/**
|
|
6950
|
+
* Point-in-time snapshot date (ISO `YYYY-MM-DD`). When set, every entity + link
|
|
6951
|
+
* row is stamped with `as_of` and the row identity includes it, so re-importing
|
|
6952
|
+
* the same model at a new date APPENDS a dated snapshot (the prior one is kept)
|
|
6953
|
+
* and links resolve within each snapshot. Dimensions (the shared taxonomy) are
|
|
6954
|
+
* not dated. Omitted ⇒ the import is undated (re-import upserts in place).
|
|
6955
|
+
*/
|
|
6956
|
+
asOf?: string | null;
|
|
6957
|
+
/**
|
|
6958
|
+
* Per-row snapshot date: the (normalized) name of a column whose value dates
|
|
6959
|
+
* each row individually, so one file can carry many periods. When set, each
|
|
6960
|
+
* row's `as_of` is read from this column (parsed per row), falling back to
|
|
6961
|
+
* {@link asOf} when a row's value isn't a date; applied to every entity that
|
|
6962
|
+
* has a column of this name. Identity + link resolution fold in the per-row
|
|
6963
|
+
* date exactly as the file-level {@link asOf} does.
|
|
6964
|
+
*/
|
|
6965
|
+
asOfColumn?: string | null;
|
|
6966
|
+
}
|
|
6967
|
+
interface MaterializeResult {
|
|
6968
|
+
mode: ImportMode;
|
|
6969
|
+
/** The file-level snapshot date stamped on the rows, or null. */
|
|
6970
|
+
asOf: string | null;
|
|
6971
|
+
/** The per-row date column used (each row dated from it), or null. */
|
|
6972
|
+
asOfColumn: string | null;
|
|
6973
|
+
tablesCreated: string[];
|
|
6974
|
+
rowsByTable: Record<string, number>;
|
|
6975
|
+
links: {
|
|
6976
|
+
junction: string;
|
|
6977
|
+
created: number;
|
|
6978
|
+
unresolved: number;
|
|
6979
|
+
}[];
|
|
6980
|
+
/** Read-only views created for detected projections (master filtered by a column). */
|
|
6981
|
+
views: {
|
|
6982
|
+
name: string;
|
|
6983
|
+
master: string;
|
|
6984
|
+
rows: number;
|
|
6985
|
+
}[];
|
|
6986
|
+
}
|
|
6987
|
+
declare function materializeImport(ctx: MaterializeCtx, data: Record<string, unknown>, plan: ProposedSchema, views?: DetectedView[], opts?: MaterializeOptions): Promise<MaterializeResult>;
|
|
6988
|
+
|
|
6989
|
+
/**
|
|
6990
|
+
* "As of" (snapshot date) detection for an import. The date drives how dated
|
|
6991
|
+
* snapshots are kept, so detection is deliberately a *suggestion* layer: many
|
|
6992
|
+
* signals each produce a ranked {@link AsOfCandidate} with evidence, the best is
|
|
6993
|
+
* prefilled, and the user confirms or overrides. Worst case (no signal) is a
|
|
6994
|
+
* blank field — never a silent wrong guess.
|
|
6995
|
+
*
|
|
6996
|
+
* Signals, strongest first:
|
|
6997
|
+
* - in-content phrase ("as of <date>", "period ended <date>") — the document says it
|
|
6998
|
+
* - in-content bare date (a date in the title/preamble or extracted text)
|
|
6999
|
+
* - file name date ("… 3.31.26.xlsx")
|
|
7000
|
+
* Each is file-type-agnostic: a caller feeds text snippets (Excel preamble, JSON
|
|
7001
|
+
* meta, extracted PDF text, …) + the file name, and the scanner does the rest.
|
|
7002
|
+
*/
|
|
7003
|
+
interface AsOfCandidate {
|
|
7004
|
+
/** ISO `YYYY-MM-DD`. */
|
|
7005
|
+
date: string;
|
|
7006
|
+
/** Where it came from (for the UI + ranking). */
|
|
7007
|
+
source: 'content' | 'filename' | 'column' | 'metadata' | 'llm';
|
|
7008
|
+
/** 0..1 — higher wins. */
|
|
7009
|
+
confidence: number;
|
|
7010
|
+
/** Human-readable justification, shown next to the prefilled field. */
|
|
7011
|
+
evidence: string;
|
|
7012
|
+
}
|
|
7013
|
+
/**
|
|
7014
|
+
* Parse a single cell/field value into an ISO date, or null. Handles a `Date`
|
|
7015
|
+
* (exceljs hands back `Date` objects for date-typed cells) and date-bearing
|
|
7016
|
+
* strings ("2026-03-31", "3/31/26", "March 31, 2026"). Used to read a per-row
|
|
7017
|
+
* "as of" column. A bare number is ignored — an Excel serial is already a `Date`
|
|
7018
|
+
* by the time we see it, and a loose number is far more likely an amount.
|
|
7019
|
+
*/
|
|
7020
|
+
declare function parseCellDate(value: unknown): string | null;
|
|
7021
|
+
interface AsOfInputs {
|
|
7022
|
+
fileName?: string;
|
|
7023
|
+
/** Text snippets to scan, each with a label (e.g. "title", "extracted text"). */
|
|
7024
|
+
texts?: {
|
|
7025
|
+
label: string;
|
|
7026
|
+
text: string;
|
|
7027
|
+
}[];
|
|
7028
|
+
}
|
|
7029
|
+
/** Gather + rank as-of candidates from all inputs (best first, deduped by date). */
|
|
7030
|
+
declare function detectAsOfCandidates(inputs: AsOfInputs): AsOfCandidate[];
|
|
7031
|
+
/** Convenience: the single best-guess date from a file name, or null. */
|
|
7032
|
+
declare function detectAsOf(fileName: string): string | null;
|
|
7033
|
+
|
|
7034
|
+
/**
|
|
7035
|
+
* Detect a per-row "as of" DATE COLUMN — so one file can carry many periods
|
|
7036
|
+
* (each row dated by its own column value) instead of a single file-level
|
|
7037
|
+
* snapshot date. Like {@link detectAsOfCandidates}, this is a *suggestion* layer:
|
|
7038
|
+
* ranked candidates with evidence; the user confirms or declines. Picking one
|
|
7039
|
+
* makes the import stamp each row's `as_of` from that column (see
|
|
7040
|
+
* `materializeImport`'s `asOfColumn` option) rather than one date for the file.
|
|
7041
|
+
*/
|
|
7042
|
+
|
|
7043
|
+
interface AsOfColumnCandidate {
|
|
7044
|
+
/** Entity (table) the column lives on. */
|
|
7045
|
+
entity: string;
|
|
7046
|
+
/** Normalized column name (matches an entity's {@link InferredColumn} `name`). */
|
|
7047
|
+
column: string;
|
|
7048
|
+
/** 0..1 — higher wins. */
|
|
7049
|
+
confidence: number;
|
|
7050
|
+
/** Distinct dates seen (>1 ⇒ genuinely multiple periods in one file). */
|
|
7051
|
+
distinctDates: number;
|
|
7052
|
+
/** Human-readable justification, shown next to the option. */
|
|
7053
|
+
evidence: string;
|
|
7054
|
+
}
|
|
7055
|
+
declare function detectAsOfColumns(data: Record<string, unknown>, plan: ProposedSchema): AsOfColumnCandidate[];
|
|
7056
|
+
|
|
7057
|
+
/**
|
|
7058
|
+
* Match an inferred import schema against the tables already in a workspace, so
|
|
7059
|
+
* a re-uploaded file is recognized as a NEW PERIOD of a document already
|
|
7060
|
+
* imported — not a brand-new set of tables. Each inferred entity is fingerprinted
|
|
7061
|
+
* by its column-name set and matched to the best existing table by containment
|
|
7062
|
+
* (robust to added columns + renames of a few columns). When enough of the upload
|
|
7063
|
+
* maps onto existing tables, it's a "known document": the importer can stamp it
|
|
7064
|
+
* as a dated snapshot into those tables instead of creating duplicates.
|
|
7065
|
+
*
|
|
7066
|
+
* Pure + dependency-free (takes a plain list of existing tables, not a `Lattice`)
|
|
7067
|
+
* so it's unit-testable and reusable from any door (import panel + assistant).
|
|
7068
|
+
*/
|
|
7069
|
+
|
|
7070
|
+
interface ExistingTable {
|
|
7071
|
+
name: string;
|
|
7072
|
+
columns: string[];
|
|
7073
|
+
}
|
|
7074
|
+
interface EntityMatch {
|
|
7075
|
+
/** Inferred entity name (from the new upload). */
|
|
7076
|
+
from: string;
|
|
7077
|
+
/** Existing table it matches. */
|
|
7078
|
+
to: string;
|
|
7079
|
+
/** 0..1 — share of the inferred entity's columns present in the existing table. */
|
|
7080
|
+
overlap: number;
|
|
7081
|
+
}
|
|
7082
|
+
interface SchemaMatch {
|
|
7083
|
+
/** Per-entity matches above the threshold (best existing table for each). */
|
|
7084
|
+
matches: EntityMatch[];
|
|
7085
|
+
/** Rename map (inferred name → existing table name) for names that differ. */
|
|
7086
|
+
rename: Record<string, string>;
|
|
7087
|
+
matchedCount: number;
|
|
7088
|
+
totalEntities: number;
|
|
7089
|
+
/**
|
|
7090
|
+
* True when enough of the upload maps onto existing tables to treat it as a
|
|
7091
|
+
* re-import of a document already in the workspace (i.e. a new dated snapshot).
|
|
7092
|
+
*/
|
|
7093
|
+
isKnownDocument: boolean;
|
|
7094
|
+
}
|
|
7095
|
+
/**
|
|
7096
|
+
* Match the inferred {@link ProposedSchema} against the workspace's existing
|
|
7097
|
+
* tables. `existing` should already exclude native/system tables (the caller has
|
|
7098
|
+
* the registry); junctions/dimensions are harmless — their tiny signatures won't
|
|
7099
|
+
* reach the threshold against a real data entity.
|
|
7100
|
+
*/
|
|
7101
|
+
declare function matchSchemaToExisting(existing: ExistingTable[], plan: ProposedSchema): SchemaMatch;
|
|
7102
|
+
/**
|
|
7103
|
+
* Apply a {@link SchemaMatch} rename map to a plan + its views, so materialize
|
|
7104
|
+
* writes into the matched existing tables. Names absent from the map pass through
|
|
7105
|
+
* unchanged (dimensions, unmatched entities). Linkage `toEntity` may be a
|
|
7106
|
+
* dimension; renaming only hits names in the map, so dimensions are untouched.
|
|
7107
|
+
*/
|
|
7108
|
+
declare function renameEntities(plan: ProposedSchema, views: DetectedView[], rename: Record<string, string>): {
|
|
7109
|
+
plan: ProposedSchema;
|
|
7110
|
+
views: DetectedView[];
|
|
7111
|
+
};
|
|
7112
|
+
|
|
7113
|
+
declare function excelToRecords(absPath: string): Promise<Record<string, unknown[]>>;
|
|
7114
|
+
|
|
7115
|
+
declare function dedupeAndDetectViews(plan: ProposedSchema, data: Record<string, unknown>): {
|
|
7116
|
+
plan: ProposedSchema;
|
|
7117
|
+
views: DetectedView[];
|
|
7118
|
+
};
|
|
7119
|
+
|
|
7120
|
+
export { ALL_PROVENANCE_FIELDS, type AddWorkspaceOptions, type AdoptNativeOptions, type AdoptResult, type AggregateFunction, type AggregateHaving, type AggregateOptions, type AggregateResult, type AggregateSpec, type ApplyWriteResult, type AsOfCandidate, type AsOfColumnCandidate, type AsOfInputs, type AudienceRowCtx, type AuditEvent, type AutoUpdateResult, type BacklinkSignal, type BelongsToRelation, type BelongsToSource, type BenchmarkOptions, type BenchmarkReport, type BenchmarkScale, type BlobMetadata, BoundedReadError, type BuiltinTemplateName, CLOUD_SETTING_SYSTEM_PROMPT, CLOUD_SETTING_WORKSPACE_LOGO, CLOUD_SETTING_WORKSPACE_LOGO_ETAG, CONFIG_SUBDIR, type CatalogEntity, type CatalogRecord, type ChangeEntry, type ChangelogOptions, type ChunkedMigrationOptions, type ChunkedMigrationResult, type ChunkerFn, type ClassifyMatch, type CleanupOptions, type CleanupResult, type CloudProbeResult, type CloudS3Secret, ComputedColumnCycleError, type ComputedColumnSpec, type CountOptions, type CrawlOptions, type CrawlResult, type CustomSignal, type CustomSource, DEFAULT_ENTRY_TYPES, DEFAULT_MAX_NODES, DEFAULT_TYPE_ALIASES, type DetectedView, type DiagnoseOptions, type DiscoveredTable, EMBEDDINGS_TABLE, EmbeddingDimensionMismatchError, type EmbeddingRefreshResult, EmbeddingScanTooLargeError, type EmbeddingsConfig, type EnrichOptions, type EnrichResult, type EnrichedSource, type EnrichmentLookup, type EntityContextDefinition, type EntityContextManifestEntry, type EntityFileManifestInfo, type EntityFileSource, type EntityFileSpec, type EntityMatch, type EntityProfileField, type EntityProfileSection, type EntityProfileTemplate, type EntityRenderSpec, type EntityRenderTemplate, type EntitySectionPerRow, type EntitySectionsTemplate, type EntityTableColumn, type EntityTableTemplate, type EvalQuery, type EvalRegression, type ExistingTable, type ExtensionAvailability, type ExtractEdgesSpec, type ExtractedObject, FileSourceKeyStore, type FileSourceKeyStoreOptions, type FilesRow, type 
Filter, type FilterAnd, type FilterExpr, type FilterOp, type FilterOr, FoldCache, type FtsConfig, type FtsGroup, type FtsHit, type FtsOptions, type FtsResult, type GraphBoostOptions, type GraphBoostResult, type GraphEdge, type GraphNode, type GraphTraversalResult, type GuiServerHandle, type HasManyRelation, type HasManySource, type HealthIssueKind, type HealthSeverity, type HybridScoreBreakdown, type HybridSearchOptions, type HybridSearchResult, type ImportMode, type ImportProgress, InMemorySourceKeyStore, InMemoryStateStore, type InferredColumn, type InferredDimension, type InferredEntity, type InferredLinkage, type InferredType, type InitOptions, LEGACY_MEMBER_GROUP, LOCAL_DB_RELPATH, type LatencyStats, Lattice, type LatticeConfig, type LatticeConfigInput, type LatticeEntityDef, type LatticeEntityRenderSpec, type LatticeFieldDef, type LatticeFieldType, type LatticeManifest, type LatticeOptions, type LinkOptions, type LlmClient, type LlmMessage, MAX_TRAVERSAL_DEPTH, type ManyToManySource, type MarkdownTableColumn, type MaterializeCtx, type MaterializeOptions, type MaterializeResult, type MaterializedRollupSpec, type MigrateResult, type Migration, type MigrationCheckpoint, type MigrationOptions, type MigrationProgress, type MigrationResult, type MigrationStatus, type MultiTableDefinition, NATIVE_ENTITY_DEFS, NATIVE_ENTITY_NAMES, NATIVE_REGISTRY_TABLE, type Observation, type OrderBySpec, type OrganizeOptions, type OrganizeResult, type OrganizedCreation, type OrganizedLink, type ParseError, type ParseResult, type ParsedConfig, type PdfOptions, type PdfSenderInput, type PerQueryEval, type PkLookup, PostgresAdapter, type PostgresAdapterOptions, type PreparedStatement, type PrimaryKey, ProgressThrottle, type ProposedSchema, type ProvenanceConfig, type ProvenanceField, ProvenanceImmutableError, type QueryOptions, type QueryPageOptions, type QueryPageResult, type QueryProjection, READ_ONLY_HEADER, ROOT_DIRNAME, type RankingOptions, type ReadOnlyHeaderOptions, type 
RecencySignal, type ReconcileOptions, type ReconcileResult, type RefKind, type RefProvider, type ReferenceMetadata, ReferenceUnavailableError, type RefreshEmbeddingsOptions, type Relation, type RelevanceLabel, type RemoteBlobStore, type RenderHooks, type RenderOptions, type RenderProgress, type RenderProgressCallback, type RenderProgressKind, type RenderResult, type RenderSpec, type ReportConfig, type ReportResult, type ReportSection, type ReportSectionResult, type RerankCandidate, type RerankScore, type RerankerFn, type ResolveOptions, type RetrievalEvalOptions, type RetrievalEvalSummary, type RetrievalHealthIssue, type RetrievalHealthReport, type RetrievalHealthSpec, type RetrievalSlo, type Retriever, type RetryOptions, type ReverseSeedDetection, type ReverseSeedResult, type ReverseSeedTableResult, type ReverseSyncError, type ReverseSyncResult, type ReverseSyncUpdate, type RewardScores, type RewardSignal, type RollupFunction, type Row, type RowVisibilityDefault, type S3Config, type S3StoreConfig, S3UnavailableError, S3_SECRET_TABLE, SQLiteAdapter, type SchemaEntity, type SchemaMatch, type SearchOptions, type SearchResult, type SecurityOptions, type SeedConfig, type SeedLinkSpec, SeedReconciliationError, type SeedResult, type SelfSource, type SemanticChunkerOptions, type SessionEntry, type SessionParseOptions, type SessionWriteEntry, type SessionWriteOp, type SessionWriteParseResult, type SloViolation, type SourceHandle, type SourceKeyStore, type SourceMetadata, type SourceQueryOptions, SourceShreddedError, type StartGuiServerOptions, type StopFn, type StorageAdapter, type SyncResult, TRUST_COLUMNS, type TableDefinition, type TableHealth, type TablePolicy, type TemplateRenderSpec, type TextChunk, type TraversalDirection, type TraversalNode, type TraversalOptions, type TrustConfig, type TrustState, type TurnParams, type TurnResult, type UnresolvedLink, type UpsertByNaturalKeyOptions, type UserIdentity, type UserPreferences, type VectorHit, type Viewer, type 
VisionOptions, type VisionSenderInput, WORKSPACES_SUBDIR, type WatchOptions, type WorkspacePaths, type WorkspaceRecord, type WorkspaceRegistry, type WriteHook, type WriteHookContext, type WritebackDefinition, type WritebackStateStore, type WritebackValidationResult, activeWorkspaceLabel, addEdge, addEdges, addWorkspace, adoptNativeEntities, allComputedDeps, analyticsEnabled, applyChunkedMigration, applyReranker, applyTokenBudget, applyWriteEntry, archiveLocalSqlite, assertSafeUrl, attachBlob, audiencePredicate, audienceViewSql, autoFtsColumns, autoUpdate, backfillOwnership, backlinkBoost, benchmarkRetrieval, buildVectorIndex, canManageRoles, checkSlos, chunkText, classifyLinks, cloudRlsInstalled, computeColumns, computedColumnDdl, computedColumnOrder, concatRowText, configDir, contentHash, cosineSimilarity, crawlUrl, createReadOnlyHeader, createS3Store, createSQLiteStateStore, decrypt, dedupeAndDetectViews, defaultWorkspaceYaml, deleteDbCredential, deleteToken, deriveCanonicalContexts, deriveKey, describeImage, describePdf, detectAsOf, detectAsOfCandidates, detectAsOfColumns, detectRetrievalRegressions, diagnoseRetrieval, discoverCloudTables, dropVectorIndex, enableAudienceView, enableChangelogRls, enableRlsForTable, encrypt, enrichKnowledge, ensureCheckpointTable, ensureEdgesTable, ensureEmbeddingsTable, ensureFtsIndex, ensureLatticeRoot, entityFileNames, estimateTokens, evaluateRetrieval, excelToRecords, extractEdgesFromColumn, extractObjects, filePresignSql, findLatticeRoot, fixSchemaConflicts, foldEntity, formatHealthReport, frontmatter, ftsTableName, fullTextSearch, generateEntryId, generateMemberPassword, generateWriteEntryId, getActiveWorkspace, getCloudSetting, getDbCredential, getMigrationCheckpoint, getOrCreateMasterKey, getTablePolicy, getWorkspace, grantPresignerToMemberGroup, graphAdjacencyBoost, hasFilePresigner, hasFtsIndex, hasVectorIndex, hashFile, hybridSearch, importLegacyUserConfig, inferFieldType, inferSchema, installCloudRls, 
installCloudSettings, installFilePresigner, isEncrypted, isNativeEntity, isPostgresUrl, isPrivateIp, isRetryableDbError, isRowAudience, latencyStats, listDbCredentials, listMigrationCheckpoints, listNativeBindings, listTokens, listWorkspaces, loadColumnPolicy, manifestPath, markdownTable, matchSchemaToExisting, materializeImport, memberGroupFor, memberRoleName, migrateLatticeData, neighbors, normalizeName, observationVisible, observationsFromChange, openTargetLatticeForMigration, openUnderSource, organizeSource, parseCellDate, parseConfigFile, parseConfigString, parseMarkdownEntries, parseMatches, parseObjects, parseSessionMD, parseSessionWrites, percentile, probeCloud, provenanceColumns, providerForUrl, provisionMemberRole, rankingBoost, readIdentity, readManifest, readPreferences, readRegistry, readToken, recencyBoost, referenceLocalFile, referenceUrl, refreshEmbeddings, regenerateAudienceViewFromDb, registerNativeEntities, registryPath, removeEdge, removeEmbedding, renameEntities, resolveActiveS3Config, resolveLatticeRoot, resolveProvenanceFields, resolveSource, resolveTrustDefault, resolveWorkspacePaths, resumeMigration, revertMigration, revokeMemberRole, rewardBoost, rollupColumnDdl, rootConfigDir, s3Key, saveDbCredential, saveDbCredentialForTeam, sealUnderSource, searchByEmbedding, searchVectorIndex, secureCloud, seedColumnPolicyFromYaml, semanticChunker, setActiveWorkspace, setCloudS3Secret, setCloudSetting, setColumnAudience, setRowVisibility, setTableDefaultVisibility, setTableNeverShare, shredSource, slugify, sourceRecords, startGuiServer, storeEmbedding, summarizeText, tableNeedsAudienceView, toSafeDirName, traverse, truncate, validateEntryId, vectorIndexAvailable, vectorIndexName, withRetry, workspaceBlobsDir, workspaceConfigPath, workspaceContextDir, workspaceDataDir, workspaceDbPath, workspaceDir, workspacesDir, writeIdentity, writeManifest, writePreferences, writeRegistry, writeToken };
|