@legioncodeinc/nectar 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/LICENSE.md +662 -662
  2. package/README.md +307 -307
  3. package/dist/brooding/describe.js +13 -13
  4. package/dist/cli.js +26 -26
  5. package/dist/service/templates.js +80 -80
  6. package/dist/telemetry/checkin.js +6 -6
  7. package/dist/telemetry/db.js +21 -21
  8. package/dist/telemetry/metrics.js +8 -8
  9. package/dist/telemetry-usage/posthog-key.js +4 -22
  10. package/package.json +48 -44
  11. package/dist/embeddings/cohere-portkey.d.ts +0 -67
  12. package/dist/embeddings/cohere-portkey.d.ts.map +0 -1
  13. package/dist/embeddings/cohere-portkey.js +0 -171
  14. package/dist/embeddings/cohere-portkey.js.map +0 -1
  15. package/dist/hivedoctor-registry.d.ts +0 -111
  16. package/dist/hivedoctor-registry.d.ts.map +0 -1
  17. package/dist/hivedoctor-registry.js +0 -143
  18. package/dist/hivedoctor-registry.js.map +0 -1
  19. package/dist/source-graph/deeplake-credentials.d.ts +0 -57
  20. package/dist/source-graph/deeplake-credentials.d.ts.map +0 -1
  21. package/dist/source-graph/deeplake-credentials.js +0 -109
  22. package/dist/source-graph/deeplake-credentials.js.map +0 -1
  23. package/dist/source-graph/deeplake-heal.d.ts +0 -53
  24. package/dist/source-graph/deeplake-heal.d.ts.map +0 -1
  25. package/dist/source-graph/deeplake-heal.js +0 -41
  26. package/dist/source-graph/deeplake-heal.js.map +0 -1
  27. package/dist/source-graph/deeplake-store.d.ts +0 -151
  28. package/dist/source-graph/deeplake-store.d.ts.map +0 -1
  29. package/dist/source-graph/deeplake-store.js +0 -389
  30. package/dist/source-graph/deeplake-store.js.map +0 -1
  31. package/dist/source-graph/deeplake-transport.d.ts +0 -74
  32. package/dist/source-graph/deeplake-transport.d.ts.map +0 -1
  33. package/dist/source-graph/deeplake-transport.js +0 -107
  34. package/dist/source-graph/deeplake-transport.js.map +0 -1
  35. package/dist/source-graph/hash.d.ts +0 -3
  36. package/dist/source-graph/hash.d.ts.map +0 -1
  37. package/dist/source-graph/hash.js +0 -12
  38. package/dist/source-graph/hash.js.map +0 -1
  39. package/dist/source-graph/memory-store.d.ts +0 -32
  40. package/dist/source-graph/memory-store.d.ts.map +0 -1
  41. package/dist/source-graph/memory-store.js +0 -81
  42. package/dist/source-graph/memory-store.js.map +0 -1
  43. package/dist/source-graph/model.d.ts +0 -102
  44. package/dist/source-graph/model.d.ts.map +0 -1
  45. package/dist/source-graph/model.js +0 -36
  46. package/dist/source-graph/model.js.map +0 -1
  47. package/dist/source-graph/paths.d.ts +0 -7
  48. package/dist/source-graph/paths.d.ts.map +0 -1
  49. package/dist/source-graph/paths.js +0 -26
  50. package/dist/source-graph/paths.js.map +0 -1
  51. package/dist/source-graph/schema.d.ts +0 -44
  52. package/dist/source-graph/schema.d.ts.map +0 -1
  53. package/dist/source-graph/schema.js +0 -123
  54. package/dist/source-graph/schema.js.map +0 -1
  55. package/dist/source-graph/sql-guards.d.ts +0 -99
  56. package/dist/source-graph/sql-guards.d.ts.map +0 -1
  57. package/dist/source-graph/sql-guards.js +0 -129
  58. package/dist/source-graph/sql-guards.js.map +0 -1
  59. package/dist/source-graph/store.d.ts +0 -101
  60. package/dist/source-graph/store.d.ts.map +0 -1
  61. package/dist/source-graph/store.js +0 -2
  62. package/dist/source-graph/store.js.map +0 -1
  63. package/dist/source-graph/ulid.d.ts +0 -9
  64. package/dist/source-graph/ulid.d.ts.map +0 -1
  65. package/dist/source-graph/ulid.js +0 -61
  66. package/dist/source-graph/ulid.js.map +0 -1
@@ -1,53 +0,0 @@
1
- /**
2
- * Heal-on-first-write for the Deep Lake adapter (PRD-005).
3
- *
4
- * Mirrors the missing-table branch of honeycomb's `withHeal`
5
- * (`src/daemon/storage/heal.ts:286-313`), scoped down to what the
6
- * source-graph tables need: a write that fails because its table does not
7
- * exist yet triggers exactly ONE create-then-retry. Any other failure
8
- * (permission, connection, timeout, a genuine syntax error) propagates
9
- * unchanged and never triggers a create — the same anti-mask rule honeycomb's
10
- * `classifyFailure` documents: a credentials or syntax problem must never be
11
- * misread as a schema gap behind a confusing CREATE attempt.
12
- *
13
- * Column healing (honeycomb's `healColumns` / `ALTER TABLE ADD COLUMN`) is
14
- * deliberately NOT ported here: `SOURCE_GRAPH_COLUMNS` /
15
- * `SOURCE_GRAPH_VERSIONS_COLUMNS` are the full, fixed column set at CREATE
16
- * time and PRD-005 does not add columns to an already-created table out from
17
- * under a running adapter, so there is no missing-column case for this
18
- * adapter to heal. A future PRD that adds a column to the catalog would need
19
- * to bring the column-heal half of honeycomb's engine over too.
20
- */
21
- import type { CatalogTable } from "./schema.js";
22
- import type { DeepLakeRow } from "./deeplake-transport.js";
23
- import { TransportError } from "./deeplake-transport.js";
24
- /**
25
- * The minimal shape `withHeal` needs from a transport: one method that runs a
26
- * SQL statement. `HttpDeepLakeTransport` satisfies this structurally (its
27
- * `query` method is public even though its connection fields are private), so
28
- * production callers pass it directly; a test can pass a plain fake object
29
- * with a `query` method instead, without needing to construct a real
30
- * `HttpDeepLakeTransport` or reach for a mocking library (hivenectar has
31
- * none, by design).
32
- */
33
- export interface QueryRunner {
34
- query(sql: string): Promise<DeepLakeRow[]>;
35
- }
36
- /**
37
- * Classify a `query`-kind `TransportError` message as a missing-table failure
38
- * (mirrors honeycomb's `classifyFailure`, `src/daemon/storage/heal.ts:77-98`,
39
- * missing-table branch only). Auth/permission failures are excluded first so a
40
- * message that happens to mention a relation is never misread as a schema gap.
41
- */
42
- export declare function isMissingTableError(err: TransportError): boolean;
43
- /**
44
- * Run a write, and on a missing-table failure, CREATE the table and retry the
45
- * write EXACTLY ONCE. Any other failure (or a success) returns/throws
46
- * immediately and unhealed. A second failure after the heal propagates
47
- * without a further retry (no infinite loop).
48
- *
49
- * `runWrite` is the original statement's thunk so the retry re-issues the
50
- * identical write.
51
- */
52
- export declare function withHeal(transport: QueryRunner, table: CatalogTable, runWrite: () => Promise<DeepLakeRow[]>): Promise<DeepLakeRow[]>;
53
- //# sourceMappingURL=deeplake-heal.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"deeplake-heal.d.ts","sourceRoot":"","sources":["../../src/source-graph/deeplake-heal.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD;;;;;;;;GAQG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;CAC5C;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,cAAc,GAAG,OAAO,CAQhE;AAED;;;;;;;;GAQG;AACH,wBAAsB,QAAQ,CAC5B,SAAS,EAAE,WAAW,EACtB,KAAK,EAAE,YAAY,EACnB,QAAQ,EAAE,MAAM,OAAO,CAAC,WAAW,EAAE,CAAC,GACrC,OAAO,CAAC,WAAW,EAAE,CAAC,CAaxB"}
@@ -1,41 +0,0 @@
1
- import { buildCreateTableSql } from "./schema.js";
2
- import { TransportError } from "./deeplake-transport.js";
3
- /**
4
- * Classify a `query`-kind `TransportError` message as a missing-table failure
5
- * (mirrors honeycomb's `classifyFailure`, `src/daemon/storage/heal.ts:77-98`,
6
- * missing-table branch only). Auth/permission failures are excluded first so a
7
- * message that happens to mention a relation is never misread as a schema gap.
8
- */
9
- export function isMissingTableError(err) {
10
- if (err.kind !== "query")
11
- return false;
12
- if (/permission denied|must be owner|not authorized|forbidden|unauthorized/i.test(err.message)) {
13
- return false;
14
- }
15
- return /table does not exist|relation ["']?[A-Za-z_][A-Za-z0-9_.]*["']? does not exist|no such table/i.test(err.message);
16
- }
17
- /**
18
- * Run a write, and on a missing-table failure, CREATE the table and retry the
19
- * write EXACTLY ONCE. Any other failure (or a success) returns/throws
20
- * immediately and unhealed. A second failure after the heal propagates
21
- * without a further retry (no infinite loop).
22
- *
23
- * `runWrite` is the original statement's thunk so the retry re-issues the
24
- * identical write.
25
- */
26
- export async function withHeal(transport, table, runWrite) {
27
- try {
28
- return await runWrite();
29
- }
30
- catch (err) {
31
- if (!(err instanceof TransportError) || !isMissingTableError(err)) {
32
- throw err;
33
- }
34
- // Missing table: create it (IF NOT EXISTS makes concurrent heals
35
- // converge), then retry the original write exactly once. Any failure from
36
- // here on (the create itself, or the retried write) propagates unchanged.
37
- await transport.query(buildCreateTableSql(table));
38
- return runWrite();
39
- }
40
- }
41
- //# sourceMappingURL=deeplake-heal.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"deeplake-heal.js","sourceRoot":"","sources":["../../src/source-graph/deeplake-heal.ts"],"names":[],"mappings":"AAqBA,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAezD;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAmB;IACrD,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IACvC,IAAI,wEAAwE,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC/F,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,+FAA+F,CAAC,IAAI,CACzG,GAAG,CAAC,OAAO,CACZ,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,SAAsB,EACtB,KAAmB,EACnB,QAAsC;IAEtC,IAAI,CAAC;QACH,OAAO,MAAM,QAAQ,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,GAAG,YAAY,cAAc,CAAC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,EAAE,CAAC;YAClE,MAAM,GAAG,CAAC;QACZ,CAAC;QACD,iEAAiE;QACjE,0EAA0E;QAC1E,0EAA0E;QAC1E,MAAM,SAAS,CAAC,KAAK,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC;QAClD,OAAO,QAAQ,EAAE,CAAC;IACpB,CAAC;AACH,CAAC"}
@@ -1,151 +0,0 @@
1
- /**
2
- * The Deep Lake-backed SourceGraphStore adapter (PRD-005).
3
- *
4
- * `DeepLakeSourceGraphStore` is the durable substrate for the two source-graph
5
- * tables (`source_graph`, `source_graph_versions`), reached over the network
6
- * through hivenectar's own transport (`deeplake-transport.ts`) per ADR-0002 -
7
- * never by importing the honeycomb runtime in-process. It is an ADDITIONAL
8
- * adapter alongside `InMemorySourceGraphStore` (`memory-store.ts`), not a
9
- * replacement of it: the in-memory store keeps backing the file-registration
10
- * ladder's tests and local dev, unchanged.
11
- *
12
- * `DeepLakeSourceGraphStore implements AsyncSourceGraphStore`
13
- * (`store.ts`), not the synchronous `SourceGraphStore`. See the docblock on
14
- * `AsyncSourceGraphStore` for why: the ladder's synchronous contract cannot be
15
- * honored by a store that does real HTTP I/O, and wiring the ladder to an
16
- * async store is a future PRD's decision, not this adapter's.
17
- *
18
- * All reads are scoped by `org_id`/`workspace_id`/`project_id` wherever the
19
- * method receives a `Tenancy` (`listLatestVersions`, `latestVersionByPath`,
20
- * `latestVersionByHash`), per PRD-005c's soft-filter contract - the
21
- * `project_id` predicate is never omitted from those queries. The four
22
- * by-nectar-only methods (`getIdentity`, `touchIdentity`, `nextSeq`,
23
- * `latestVersion`) take no `Tenancy` parameter in the `SourceGraphStore` /
24
- * `AsyncSourceGraphStore` seam itself - `nectar` is a globally-unique 26-char
25
- * ULID (`ulid.ts`) and `InMemorySourceGraphStore` looks those up by nectar
26
- * alone too (no tenancy filter), so this adapter matches that seam exactly
27
- * for drop-in-replacement parity rather than inventing extra scoping the
28
- * interface does not carry.
29
- */
30
- import type { SourceGraphRow, SourceGraphVersionRow, Tenancy } from "./model.js";
31
- import type { AsyncSourceGraphStore, LatestVersion } from "./store.js";
32
- import type { DeepLakeCredentials } from "./deeplake-credentials.js";
33
- import type { QueryRunner } from "./deeplake-heal.js";
34
- /** Options for {@link DeepLakeSourceGraphStore}. */
35
- export interface DeepLakeSourceGraphStoreOptions {
36
- /** Loaded via `deeplake-credentials.ts#loadDeepLakeCredentials`. */
37
- readonly credentials: DeepLakeCredentials;
38
- /** Per-statement timeout override in ms; see `DEFAULT_TRANSPORT_TIMEOUT_MS`. */
39
- readonly timeoutMs?: number;
40
- /**
41
- * TEST-ONLY SEAM: inject a fake `QueryRunner` instead of constructing a
42
- * real `HttpDeepLakeTransport` from `credentials`. When absent (the
43
- * production path, and the default for every existing caller), the
44
- * constructor builds a real `HttpDeepLakeTransport` from `credentials`
45
- * exactly as before this option existed. `credentials` is still required
46
- * even when `transport` is supplied (a test passes a trivial placeholder
47
- * object; nothing in the class reads `credentials` again once the
48
- * transport is constructed), which keeps this a strict addition rather
49
- * than a change to the required shape of the options object.
50
- */
51
- readonly transport?: QueryRunner;
52
- }
53
- export declare class DeepLakeSourceGraphStore implements AsyncSourceGraphStore {
54
- private readonly transport;
55
- constructor(options: DeepLakeSourceGraphStoreOptions);
56
- /**
57
- * Run a READ statement, tolerating a missing-table failure as "no data yet"
58
- * (an empty result), exactly like honeycomb's read-path guard
59
- * (`tableExists`, `src/daemon/storage/heal.ts:171-175`) fails OPEN rather
60
- * than provoking a CREATE. Only the missing-table shape is tolerated; a
61
- * genuine connection/timeout/permission/syntax failure still propagates.
62
- * Reads never create a table - only a write does, via `withHeal`.
63
- */
64
- private readTolerant;
65
- /**
66
- * Insert-only mint, mirroring honeycomb's SELECT-before-INSERT write
67
- * pattern (`selectBeforeInsert`, `src/daemon/storage/writes.ts:325-360`)
68
- * rather than relying on a Deep Lake UNIQUE constraint: the Deep Lake SQL
69
- * surface this adapter targets has no reliably-enforced uniqueness
70
- * constraint to lean on, so "does the nectar already exist" is answered by
71
- * probing first. Both the probe and the insert are heal-aware (a fresh
72
- * tenancy's tables may not exist yet).
73
- *
74
- * After the insert, a best-effort re-verify SELECT counts the rows for the
75
- * nectar; more than one means a race doubled it. Deep Lake offers no
76
- * transactions, so this cannot PREVENT a race, only make it observable
77
- * (the same limitation honeycomb documents). In practice a real race here
78
- * is a near-zero-probability event - a nectar is a 26-char ULID with 80
79
- * bits of randomness (`ulid.ts`), not a narrow key space two writers could
80
- * plausibly collide on - so a failure of the re-verify SELECT itself
81
- * (e.g. a transient network hiccup immediately after a successful insert)
82
- * does not fail the whole call; the insert already succeeded and the
83
- * re-verify is an observability nicety, not the source of truth.
84
- */
85
- insertIdentity(row: SourceGraphRow): Promise<void>;
86
- getIdentity(nectar: string): Promise<SourceGraphRow | undefined>;
87
- /**
88
- * Bump `last_update_date` via an in-place `UPDATE`.
89
- *
90
- * KNOWN CAVEAT: honeycomb's own operational history found that an in-place
91
- * `UPDATE ... WHERE id = ...` does not reliably land on this backend under
92
- * load - a by-id point read can still return a pre-update snapshot from a
93
- * stale segment (see honeycomb `src/daemon/storage/catalog/tenancy.ts:475-489`,
94
- * where the equivalent revoke-by-UPDATE was RETIRED in favor of an
95
- * append-only version-bump for that reason). `source_graph`'s catalog entry
96
- * (`schema.ts`) nonetheless declares `writePattern: "update-or-insert"` for
97
- * this exact field, and `last_update_date` is a low-stakes denormalized
98
- * "last observed change" timestamp (not an authorization-bearing flag like
99
- * the retired revoke), so this adapter uses `UPDATE` as declared rather than
100
- * inventing an append-based scheme the schema does not define. If
101
- * `last_update_date` staleness proves to be a real problem in practice, a
102
- * future PRD should revisit this the same way honeycomb did - not this
103
- * adapter, which mirrors the declared write pattern faithfully.
104
- */
105
- touchIdentity(nectar: string, lastUpdateDate: string): Promise<void>;
106
- appendVersion(row: SourceGraphVersionRow): Promise<void>;
107
- /**
108
- * The next monotonic seq for a nectar: MAX(seq)+1 over every version row,
109
- * computed client-side by {@link reduceLatestVersion} rather than trusted
110
- * to an SQL `ORDER BY`/`LIMIT`/`MAX()` clause. See that function's docblock
111
- * for why.
112
- */
113
- nextSeq(nectar: string): Promise<number>;
114
- /** The latest (MAX seq) version row for a nectar; see {@link reduceLatestVersion}. */
115
- latestVersion(nectar: string): Promise<SourceGraphVersionRow | undefined>;
116
- /**
117
- * Every nectar's latest version, scoped by the full tenancy predicate
118
- * (`org_id`/`workspace_id`/`project_id`, PRD-005c). Fetches every identity
119
- * and every version row for the tenancy and reduces to "latest per nectar"
120
- * in application code, mirroring `InMemorySourceGraphStore.listLatestVersions`'s
121
- * iterate-and-reduce shape rather than pushing a `GROUP BY`/window-function
122
- * reduction into SQL. This is not optimized for a very large per-tenant
123
- * row count; a future PRD could push the "latest per nectar" reduction into
124
- * SQL if that becomes the bottleneck. It is not this adapter's job to
125
- * diverge from the reference in-memory implementation's behavior to chase
126
- * that optimization prematurely.
127
- */
128
- listLatestVersions(tenancy: Tenancy): Promise<LatestVersion[]>;
129
- latestVersionByPath(tenancy: Tenancy, path: string): Promise<LatestVersion | undefined>;
130
- latestVersionByHash(tenancy: Tenancy, contentHash: string): Promise<LatestVersion | undefined>;
131
- /**
132
- * Delete a nectar (identity + versions) scoped to `tenancy`, the SOLE deletion
133
- * path (`prune --confirm`, PRD-006d). Both DELETE statements carry the full
134
- * tenancy predicate (`org_id`/`workspace_id`/`project_id`, never omitting
135
- * `project_id`) alongside the nectar key, so a nectar minted under another
136
- * project is never removed by a delete issued in this tenancy (AC-20).
137
- *
138
- * The delete path deliberately does NOT go through `withHeal`: a missing table
139
- * means there is nothing to delete, so it is a harmless no-op, NOT a reason to
140
- * CREATE a fresh (empty) table. `deleteTolerant` therefore swallows only the
141
- * missing-table transport error (via `isMissingTableError`, the same
142
- * classification the read path uses) and lets every other failure propagate.
143
- */
144
- deleteNectar(tenancy: Tenancy, nectar: string): Promise<void>;
145
- /**
146
- * Run a DELETE, tolerating a missing-table failure as "nothing to delete" (a
147
- * no-op). Never creates a table. Any other transport failure propagates.
148
- */
149
- private deleteTolerant;
150
- }
151
- //# sourceMappingURL=deeplake-store.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"deeplake-store.d.ts","sourceRoot":"","sources":["../../src/source-graph/deeplake-store.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,OAAO,KAAK,EAAkB,cAAc,EAAE,qBAAqB,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAEjG,OAAO,KAAK,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEvE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAGrE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOtD,oDAAoD;AACpD,MAAM,WAAW,+BAA+B;IAC9C,oEAAoE;IACpE,QAAQ,CAAC,WAAW,EAAE,mBAAmB,CAAC;IAC1C,gFAAgF;IAChF,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B;;;;;;;;;;OAUG;IACH,QAAQ,CAAC,SAAS,CAAC,EAAE,WAAW,CAAC;CAClC;AAyLD,qBAAa,wBAAyB,YAAW,qBAAqB;IACpE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAc;gBAE5B,OAAO,EAAE,+BAA+B;IAYpD;;;;;;;OAOG;YACW,YAAY;IAS1B;;;;;;;;;;;;;;;;;;;OAmBG;IACG,cAAc,CAAC,GAAG,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBlD,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,SAAS,CAAC;IAMtE;;;;;;;;;;;;;;;;;OAiBG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAOpE,aAAa,CAAC,GAAG,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAK9D;;;;;OAKG;IACG,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAY9C,sFAAsF;IAChF,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC;IAM/E;;;;;;;;;;;OAWG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA8B9D,mBAAmB,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;IAKvF,mBAAmB,CAAC,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;IAKpG;;;;;;;;;;;;OAYG;IACG,YAAY,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAWnE;;;OAGG;YACW,cAAc;CAQ7B"}
@@ -1,389 +0,0 @@
1
- import { DESCRIBE_STATUSES } from "./model.js";
2
- import { SOURCE_GRAPH_TABLE, SOURCE_GRAPH_VERSIONS_TABLE } from "./schema.js";
3
- import { HttpDeepLakeTransport, TransportError } from "./deeplake-transport.js";
4
- import { isMissingTableError, withHeal } from "./deeplake-heal.js";
5
- import { eLiteral, sLiteral, sqlFloat4Array, sqlIdent, sqlNum } from "./sql-guards.js";
6
- const SOURCE_GRAPH_TABLE_NAME = sqlIdent(SOURCE_GRAPH_TABLE.name);
7
- const SOURCE_GRAPH_VERSIONS_TABLE_NAME = sqlIdent(SOURCE_GRAPH_VERSIONS_TABLE.name);
8
- function toStr(value) {
9
- return typeof value === "string" ? value : String(value ?? "");
10
- }
11
- function toNum(value) {
12
- const n = typeof value === "number" ? value : Number(value);
13
- return Number.isFinite(n) ? n : 0;
14
- }
15
- function toDescribeStatus(value) {
16
- if (typeof value === "string" && DESCRIBE_STATUSES.includes(value)) {
17
- return value;
18
- }
19
- throw new Error(`invalid describe_status value from Deep Lake: ${JSON.stringify(value)}`);
20
- }
21
- function toEmbedding(value) {
22
- if (!Array.isArray(value))
23
- return null;
24
- return value.map((v) => toNum(v));
25
- }
26
- function toConfidence(value) {
27
- if (value === null || value === undefined)
28
- return null;
29
- const n = typeof value === "number" ? value : Number(value);
30
- return Number.isFinite(n) ? n : null;
31
- }
32
- /** Map a raw `source_graph` row into the domain `SourceGraphRow`. */
33
- function toIdentityRow(row) {
34
- return {
35
- nectar: toStr(row.nectar),
36
- kind: row.kind === "directory" ? "directory" : "file",
37
- createdAt: toStr(row.created_at),
38
- derivedFromNectar: toStr(row.derived_from_nectar),
39
- forkContentHash: toStr(row.fork_content_hash),
40
- orgId: toStr(row.org_id),
41
- workspaceId: toStr(row.workspace_id),
42
- projectId: toStr(row.project_id),
43
- lastUpdateDate: toStr(row.last_update_date),
44
- };
45
- }
46
- /** Map a raw `source_graph_versions` row into the domain `SourceGraphVersionRow`. */
47
- function toVersionRow(row) {
48
- return {
49
- nectar: toStr(row.nectar),
50
- contentHash: toStr(row.content_hash),
51
- seq: toNum(row.seq),
52
- path: toStr(row.path),
53
- filename: toStr(row.filename),
54
- ext: toStr(row.ext),
55
- sizeBytes: toNum(row.size_bytes),
56
- mtimeObserved: toStr(row.mtime_observed),
57
- title: toStr(row.title),
58
- description: toStr(row.description),
59
- concepts: toStr(row.concepts),
60
- embedding: toEmbedding(row.embedding),
61
- confidence: toConfidence(row.confidence),
62
- fingerprint: typeof row.fingerprint === "string" ? row.fingerprint : null,
63
- describedAt: toStr(row.described_at),
64
- describeModel: toStr(row.describe_model),
65
- describeStatus: toDescribeStatus(row.describe_status),
66
- observedAt: toStr(row.observed_at),
67
- orgId: toStr(row.org_id),
68
- workspaceId: toStr(row.workspace_id),
69
- projectId: toStr(row.project_id),
70
- lastUpdateDate: toStr(row.last_update_date),
71
- };
72
- }
73
- /**
74
- * Reduce a set of raw `source_graph_versions` rows for ONE nectar to the one
75
- * with the highest `seq` (client-side MAX(seq)), returning `undefined` for an
76
- * empty set. This is the real "MAX(seq) selection" logic `nextSeq`,
77
- * `latestVersion`, and `listLatestVersions` all share - it does not trust the
78
- * Deep Lake backend to have already applied an `ORDER BY seq DESC LIMIT 1`
79
- * correctly, since this same file documents (`touchIdentity`'s docblock,
80
- * citing honeycomb `src/daemon/storage/catalog/tenancy.ts:475-489`) that this
81
- * backend has known point-read/ordering quirks under load. Fetching every
82
- * version row for a nectar and reducing here is a small, testable function
83
- * instead of an opaque SQL clause, and a nectar's version history (one row
84
- * per observed edit of a single file) is not expected to be large enough for
85
- * the extra rows to matter.
86
- */
87
- function reduceLatestVersion(rows) {
88
- let latest;
89
- for (const raw of rows) {
90
- const version = toVersionRow(raw);
91
- if (latest === undefined || version.seq > latest.seq)
92
- latest = version;
93
- }
94
- return latest;
95
- }
96
- /** Build the `INSERT INTO "source_graph" (...) VALUES (...)` statement for one identity row. */
97
- function buildInsertIdentitySql(row) {
98
- const cols = [
99
- "nectar",
100
- "kind",
101
- "created_at",
102
- "derived_from_nectar",
103
- "fork_content_hash",
104
- "org_id",
105
- "workspace_id",
106
- "project_id",
107
- "last_update_date",
108
- ];
109
- const vals = [
110
- sLiteral(row.nectar),
111
- sLiteral(row.kind),
112
- sLiteral(row.createdAt),
113
- sLiteral(row.derivedFromNectar),
114
- sLiteral(row.forkContentHash),
115
- sLiteral(row.orgId),
116
- sLiteral(row.workspaceId),
117
- sLiteral(row.projectId),
118
- sLiteral(row.lastUpdateDate),
119
- ];
120
- return `INSERT INTO "${SOURCE_GRAPH_TABLE_NAME}" (${cols.join(", ")}) VALUES (${vals.join(", ")})`;
121
- }
122
- /** Build the `INSERT INTO "source_graph_versions" (...) VALUES (...)` statement for one version row. */
123
- function buildInsertVersionSql(row) {
124
- const cols = [
125
- "nectar",
126
- "content_hash",
127
- "seq",
128
- "path",
129
- "filename",
130
- "ext",
131
- "size_bytes",
132
- "mtime_observed",
133
- "title",
134
- "description",
135
- "concepts",
136
- "embedding",
137
- "confidence",
138
- "fingerprint",
139
- "described_at",
140
- "describe_model",
141
- "describe_status",
142
- "observed_at",
143
- "org_id",
144
- "workspace_id",
145
- "project_id",
146
- "last_update_date",
147
- ];
148
- const vals = [
149
- sLiteral(row.nectar),
150
- sLiteral(row.contentHash),
151
- sqlNum(row.seq),
152
- sLiteral(row.path),
153
- sLiteral(row.filename),
154
- sLiteral(row.ext),
155
- sqlNum(row.sizeBytes),
156
- sLiteral(row.mtimeObserved),
157
- // Free-text fields may carry backslashes/newlines; use the E'...' form.
158
- eLiteral(row.title),
159
- eLiteral(row.description),
160
- eLiteral(row.concepts),
161
- row.embedding !== null ? sqlFloat4Array(row.embedding) : "NULL",
162
- row.confidence !== null ? sqlNum(row.confidence) : "NULL",
163
- row.fingerprint !== null ? sLiteral(row.fingerprint) : "NULL",
164
- sLiteral(row.describedAt),
165
- sLiteral(row.describeModel),
166
- sLiteral(row.describeStatus),
167
- sLiteral(row.observedAt),
168
- sLiteral(row.orgId),
169
- sLiteral(row.workspaceId),
170
- sLiteral(row.projectId),
171
- sLiteral(row.lastUpdateDate),
172
- ];
173
- return `INSERT INTO "${SOURCE_GRAPH_VERSIONS_TABLE_NAME}" (${cols.join(", ")}) VALUES (${vals.join(", ")})`;
174
- }
175
- /** The `AND`-joined tenancy predicate (`org_id`/`workspace_id`/`project_id`), never omitting `project_id`. */
176
- function tenancyPredicate(tenancy) {
177
- return (`org_id = ${sLiteral(tenancy.orgId)} AND ` +
178
- `workspace_id = ${sLiteral(tenancy.workspaceId)} AND ` +
179
- `project_id = ${sLiteral(tenancy.projectId)}`);
180
- }
181
- export class DeepLakeSourceGraphStore {
182
- transport;
183
- constructor(options) {
184
- this.transport =
185
- options.transport ??
186
- new HttpDeepLakeTransport({
187
- endpoint: options.credentials.apiUrl,
188
- token: options.credentials.token,
189
- orgId: options.credentials.orgId,
190
- workspaceId: options.credentials.workspaceId,
191
- ...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {}),
192
- });
193
- }
194
- /**
195
- * Run a READ statement, tolerating a missing-table failure as "no data yet"
196
- * (an empty result), exactly like honeycomb's read-path guard
197
- * (`tableExists`, `src/daemon/storage/heal.ts:171-175`) fails OPEN rather
198
- * than provoking a CREATE. Only the missing-table shape is tolerated; a
199
- * genuine connection/timeout/permission/syntax failure still propagates.
200
- * Reads never create a table - only a write does, via `withHeal`.
201
- */
202
- async readTolerant(sql) {
203
- try {
204
- return await this.transport.query(sql);
205
- }
206
- catch (err) {
207
- if (err instanceof TransportError && isMissingTableError(err))
208
- return [];
209
- throw err;
210
- }
211
- }
212
- /**
213
- * Insert-only mint, mirroring honeycomb's SELECT-before-INSERT write
214
- * pattern (`selectBeforeInsert`, `src/daemon/storage/writes.ts:325-360`)
215
- * rather than relying on a Deep Lake UNIQUE constraint: the Deep Lake SQL
216
- * surface this adapter targets has no reliably-enforced uniqueness
217
- * constraint to lean on, so "does the nectar already exist" is answered by
218
- * probing first. Both the probe and the insert are heal-aware (a fresh
219
- * tenancy's tables may not exist yet).
220
- *
221
- * After the insert, a best-effort re-verify SELECT counts the rows for the
222
- * nectar; more than one means a race doubled it. Deep Lake offers no
223
- * transactions, so this cannot PREVENT a race, only make it observable
224
- * (the same limitation honeycomb documents). In practice a real race here
225
- * is a near-zero-probability event - a nectar is a 26-char ULID with 80
226
- * bits of randomness (`ulid.ts`), not a narrow key space two writers could
227
- * plausibly collide on - so a failure of the re-verify SELECT itself
228
- * (e.g. a transient network hiccup immediately after a successful insert)
229
- * does not fail the whole call; the insert already succeeded and the
230
- * re-verify is an observability nicety, not the source of truth.
231
- */
232
- async insertIdentity(row) {
233
- const probeSql = `SELECT nectar FROM "${SOURCE_GRAPH_TABLE_NAME}" WHERE nectar = ${sLiteral(row.nectar)} LIMIT 1`;
234
- const probeRows = await withHeal(this.transport, SOURCE_GRAPH_TABLE, () => this.transport.query(probeSql));
235
- if (probeRows.length > 0) {
236
- throw new Error(`identity already exists for nectar ${row.nectar}`);
237
- }
238
- const insertSql = buildInsertIdentitySql(row);
239
- await withHeal(this.transport, SOURCE_GRAPH_TABLE, () => this.transport.query(insertSql));
240
- const verifySql = `SELECT nectar FROM "${SOURCE_GRAPH_TABLE_NAME}" WHERE nectar = ${sLiteral(row.nectar)}`;
241
- const verifyRows = await this.transport.query(verifySql).catch(() => []);
242
- if (verifyRows.length > 1) {
243
- throw new Error(`race detected inserting identity for nectar ${row.nectar}: ${verifyRows.length} rows present after insert`);
244
- }
245
- }
246
- async getIdentity(nectar) {
247
- const sql = `SELECT * FROM "${SOURCE_GRAPH_TABLE_NAME}" WHERE nectar = ${sLiteral(nectar)} LIMIT 1`;
248
- const rows = await this.readTolerant(sql);
249
- return rows.length > 0 ? toIdentityRow(rows[0]) : undefined;
250
- }
251
- /**
252
- * Bump `last_update_date` via an in-place `UPDATE`.
253
- *
254
- * KNOWN CAVEAT: honeycomb's own operational history found that an in-place
255
- * `UPDATE ... WHERE id = ...` does not reliably land on this backend under
256
- * load - a by-id point read can still return a pre-update snapshot from a
257
- * stale segment (see honeycomb `src/daemon/storage/catalog/tenancy.ts:475-489`,
258
- * where the equivalent revoke-by-UPDATE was RETIRED in favor of an
259
- * append-only version-bump for that reason). `source_graph`'s catalog entry
260
- * (`schema.ts`) nonetheless declares `writePattern: "update-or-insert"` for
261
- * this exact field, and `last_update_date` is a low-stakes denormalized
262
- * "last observed change" timestamp (not an authorization-bearing flag like
263
- * the retired revoke), so this adapter uses `UPDATE` as declared rather than
264
- * inventing an append-based scheme the schema does not define. If
265
- * `last_update_date` staleness proves to be a real problem in practice, a
266
- * future PRD should revisit this the same way honeycomb did - not this
267
- * adapter, which mirrors the declared write pattern faithfully.
268
- */
269
- async touchIdentity(nectar, lastUpdateDate) {
270
- const sql = `UPDATE "${SOURCE_GRAPH_TABLE_NAME}" SET last_update_date = ${sLiteral(lastUpdateDate)} ` +
271
- `WHERE nectar = ${sLiteral(nectar)}`;
272
- await withHeal(this.transport, SOURCE_GRAPH_TABLE, () => this.transport.query(sql));
273
- }
274
- async appendVersion(row) {
275
- const insertSql = buildInsertVersionSql(row);
276
- await withHeal(this.transport, SOURCE_GRAPH_VERSIONS_TABLE, () => this.transport.query(insertSql));
277
- }
278
- /**
279
- * The next monotonic seq for a nectar: MAX(seq)+1 over every version row,
280
- * computed client-side by {@link reduceLatestVersion} rather than trusted
281
- * to an SQL `ORDER BY`/`LIMIT`/`MAX()` clause. See that function's docblock
282
- * for why.
283
- */
284
- async nextSeq(nectar) {
285
- const sql = `SELECT seq FROM "${SOURCE_GRAPH_VERSIONS_TABLE_NAME}" WHERE nectar = ${sLiteral(nectar)}`;
286
- const rows = await this.readTolerant(sql);
287
- if (rows.length === 0)
288
- return 0;
289
- let maxSeq = toNum(rows[0]?.seq);
290
- for (const row of rows) {
291
- const seq = toNum(row.seq);
292
- if (seq > maxSeq)
293
- maxSeq = seq;
294
- }
295
- return maxSeq + 1;
296
- }
297
- /** The latest (MAX seq) version row for a nectar; see {@link reduceLatestVersion}. */
298
- async latestVersion(nectar) {
299
- const sql = `SELECT * FROM "${SOURCE_GRAPH_VERSIONS_TABLE_NAME}" WHERE nectar = ${sLiteral(nectar)}`;
300
- const rows = await this.readTolerant(sql);
301
- return reduceLatestVersion(rows);
302
- }
303
- /**
304
- * Every nectar's latest version, scoped by the full tenancy predicate
305
- * (`org_id`/`workspace_id`/`project_id`, PRD-005c). Fetches every identity
306
- * and every version row for the tenancy and reduces to "latest per nectar"
307
- * in application code, mirroring `InMemorySourceGraphStore.listLatestVersions`'s
308
- * iterate-and-reduce shape rather than pushing a `GROUP BY`/window-function
309
- * reduction into SQL. This is not optimized for a very large per-tenant
310
- * row count; a future PRD could push the "latest per nectar" reduction into
311
- * SQL if that becomes the bottleneck. It is not this adapter's job to
312
- * diverge from the reference in-memory implementation's behavior to chase
313
- * that optimization prematurely.
314
- */
315
- async listLatestVersions(tenancy) {
316
- const predicate = tenancyPredicate(tenancy);
317
- const identitiesSql = `SELECT * FROM "${SOURCE_GRAPH_TABLE_NAME}" WHERE ${predicate}`;
318
- const versionsSql = `SELECT * FROM "${SOURCE_GRAPH_VERSIONS_TABLE_NAME}" WHERE ${predicate}`;
319
- const [identityRows, versionRows] = await Promise.all([
320
- this.readTolerant(identitiesSql),
321
- this.readTolerant(versionsSql),
322
- ]);
323
- // Group raw version rows by nectar first, then reduce each group with the
324
- // SAME `reduceLatestVersion` helper `nextSeq`/`latestVersion` use, so
325
- // there is exactly one "pick the highest seq" implementation in this file.
326
- const rowsByNectar = new Map();
327
- for (const raw of versionRows) {
328
- const nectar = toStr(raw.nectar);
329
- const group = rowsByNectar.get(nectar);
330
- if (group === undefined)
331
- rowsByNectar.set(nectar, [raw]);
332
- else
333
- group.push(raw);
334
- }
335
- const out = [];
336
- for (const raw of identityRows) {
337
- const identity = toIdentityRow(raw);
338
- const group = rowsByNectar.get(identity.nectar);
339
- const version = group !== undefined ? reduceLatestVersion(group) : undefined;
340
- if (version !== undefined)
341
- out.push({ identity, version });
342
- }
343
- return out;
344
- }
345
- async latestVersionByPath(tenancy, path) {
346
- const all = await this.listLatestVersions(tenancy);
347
- return all.find((lv) => lv.version.path === path);
348
- }
349
- async latestVersionByHash(tenancy, contentHash) {
350
- const all = await this.listLatestVersions(tenancy);
351
- return all.find((lv) => lv.version.contentHash === contentHash);
352
- }
353
- /**
354
- * Delete a nectar (identity + versions) scoped to `tenancy`, the SOLE deletion
355
- * path (`prune --confirm`, PRD-006d). Both DELETE statements carry the full
356
- * tenancy predicate (`org_id`/`workspace_id`/`project_id`, never omitting
357
- * `project_id`) alongside the nectar key, so a nectar minted under another
358
- * project is never removed by a delete issued in this tenancy (AC-20).
359
- *
360
- * The delete path deliberately does NOT go through `withHeal`: a missing table
361
- * means there is nothing to delete, so it is a harmless no-op, NOT a reason to
362
- * CREATE a fresh (empty) table. `deleteTolerant` therefore swallows only the
363
- * missing-table transport error (via `isMissingTableError`, the same
364
- * classification the read path uses) and lets every other failure propagate.
365
- */
366
- async deleteNectar(tenancy, nectar) {
367
- const predicate = tenancyPredicate(tenancy);
368
- const nectarKey = sLiteral(nectar);
369
- const deleteVersionsSql = `DELETE FROM "${SOURCE_GRAPH_VERSIONS_TABLE_NAME}" WHERE nectar = ${nectarKey} AND ${predicate}`;
370
- const deleteIdentitySql = `DELETE FROM "${SOURCE_GRAPH_TABLE_NAME}" WHERE nectar = ${nectarKey} AND ${predicate}`;
371
- await this.deleteTolerant(deleteVersionsSql);
372
- await this.deleteTolerant(deleteIdentitySql);
373
- }
374
- /**
375
- * Run a DELETE, tolerating a missing-table failure as "nothing to delete" (a
376
- * no-op). Never creates a table. Any other transport failure propagates.
377
- */
378
- async deleteTolerant(sql) {
379
- try {
380
- await this.transport.query(sql);
381
- }
382
- catch (err) {
383
- if (err instanceof TransportError && isMissingTableError(err))
384
- return;
385
- throw err;
386
- }
387
- }
388
- }
389
- //# sourceMappingURL=deeplake-store.js.map