@anmol-srv/sigil 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +417 -0
  3. package/dist/cli.js +1019 -0
  4. package/dist/hooks/post-tool-use.js +70 -0
  5. package/dist/hooks/session-end.js +222 -0
  6. package/dist/hooks/stop.js +259 -0
  7. package/dist/hooks/user-prompt-submit.js +279 -0
  8. package/dist/server.js +573 -0
  9. package/integrations/hermes/README.md +41 -0
  10. package/integrations/hermes/plugin/README.md +72 -0
  11. package/integrations/hermes/plugin/__init__.py +353 -0
  12. package/integrations/hermes/plugin/plugin.yaml +10 -0
  13. package/knexfile.js +15 -0
  14. package/package.json +100 -0
  15. package/prompts/audm-decision.md +31 -0
  16. package/prompts/chunk-context.md +23 -0
  17. package/prompts/default-extraction.md +35 -0
  18. package/prompts/entity-extraction.md +37 -0
  19. package/prompts/input-classifier.md +23 -0
  20. package/prompts/query-router.md +18 -0
  21. package/src/db/migrations/20260310120000_create-cortex-document-table.cjs +21 -0
  22. package/src/db/migrations/20260310120001_create-cortex-chunk-table.cjs +37 -0
  23. package/src/db/migrations/20260310120002_create-cortex-fact-table.cjs +37 -0
  24. package/src/db/migrations/20260310120003_create-cortex-entity-table.cjs +26 -0
  25. package/src/db/migrations/20260310120004_create-cortex-relation-table.cjs +27 -0
  26. package/src/db/migrations/20260310120005_create-cortex-history-table.cjs +16 -0
  27. package/src/db/migrations/20260311120000_add-entity-namespace-and-relation-indexes.cjs +32 -0
  28. package/src/db/migrations/20260312120000_add-fact-entity-linking.cjs +22 -0
  29. package/src/db/migrations/20260313093130_create-api-key-table.cjs +15 -0
  30. package/src/db/migrations/20260313120000_add-entity-dedup-support.cjs +13 -0
  31. package/src/db/migrations/20260313150000_create-connector-tables.cjs +46 -0
  32. package/src/db/migrations/20260318120000_add-contextual-chunk-prefix.cjs +11 -0
  33. package/src/db/migrations/20260318120001_add-fact-temporal-validity.cjs +15 -0
  34. package/src/db/migrations/20260318120002_add-fact-importance.cjs +11 -0
  35. package/src/db/migrations/20260318120003_add-fact-access-tracking.cjs +13 -0
  36. package/src/db/migrations/20260405120000_add-unique-constraints.cjs +58 -0
  37. package/src/db/migrations/20260405140000_create-llm-log-table.cjs +21 -0
  38. package/src/db/migrations/20260424120000_split-fact-lifecycle.cjs +86 -0
  39. package/src/db/migrations/20260424120002_create-embedding-cache.cjs +26 -0
  40. package/src/db/migrations/20260429120000_halfvec-index-compression.cjs +34 -0
  41. package/src/db/migrations/20260429120100_create-hebbian-edge-table.cjs +37 -0
  42. package/src/db/migrations/20260429120200_upgrade-embedding-dim-1024.cjs +68 -0
  43. package/src/db/migrations/20260504120000_scope-document-source-path-uniqueness.cjs +45 -0
  44. package/src/db/migrations/20260508001733_add-entity-aliases.cjs +42 -0
  45. package/src/db/migrations/20260512120000_create-entity-hebbian-edge.cjs +42 -0
  46. package/src/db/migrations/20260512120000_create-pod-tables.cjs +71 -0
  47. package/src/db/migrations/20260512120100_create-pod-membership.cjs +50 -0
  48. package/src/db/migrations/20260512120200_add-document-source-metadata.cjs +32 -0
  49. package/src/db/migrations/20260514023428_rewrite-session-pods-and-add-fact-attribution-columns.cjs +86 -0
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Persistent embedding cache — avoids re-embedding identical text.
3
+ *
4
+ * Keyed on sha256(provider + model + text). Value is the cached vector.
5
+ * LRU eviction when count exceeds a soft limit (applied at write time).
6
+ */
7
+
8
+ exports.up = async function (knex) {
9
+ await knex.schema.createTable('embedding_cache', (table) => {
10
+ table.string('key').primary(); // sha256(provider|model|text)
11
+ table.string('provider').notNullable();
12
+ table.string('model').notNullable();
13
+ table.integer('hits').notNullable().defaultTo(0);
14
+ table.timestamp('created_at').notNullable().defaultTo(knex.fn.now());
15
+ table.timestamp('last_used_at').notNullable().defaultTo(knex.fn.now());
16
+
17
+ table.index('last_used_at');
18
+ });
19
+
20
+ // Embedding column with the same dims as everywhere else (768)
21
+ await knex.raw('ALTER TABLE embedding_cache ADD COLUMN embedding vector(768)');
22
+ };
23
+
24
+ exports.down = async function (knex) {
25
+ await knex.schema.dropTable('embedding_cache');
26
+ };
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Halfvec compression for HNSW indexes (Ogham §"Halfvec compression").
3
+ *
4
+ * The embedding columns stay as vector(768) (float32), but the HNSW index
5
+ * casts to halfvec(768) (float16). ~50% index size reduction with negligible
6
+ * quality loss — the cosine distance computation has more than enough
7
+ * precision at fp16 for retrieval ranking.
8
+ *
9
+ * Why not change the column type? Because storing as float32 keeps room for
10
+ * higher-precision operations (exact distance, future re-indexing strategies)
11
+ * while the HNSW index only needs distance ordering, where fp16 is fine.
12
+ */
13
+
14
+ const TABLES = ['chunk', 'fact', 'entity'];
15
+
16
+ exports.up = async function (knex) {
17
+ for (const table of TABLES) {
18
+ // Drop the old plain-vector HNSW index
19
+ await knex.raw(`DROP INDEX IF EXISTS ${table}_embedding_idx`);
20
+ // Recreate with halfvec cast
21
+ await knex.raw(
22
+ `CREATE INDEX ${table}_embedding_idx ON ${table} USING hnsw ((embedding::halfvec(768)) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64)`,
23
+ );
24
+ }
25
+ };
26
+
27
+ exports.down = async function (knex) {
28
+ for (const table of TABLES) {
29
+ await knex.raw(`DROP INDEX IF EXISTS ${table}_embedding_idx`);
30
+ await knex.raw(
31
+ `CREATE INDEX ${table}_embedding_idx ON ${table} USING hnsw (embedding vector_cosine_ops)`,
32
+ );
33
+ }
34
+ };
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Hebbian co-retrieval edges between facts.
3
+ *
4
+ * When two facts are retrieved together in the same search top-K, the edge
5
+ * between them strengthens. Over time, the graph builds itself from search
6
+ * behavior — no LLM calls, no manual annotation.
7
+ *
8
+ * Canonical ordering (fact_a_id < fact_b_id, compared numerically) prevents the
9
+ * (a,b)/(b,a) duplicate problem that bites symmetric relations. Lesson
10
+ * borrowed from OGHAM-LEARNINGS.md.
11
+ */
12
+
13
+ exports.up = async function (knex) {
14
+ await knex.schema.createTable('hebbian_edge', (table) => {
15
+ table.bigInteger('fact_a_id').notNullable().references('id').inTable('fact').onDelete('CASCADE');
16
+ table.bigInteger('fact_b_id').notNullable().references('id').inTable('fact').onDelete('CASCADE');
17
+ table.integer('strength').notNullable().defaultTo(1);
18
+ table.timestamp('first_seen_at').notNullable().defaultTo(knex.fn.now());
19
+ table.timestamp('last_seen_at').notNullable().defaultTo(knex.fn.now());
20
+ table.primary(['fact_a_id', 'fact_b_id']);
21
+ });
22
+
23
+ // Enforce canonical ordering at the row level — fact_a_id MUST be less than fact_b_id.
24
+ await knex.raw(`
25
+ ALTER TABLE hebbian_edge
26
+ ADD CONSTRAINT hebbian_edge_canonical_order
27
+ CHECK (fact_a_id < fact_b_id)
28
+ `);
29
+
30
+ // For walking outward from a single fact: index both columns.
31
+ await knex.raw(`CREATE INDEX hebbian_edge_a_idx ON hebbian_edge (fact_a_id, strength DESC)`);
32
+ await knex.raw(`CREATE INDEX hebbian_edge_b_idx ON hebbian_edge (fact_b_id, strength DESC)`);
33
+ };
34
+
35
+ exports.down = async function (knex) {
36
+ await knex.schema.dropTable('hebbian_edge');
37
+ };
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Upgrade embedding columns from vector(768) → vector(N) where N > 768 (typically 1024).
3
+ *
4
+ * CONDITIONAL: only runs when EMBEDDING_DIMENSIONS env is greater than 768. The default
5
+ * (unset or 768) is the Ollama nomic-embed-text dimension; bumping the schema
6
+ * to 1024 there would mismatch the embedder and break ingest.
7
+ *
8
+ * Activates when an operator opts into a 1024d-class model (Voyage 3-large,
9
+ * OpenAI text-embedding-3-large truncated to 1024d, bge-large-en-v1.5).
10
+ * They set EMBEDDING_DIMENSIONS=1024 (or higher) and re-run sigil migrate.
11
+ *
12
+ * REFUSES TO RUN if any embedding row exists — changing the column type
13
+ * would invalidate stored embeddings. Operators upgrading an existing DB:
14
+ * 1. sigil export to back up
15
+ * 2. sigil reset --confirm
16
+ * 3. set EMBEDDING_DIMENSIONS=1024 in ~/.sigil/.env
17
+ * 4. sigil migrate
18
+ * 5. re-ingest with the new embedding model
19
+ */
20
+
21
+ const TABLES = ['chunk', 'fact', 'entity', 'embedding_cache'];
22
+ const DEFAULT_DIM = 768;
23
+
24
+ exports.up = async function (knex) {
25
+ const targetDim = Number(process.env.EMBEDDING_DIMENSIONS) || DEFAULT_DIM;
26
+
27
+ if (targetDim <= DEFAULT_DIM) {
28
+ // No-op for the default 768d (local nomic). Migration is recorded as
29
+ // applied so it doesn't keep trying on every sigil migrate.
30
+ return;
31
+ }
32
+
33
+ // Safety check — bail loudly if existing embeddings would be invalidated.
34
+ for (const table of TABLES) {
35
+ const { rows } = await knex.raw(`SELECT COUNT(*)::int AS c FROM ${table} WHERE embedding IS NOT NULL`);
36
+ const count = rows[0].c;
37
+ if (count > 0) {
38
+ throw new Error(
39
+ `Cannot upgrade embedding dim to ${targetDim}: ${table} has ${count} rows with existing embeddings. ` +
40
+ `Run 'sigil export' to back up, then 'sigil reset --confirm' to wipe, then re-migrate ` +
41
+ `and re-ingest with the new embedding model.`,
42
+ );
43
+ }
44
+ }
45
+
46
+ for (const table of TABLES) {
47
+ await knex.raw(`ALTER TABLE ${table} ALTER COLUMN embedding TYPE vector(${targetDim}) USING embedding::vector(${targetDim})`);
48
+ // embedding_cache doesn't have an HNSW index — it's a key-value store keyed on sha256.
49
+ if (table === 'embedding_cache') continue;
50
+ await knex.raw(`DROP INDEX IF EXISTS ${table}_embedding_idx`);
51
+ await knex.raw(
52
+ `CREATE INDEX ${table}_embedding_idx ON ${table} USING hnsw ((embedding::halfvec(${targetDim})) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64)`,
53
+ );
54
+ }
55
+ };
56
+
57
+ exports.down = async function (knex) {
58
+ // The down migration always reverts to 768d — it's the lowest common
59
+ // denominator and matches the prior halfvec migration's index.
60
+ for (const table of TABLES) {
61
+ await knex.raw(`ALTER TABLE ${table} ALTER COLUMN embedding TYPE vector(${DEFAULT_DIM}) USING NULL`);
62
+ if (table === 'embedding_cache') continue;
63
+ await knex.raw(`DROP INDEX IF EXISTS ${table}_embedding_idx`);
64
+ await knex.raw(
65
+ `CREATE INDEX ${table}_embedding_idx ON ${table} USING hnsw ((embedding::halfvec(${DEFAULT_DIM})) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64)`,
66
+ );
67
+ }
68
+ };
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Scope document.source_path uniqueness to (source_path, namespace).
3
+ *
4
+ * Prior schema enforced UNIQUE(source_path) globally — meaning the same file
5
+ * path could only exist in ONE namespace at a time. This bit eval harnesses
6
+ * (per-question namespaces re-using the same source path), and would bite
7
+ * legitimate users wanting the same doc in personal + work namespaces.
8
+ *
9
+ * The composite UNIQUE(source_path, namespace) keeps "no dupes within a
10
+ * namespace" guarantee but allows the same path in different namespaces.
11
+ */
12
+
13
+ exports.up = async function (knex) {
14
+ // Count any cross-namespace would-be-duplicates the old constraint masked.
15
+ const dupes = await knex.raw(`
16
+ SELECT source_path, COUNT(DISTINCT namespace) AS namespaces
17
+ FROM document
18
+ GROUP BY source_path
19
+ HAVING COUNT(DISTINCT namespace) > 1
20
+ `);
21
+ if (dupes.rows && dupes.rows.length) {
22
+ console.warn(`[migration] ${dupes.rows.length} source_paths now allowed in multiple namespaces.`);
23
+ }
24
+
25
+ await knex.schema.alterTable('document', (table) => {
26
+ table.dropUnique('source_path');
27
+ });
28
+
29
+ await knex.schema.alterTable('document', (table) => {
30
+ table.unique(['source_path', 'namespace']);
31
+ });
32
+ };
33
+
34
+ exports.down = async function (knex) {
35
+ await knex.schema.alterTable('document', (table) => {
36
+ table.dropUnique(['source_path', 'namespace']);
37
+ });
38
+
39
+ // Recreate the old global constraint. If multiple rows share a source_path
40
+ // across namespaces, this DOWN will fail loudly — that's correct, the
41
+ // operator must consolidate first.
42
+ await knex.schema.alterTable('document', (table) => {
43
+ table.unique('source_path');
44
+ });
45
+ };
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Add an `aliases` text-array column to the entity table.
3
+ *
4
+ * Why: AUDM and the existing 3-stage entity resolver both fail on entity
5
+ * renames ("Smara is now named Sigil") because the rename's vector
6
+ * similarity to existing facts about Smara is too low to trigger any
7
+ * dedup. The structural fix is to track entity identity at the entity
8
+ * layer (stable UUIDs surviving renames) and let facts reference those
9
+ * UUIDs via fact_entity. When a rename is detected, the canonical
10
+ * `name` rolls forward and the old name lands in `aliases[]` so that:
11
+ *
12
+ * 1. Future ingests mentioning the old name still resolve to the
13
+ * same entity row (alias-aware lookup in findByName).
14
+ * 2. Search-time graph traversal pulls historical facts via the
15
+ * stable entity_id even though their text still mentions the
16
+ * old name.
17
+ *
18
+ * Defaults to '{}' so all existing rows have a sensible empty value.
19
+ * Indexed via a plain GIN index on the array; aliases are stored pre-
20
+ * lowercased by the resolver, so case-insensitive lookup stays fast.
21
+ */
22
+
23
+ exports.up = async function (knex) {
24
+ await knex.raw(`
25
+ ALTER TABLE entity
26
+ ADD COLUMN aliases TEXT[] NOT NULL DEFAULT '{}'::text[]
27
+ `);
28
+
29
+ // Aliases are stored already lowercased by the resolver (push only happens
30
+ // via pushAlias() which lowercases at the boundary), so a plain GIN index
31
+ // on the array is sufficient. PGlite rejects subqueries in expression
32
+ // indexes, so we can't transform at index time — pre-lowercasing is the
33
+ // simpler contract.
34
+ await knex.raw(`
35
+ CREATE INDEX entity_aliases_idx ON entity USING GIN (aliases)
36
+ `);
37
+ };
38
+
39
+ exports.down = async function (knex) {
40
+ await knex.raw(`DROP INDEX IF EXISTS entity_aliases_idx`);
41
+ await knex.raw(`ALTER TABLE entity DROP COLUMN IF EXISTS aliases`);
42
+ };
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Hebbian co-retrieval edges between entities.
3
+ *
4
+ * Sibling of hebbian_edge but for entities, not facts. When a search returns
5
+ * a top-K result set, every entity linked to those facts is considered "co-
6
+ * activated." Pairwise edges between those entities strengthen.
7
+ *
8
+ * Why entities (in addition to fact-level edges):
9
+ * - Fact-level edges are brittle when the same idea is stored as two
10
+ * different facts. Entity edges survive paraphrase + AUDM splits.
11
+ * - The entity graph is already the substrate for graph_boost / related-
12
+ * entity expansion. A learned weight on top sharpens that traversal.
13
+ *
14
+ * Strength is NUMERIC (not integer) because the update rule caps via
15
+ * LEAST(strength + eta, cap) and read-time decay multiplies by a fractional
16
+ * exponential factor. Canonical ordering (entity_a_id < entity_b_id)
17
+ * prevents (a,b)/(b,a) dupes.
18
+ */
19
+
20
+ exports.up = async function (knex) {
21
+ await knex.schema.createTable('entity_hebbian_edge', (table) => {
22
+ table.bigInteger('entity_a_id').notNullable().references('id').inTable('entity').onDelete('CASCADE');
23
+ table.bigInteger('entity_b_id').notNullable().references('id').inTable('entity').onDelete('CASCADE');
24
+ table.decimal('strength', 12, 4).notNullable().defaultTo(1);
25
+ table.timestamp('first_seen_at').notNullable().defaultTo(knex.fn.now());
26
+ table.timestamp('last_seen_at').notNullable().defaultTo(knex.fn.now());
27
+ table.primary(['entity_a_id', 'entity_b_id']);
28
+ });
29
+
30
+ await knex.raw(`
31
+ ALTER TABLE entity_hebbian_edge
32
+ ADD CONSTRAINT entity_hebbian_edge_canonical_order
33
+ CHECK (entity_a_id < entity_b_id)
34
+ `);
35
+
36
+ await knex.raw(`CREATE INDEX entity_hebbian_edge_a_idx ON entity_hebbian_edge (entity_a_id, strength DESC)`);
37
+ await knex.raw(`CREATE INDEX entity_hebbian_edge_b_idx ON entity_hebbian_edge (entity_b_id, strength DESC)`);
38
+ };
39
+
40
+ exports.down = async function (knex) {
41
+ await knex.schema.dropTable('entity_hebbian_edge');
42
+ };
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Create the `pod` table — typed memory containers that segregate facts,
3
+ * documents, and entities by source or subject. Pods sit on top of the
4
+ * existing fact/entity/document model; they do not replace AUDM, entity
5
+ * dedup, or the namespace partition.
6
+ *
7
+ * Pod types ship in this branch:
8
+ * - 'session' → one per Claude Code session (external_id = session_id)
9
+ * - 'person' → one per person you have a relationship with
10
+ * (entity_id FK to the canonical entity row)
11
+ *
12
+ * Future types reserved (no auto-creation yet):
13
+ * - 'project', 'connector_workspace', 'custom'
14
+ *
15
+ * Membership lives in a separate `pod_membership` junction (next migration)
16
+ * so the `fact` row stays read-mostly and the HNSW index does not bloat —
17
+ * same discipline as the 20260424 fact_lifecycle split.
18
+ */
19
+
20
+ exports.up = async function (knex) {
21
+ await knex.schema.createTable('pod', (table) => {
22
+ table.increments('id').primary();
23
+ table.text('uid').notNullable().unique();
24
+ table.text('pod_type').notNullable();
25
+ table.text('name').notNullable();
26
+ table.text('namespace').notNullable();
27
+ table.jsonb('attrs').notNullable().defaultTo('{}');
28
+ table.text('status').notNullable().defaultTo('active'); // active | archived
29
+
30
+ // Person/project pods link to their canonical entity. Nullable for
31
+ // session/workspace pods.
32
+ table.integer('entity_id').references('id').inTable('entity');
33
+
34
+ // Connector-workspace pods link to their connection. Nullable for
35
+ // session/person pods.
36
+ table.integer('connection_id').references('id').inTable('connection');
37
+
38
+ // Stable external identifier for upsert idempotency. For session pods
39
+ // this is the Claude Code session_id; for workspace pods this is the
40
+ // platform's team/org id.
41
+ table.text('external_id');
42
+
43
+ table.timestamp('started_at');
44
+ table.timestamp('ended_at');
45
+
46
+ // Denormalised member counters, refreshed by `sigil maintain` (or
47
+ // incrementally by membership writes). Cheap to keep, expensive to
48
+ // recompute on demand.
49
+ table.integer('member_doc_count').notNullable().defaultTo(0);
50
+ table.integer('member_fact_count').notNullable().defaultTo(0);
51
+
52
+ table.timestamps(false, true);
53
+
54
+ table.index('pod_type');
55
+ table.index('namespace');
56
+ table.index(['namespace', 'pod_type', 'status']);
57
+ });
58
+
59
+ // Upsert key: (pod_type, external_id, namespace) where external_id is set.
60
+ // Partial unique because external_id is nullable (custom pods may have none).
61
+ await knex.raw(`
62
+ CREATE UNIQUE INDEX pod_external_id_unique
63
+ ON pod (pod_type, external_id, namespace)
64
+ WHERE external_id IS NOT NULL
65
+ `);
66
+ };
67
+
68
+ exports.down = async function (knex) {
69
+ await knex.raw('DROP INDEX IF EXISTS pod_external_id_unique');
70
+ await knex.schema.dropTableIfExists('pod');
71
+ };
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Polymorphic many-to-many junction linking pods to facts, documents, and
3
+ * entities. A fact can legitimately belong to multiple pods (e.g., a fact
4
+ * about Dhaval extracted in a Claude Code session belongs to both the
5
+ * person pod for Dhaval and the session pod for that conversation), so a
6
+ * single `pod_id` column on the fact row would force a lossy "primary
7
+ * pod" choice.
8
+ *
9
+ * Keeping membership in a junction also preserves the fact row's
10
+ * read-mostly invariant (no HNSW index churn on pod attach/detach) — same
11
+ * discipline as fact_lifecycle (20260424120000).
12
+ *
13
+ * member_id uses bigInteger because fact.id is bigint; document.id and
14
+ * entity.id are int4 but fit fine in the wider column.
15
+ */
16
+
17
+ exports.up = async function (knex) {
18
+ await knex.schema.createTable('pod_membership', (table) => {
19
+ table.increments('id').primary();
20
+ table
21
+ .integer('pod_id')
22
+ .notNullable()
23
+ .references('id')
24
+ .inTable('pod')
25
+ .onDelete('CASCADE');
26
+
27
+ // 'fact' | 'document' | 'entity'. FK not enforced because Postgres
28
+ // does not support polymorphic FKs; integrity is the caller's
29
+ // responsibility (membership.js).
30
+ table.text('member_type').notNullable();
31
+ table.bigInteger('member_id').notNullable();
32
+
33
+ // 'primary' (this pod owns the member) | 'contextual' (member is
34
+ // referenced from this pod's perspective) | 'mention' (member just
35
+ // mentions an entity associated with this pod). Free string for now;
36
+ // promote to enum once the values settle.
37
+ table.text('role');
38
+
39
+ table.timestamp('created_at').notNullable().defaultTo(knex.fn.now());
40
+
41
+ table.unique(['pod_id', 'member_type', 'member_id']);
42
+
43
+ // Reverse lookup: "what pods is this fact/document/entity in?"
44
+ table.index(['member_type', 'member_id']);
45
+ });
46
+ };
47
+
48
+ exports.down = async function (knex) {
49
+ await knex.schema.dropTableIfExists('pod_membership');
50
+ };
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Add `source_metadata` jsonb and `connection_id` FK to `document`.
3
+ *
4
+ * Why: the ingestion pipeline accepts a `metadata` arg (from sources/file.js,
5
+ * sources/url.js, future connectors) but currently drops it on the floor —
6
+ * it reaches `parse()` for format hints and `linkDocumentEntities()` for
7
+ * minor signals, but never lands on the document row. That made source-
8
+ * instance reasoning ("this came from Slack message ts=X in team=Y")
9
+ * impossible.
10
+ *
11
+ * Pods need this to attach connector-sourced documents to the right
12
+ * workspace pod and to derive person pods from senders.
13
+ *
14
+ * Defaults to '{}' so all existing rows have a sensible empty value.
15
+ */
16
+
17
+ exports.up = async function (knex) {
18
+ await knex.schema.alterTable('document', (table) => {
19
+ table.integer('connection_id').references('id').inTable('connection');
20
+ table.jsonb('source_metadata').notNullable().defaultTo('{}');
21
+ });
22
+
23
+ await knex.raw('CREATE INDEX document_connection_id_idx ON document (connection_id)');
24
+ };
25
+
26
+ exports.down = async function (knex) {
27
+ await knex.raw('DROP INDEX IF EXISTS document_connection_id_idx');
28
+ await knex.schema.alterTable('document', (table) => {
29
+ table.dropColumn('source_metadata');
30
+ table.dropColumn('connection_id');
31
+ });
32
+ };
@@ -0,0 +1,86 @@
1
+ /**
2
+ * 0.10.0 — Pod distinction layer foundation.
3
+ *
4
+ * Three changes:
5
+ * 1. Rewrite pod_type='session' rows to 'claude_session'. The pod kind
6
+ * registry treats the original Claude Code session pod as one of many
7
+ * kinds (alongside project, person, playbook, vital); the name needs
8
+ * to reflect that. No CHECK constraint exists on pod_type — the
9
+ * column is plain text — so this is just an UPDATE.
10
+ *
11
+ * 2. Add fact.importance_score INTEGER. Existing fact.importance is a
12
+ * text enum (vital | high | medium | supplementary | trivial); the
13
+ * hot-context decay function in 0.10.0 needs a numeric scale.
14
+ * Backfill: vital=5, high=4, medium=3, supplementary=2, trivial=1 (any other or NULL value → 2).
15
+ * The text column stays as the authoritative input from the LLM
16
+ * extractor; the numeric is the derived score retrieval uses.
17
+ *
18
+ * 3. Add fact.superseded_at TIMESTAMP and fact.superseded_by_fact_uid
19
+ * TEXT for the append-only / bi-temporal pattern (Graphiti). Existing
20
+ * valid_from / valid_until already cover event-time validity; these
21
+ * add transaction-time supersession (the arbiter agent that lands in
22
+ * 0.11.0 will populate them).
23
+ */
24
+
25
+ exports.up = async function (knex) {
26
+ // 1. Rewrite session → claude_session
27
+ await knex.raw("UPDATE pod SET pod_type = 'claude_session' WHERE pod_type = 'session'");
28
+
29
+ // 2. Add fact.importance_score with backfill
30
+ const hasImportanceScore = await knex.schema.hasColumn('fact', 'importance_score');
31
+ if (!hasImportanceScore) {
32
+ await knex.schema.alterTable('fact', (table) => {
33
+ table.integer('importance_score');
34
+ });
35
+ await knex.raw(`
36
+ UPDATE fact SET importance_score = CASE importance
37
+ WHEN 'vital' THEN 5
38
+ WHEN 'high' THEN 4
39
+ WHEN 'medium' THEN 3
40
+ WHEN 'supplementary' THEN 2
41
+ WHEN 'trivial' THEN 1
42
+ ELSE 2
43
+ END
44
+ `);
45
+ await knex.schema.alterTable('fact', (table) => {
46
+ table.integer('importance_score').defaultTo(2).notNullable().alter();
47
+ });
48
+ await knex.schema.alterTable('fact', (table) => {
49
+ table.index('importance_score');
50
+ });
51
+ }
52
+
53
+ // 3. Add supersession columns
54
+ const hasSupersededAt = await knex.schema.hasColumn('fact', 'superseded_at');
55
+ if (!hasSupersededAt) {
56
+ await knex.schema.alterTable('fact', (table) => {
57
+ table.timestamp('superseded_at');
58
+ table.text('superseded_by_fact_uid');
59
+ });
60
+ await knex.schema.alterTable('fact', (table) => {
61
+ table.index('superseded_at');
62
+ });
63
+ }
64
+ };
65
+
66
+ exports.down = async function (knex) {
67
+ // 3. Drop supersession columns
68
+ if (await knex.schema.hasColumn('fact', 'superseded_at')) {
69
+ await knex.schema.alterTable('fact', (table) => {
70
+ table.dropIndex('superseded_at');
71
+ table.dropColumn('superseded_at');
72
+ table.dropColumn('superseded_by_fact_uid');
73
+ });
74
+ }
75
+
76
+ // 2. Drop importance_score
77
+ if (await knex.schema.hasColumn('fact', 'importance_score')) {
78
+ await knex.schema.alterTable('fact', (table) => {
79
+ table.dropIndex('importance_score');
80
+ table.dropColumn('importance_score');
81
+ });
82
+ }
83
+
84
+ // 1. Rewrite claude_session → session
85
+ await knex.raw("UPDATE pod SET pod_type = 'session' WHERE pod_type = 'claude_session'");
86
+ };