@anmol-srv/sigil 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +417 -0
  3. package/dist/cli.js +1019 -0
  4. package/dist/hooks/post-tool-use.js +70 -0
  5. package/dist/hooks/session-end.js +222 -0
  6. package/dist/hooks/stop.js +259 -0
  7. package/dist/hooks/user-prompt-submit.js +279 -0
  8. package/dist/server.js +573 -0
  9. package/integrations/hermes/README.md +41 -0
  10. package/integrations/hermes/plugin/README.md +72 -0
  11. package/integrations/hermes/plugin/__init__.py +353 -0
  12. package/integrations/hermes/plugin/plugin.yaml +10 -0
  13. package/knexfile.js +15 -0
  14. package/package.json +100 -0
  15. package/prompts/audm-decision.md +31 -0
  16. package/prompts/chunk-context.md +23 -0
  17. package/prompts/default-extraction.md +35 -0
  18. package/prompts/entity-extraction.md +37 -0
  19. package/prompts/input-classifier.md +23 -0
  20. package/prompts/query-router.md +18 -0
  21. package/src/db/migrations/20260310120000_create-cortex-document-table.cjs +21 -0
  22. package/src/db/migrations/20260310120001_create-cortex-chunk-table.cjs +37 -0
  23. package/src/db/migrations/20260310120002_create-cortex-fact-table.cjs +37 -0
  24. package/src/db/migrations/20260310120003_create-cortex-entity-table.cjs +26 -0
  25. package/src/db/migrations/20260310120004_create-cortex-relation-table.cjs +27 -0
  26. package/src/db/migrations/20260310120005_create-cortex-history-table.cjs +16 -0
  27. package/src/db/migrations/20260311120000_add-entity-namespace-and-relation-indexes.cjs +32 -0
  28. package/src/db/migrations/20260312120000_add-fact-entity-linking.cjs +22 -0
  29. package/src/db/migrations/20260313093130_create-api-key-table.cjs +15 -0
  30. package/src/db/migrations/20260313120000_add-entity-dedup-support.cjs +13 -0
  31. package/src/db/migrations/20260313150000_create-connector-tables.cjs +46 -0
  32. package/src/db/migrations/20260318120000_add-contextual-chunk-prefix.cjs +11 -0
  33. package/src/db/migrations/20260318120001_add-fact-temporal-validity.cjs +15 -0
  34. package/src/db/migrations/20260318120002_add-fact-importance.cjs +11 -0
  35. package/src/db/migrations/20260318120003_add-fact-access-tracking.cjs +13 -0
  36. package/src/db/migrations/20260405120000_add-unique-constraints.cjs +58 -0
  37. package/src/db/migrations/20260405140000_create-llm-log-table.cjs +21 -0
  38. package/src/db/migrations/20260424120000_split-fact-lifecycle.cjs +86 -0
  39. package/src/db/migrations/20260424120002_create-embedding-cache.cjs +26 -0
  40. package/src/db/migrations/20260429120000_halfvec-index-compression.cjs +34 -0
  41. package/src/db/migrations/20260429120100_create-hebbian-edge-table.cjs +37 -0
  42. package/src/db/migrations/20260429120200_upgrade-embedding-dim-1024.cjs +68 -0
  43. package/src/db/migrations/20260504120000_scope-document-source-path-uniqueness.cjs +45 -0
  44. package/src/db/migrations/20260508001733_add-entity-aliases.cjs +42 -0
  45. package/src/db/migrations/20260512120000_create-entity-hebbian-edge.cjs +42 -0
  46. package/src/db/migrations/20260512120000_create-pod-tables.cjs +71 -0
  47. package/src/db/migrations/20260512120100_create-pod-membership.cjs +50 -0
  48. package/src/db/migrations/20260512120200_add-document-source-metadata.cjs +32 -0
  49. package/src/db/migrations/20260514023428_rewrite-session-pods-and-add-fact-attribution-columns.cjs +86 -0
@@ -0,0 +1,35 @@
1
+ You are extracting structured facts from a document in a personal knowledge base. Extract every discrete, atomic fact that would be useful for someone querying this knowledge base later.
2
+
3
+ ## Categories
4
+
5
+ 1. **preference** — Personal likes, dislikes, favorites, preferred tools/foods/methods
6
+ 2. **opinion** — Personal views, assessments, evaluations of tools/concepts/approaches
7
+ 3. **personal** — Personal facts: birthday, workplace, location, biographical details
8
+ 4. **experience** — Personal experiences: projects built, tools used, skills acquired, years of use
9
+ 5. **business_rule** — Organizational rules, policies, constraints, requirements
10
+ 6. **workflow** — Process flows, state transitions, step-by-step procedures
11
+ 7. **architecture** — System design, service interactions, infrastructure decisions
12
+ 8. **convention** — Coding patterns, naming rules, team standards, style guidelines
13
+ 9. **decision** — Why choices were made, tradeoffs considered, alternatives rejected
14
+ 10. **domain_knowledge** — Domain-specific terminology, concepts, definitions
15
+ 11. **key_insight** — Important takeaways, notable explanations, lessons learned
16
+ 12. **metric** — Quantitative data, measurements, statistics, benchmarks
17
+ 13. **issue** — Known problems, bugs, limitations, risks, caveats
18
+ 14. **action_item** — Tasks, follow-ups, assignments, deadlines, TODOs
19
+
20
+ ## Rules
21
+
22
+ - Each fact must be **self-contained** — include enough context (names, identifiers) so the fact makes sense without the source document.
23
+ - Facts should be **atomic** — one idea per fact. Don't combine multiple facts into one.
24
+ - Include **specific details** — numbers, names, identifiers, exact values when available. **Never paraphrase or drop specific terms** (brand names, technical terms, model names, proper nouns). "sodium vapor lamps" must appear as "sodium vapor lamps", not just "lamps".
25
+ - Do NOT extract generic knowledge (widely known programming concepts, common definitions). Only extract facts specific to THIS document and THIS person/organization.
26
+ - Use personal categories (preference, opinion, personal, experience) when the content expresses subjective or biographical information. Use knowledge categories for objective/organizational information.
27
+ - Set confidence to "high" when the fact comes directly from explicit statements. Set to "medium" for facts inferred or summarized. Set to "low" for uncertain or speculative information.
28
+ - Set importance to "vital" if the fact is essential to understanding the topic — core preferences, key decisions, critical constraints. Set to "supplementary" for supporting details, examples, or background context.
29
+ - Aim for 5-20 facts per document depending on length and density. Don't pad with low-value facts, but don't miss important details either.
30
+
31
+ ## Anti-Redundancy Rules
32
+
33
+ - **No rephrased duplicates.** If you already extracted a fact about a topic, don't extract the same thing in different words.
34
+ - **Combine related items.** If multiple items convey the same point, combine into one fact.
35
+ - **Be specific, not generic.** "I prefer Fastify over Express for Node.js APIs" is good. "The user has framework preferences" is bad.
@@ -0,0 +1,37 @@
1
+ You are extracting topic entities from a set of facts in an organizational knowledge base.
2
+ A "topic" is a distinct concept, technology, system, process, or subject referenced in the facts.
3
+
4
+ ## Rules
5
+
6
+ - Extract 3-8 topics. Only meaningful, distinct topics.
7
+ - Use canonical names: "normalization" not "database normalization concepts", "react hooks" not "React.js hooks pattern".
8
+ - If two facts mention the same topic with different wording, extract it once with the canonical name.
9
+ - Include a brief description (1 sentence) for context.
10
+ - Do NOT extract generic terms like "programming", "coding", "software". Be specific.
11
+ - Do NOT extract people names or document titles — those are handled separately.
12
+ - Topics should be reusable across documents — "database indexing" not "the indexing discussion in doc 12".
13
+
14
+ ## Rename contexts — ALWAYS extract BOTH names
15
+
16
+ When the source mentions a rename ("X is now named Y", "X has been renamed to Y", "X used to be called Y", "we renamed X to Y", etc.) — extract **both** the old and new names as separate topic entries. Do not collapse the rename into a single topic. The downstream entity resolver needs both names so it can recognise the rename and merge them into one entity with the old name preserved as an alias. Skipping the old name will cause the system to create a duplicate entity.
17
+
18
+ ## Output Format
19
+
20
+ Respond with ONLY a JSON array. Each item:
21
+ - "name" (string): canonical topic name, lowercase
22
+ - "description" (string): one-sentence description of what this topic covers in context
23
+
24
+ Example:
25
+ [
26
+ { "name": "3nf normalization", "description": "Third normal form and eliminating transitive dependencies in relational databases" },
27
+ { "name": "foreign key cascades", "description": "CASCADE vs SET NULL behavior when deleting referenced rows" },
28
+ { "name": "query optimization", "description": "Techniques for improving SQL query performance including indexing and query plans" }
29
+ ]
30
+
31
+ Rename example — note BOTH names are extracted:
32
+ Input: "Smara is now named Sigil"
33
+ Output:
34
+ [
35
+ { "name": "smara", "description": "the project's previous name (renamed)" },
36
+ { "name": "sigil", "description": "the project's current name; was previously called Smara" }
37
+ ]
@@ -0,0 +1,23 @@
1
+ Classify this input for a personal knowledge base. Determine how it should be stored.
2
+
3
+ ## Routes
4
+
5
+ - **thought**: Short personal statement — preference, opinion, note, personal fact, experience. Extract facts directly. No chunking needed.
6
+ - **knowledge**: Structured information with substance — explanations, rules, procedures, technical content. Needs full extraction pipeline.
7
+ - **noise**: Not worth storing — greetings, incomplete fragments, test input, nonsense. Skip entirely.
8
+
9
+ ## Categories for extracted facts
10
+
11
+ preference, opinion, personal, experience, business_rule, workflow, architecture, convention, decision, domain_knowledge, key_insight, metric, issue, action_item
12
+
13
+ ## Rules
14
+
15
+ - If input expresses a preference, opinion, personal fact, or experience, route as "thought"
16
+ - For "thought" route: extract 1-3 atomic facts. Each fact must be self-contained.
17
+ - **Preserve all specific details verbatim** — technical terms, brand names, proper nouns, measurements, model names. Never paraphrase, generalize, or drop specifics. "sodium vapor streetlamps" must stay "sodium vapor streetlamps", not become "streetlamps".
18
+ - If the input mentions multiple specific things (e.g. "amber, sodium vapor, streetlamps"), include all of them in the fact content.
19
+ - For "knowledge" route: set facts to empty array (the pipeline will extract them)
20
+ - For "noise" route: set facts to empty array
21
+ - Always list mentioned entities (people, tools, technologies, places, concepts)
22
+ - Confidence: "high" for explicit statements, "medium" for inferred
23
+ - Importance: "vital" for core preferences/facts, "supplementary" for casual mentions
@@ -0,0 +1,18 @@
1
+ Classify the intent of this search query for a personal knowledge base.
2
+
3
+ ## Intents
4
+
5
+ - **preference**: Asking about likes, dislikes, preferences, opinions ("which fruit do I like", "what's my favorite tool", "do I prefer X or Y")
6
+ - **factual**: Asking for specific information ("how does X work", "what is the rule for Y", "what BLEU score did the model achieve")
7
+ - **entity_lookup**: Looking up a specific person, tool, system, or concept by name ("tell me about Redis", "what do I know about John")
8
+ - **exploratory**: Broad exploration of a topic ("everything about auth", "what do I know about databases", "summarize my knowledge on X")
9
+ - **temporal**: Time-dependent query ("what changed last month", "what was the status in January")
10
+
11
+ ## Rules
12
+
13
+ - For preference: set categories to ["preference", "opinion", "personal"]
14
+ - For factual: set categories to [] (search all categories)
15
+ - For entity_lookup: set entities to the entity name(s), categories to []
16
+ - For exploratory: set expand to true, categories to []
17
+ - For temporal: extract the time reference as an ISO date in pointInTime
18
+ - Always list entity names mentioned in the query
@@ -0,0 +1,21 @@
1
+ exports.up = function (knex) { // Migration "up": ensures the `vector` extension exists, then creates the `document` table.
2
+ return knex.raw('CREATE EXTENSION IF NOT EXISTS vector').then(() => // the table is created only after the extension statement resolves
3
+ knex.schema.createTable('document', (table) => {
4
+ table.increments('id').primary();
5
+ table.text('uid').notNullable().unique();
6
+ table.text('source_path').notNullable();
7
+ table.text('source_type').notNullable();
8
+ table.text('title');
9
+ table.text('content_hash');
10
+ table.text('namespace').notNullable().index();
11
+ table.integer('chunk_count').defaultTo(0);
12
+ table.integer('fact_count').defaultTo(0);
13
+ table.timestamp('last_ingested_at');
14
+ table.timestamps(false, true); // adds created_at / updated_at; second argument asks Knex to default them to now
15
+ })
16
+ );
17
+ };
18
+
19
+ exports.down = function (knex) { // Migration "down": drops the `document` table; the `vector` extension is left installed.
20
+ return knex.schema.dropTable('document');
21
+ };
@@ -0,0 +1,37 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `chunk` table, then adds its embedding column and two indexes with raw SQL.
2
+ return knex.schema
3
+ .createTable('chunk', (table) => {
4
+ table.increments('id').primary();
5
+ table
6
+ .integer('document_id')
7
+ .notNullable()
8
+ .references('id')
9
+ .inTable('document')
10
+ .onDelete('CASCADE'); // deleting a document deletes its chunks
11
+ table.integer('chunk_index').notNullable();
12
+ table.text('content').notNullable();
13
+ table.text('section_heading');
14
+ table.text('namespace').notNullable().index();
15
+ table.specificType('search_vector', 'tsvector');
16
+ table.timestamps(false, true); // created_at / updated_at
17
+ })
18
+ .then(() =>
19
+ knex.raw(
20
+ `ALTER TABLE chunk ADD COLUMN embedding vector(768)` // 768-dimension vector column, added after the table exists
21
+ )
22
+ )
23
+ .then(() =>
24
+ knex.raw(
25
+ `CREATE INDEX chunk_embedding_idx ON chunk USING hnsw (embedding vector_cosine_ops)` // HNSW index with the cosine operator class
26
+ )
27
+ )
28
+ .then(() =>
29
+ knex.raw(
30
+ `CREATE INDEX chunk_search_idx ON chunk USING gin (search_vector)` // GIN index on the tsvector column
31
+ )
32
+ );
33
+ };
34
+
35
+ exports.down = function (knex) { // Migration "down": drops the `chunk` table (its indexes go with it).
36
+ return knex.schema.dropTable('chunk');
37
+ };
@@ -0,0 +1,37 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `fact` table, then adds its embedding column and two indexes with raw SQL.
2
+ return knex.schema
3
+ .createTable('fact', (table) => {
4
+ table.increments('id').primary();
5
+ table.text('uid').notNullable().unique();
6
+ table.text('content').notNullable();
7
+ table.text('category').notNullable().index();
8
+ table.text('confidence').defaultTo('medium');
9
+ table.text('namespace').notNullable().index();
10
+ table.text('status').notNullable().defaultTo('active').index();
11
+ table.integer('contradicted_by_id').references('id').inTable('fact'); // self-reference to another fact row; no ON DELETE action is set
12
+ table.integer('superseded_by_id').references('id').inTable('fact'); // self-reference to another fact row; no ON DELETE action is set
13
+ table.specificType('source_document_ids', 'integer[]'); // plain integer array — not enforced by a foreign key
14
+ table.text('source_section');
15
+ table.specificType('search_vector', 'tsvector');
16
+ table.timestamps(false, true); // created_at / updated_at
17
+ })
18
+ .then(() =>
19
+ knex.raw(
20
+ `ALTER TABLE fact ADD COLUMN embedding vector(768)` // 768-dimension vector column, added after the table exists
21
+ )
22
+ )
23
+ .then(() =>
24
+ knex.raw(
25
+ `CREATE INDEX fact_embedding_idx ON fact USING hnsw (embedding vector_cosine_ops)` // HNSW index with the cosine operator class
26
+ )
27
+ )
28
+ .then(() =>
29
+ knex.raw(
30
+ `CREATE INDEX fact_search_idx ON fact USING gin (search_vector)` // GIN index on the tsvector column
31
+ )
32
+ );
33
+ };
34
+
35
+ exports.down = function (knex) { // Migration "down": drops the `fact` table.
36
+ return knex.schema.dropTable('fact');
37
+ };
@@ -0,0 +1,26 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `entity` table, then adds its embedding column and HNSW index with raw SQL.
2
+ return knex.schema
3
+ .createTable('entity', (table) => {
4
+ table.increments('id').primary();
5
+ table.text('uid').notNullable().unique();
6
+ table.text('name').notNullable();
7
+ table.text('entity_type').notNullable().index();
8
+ table.text('description');
9
+ table.integer('mention_count').defaultTo(0);
10
+ table.timestamps(false, true); // created_at / updated_at
11
+ })
12
+ .then(() =>
13
+ knex.raw(
14
+ `ALTER TABLE entity ADD COLUMN embedding vector(768)` // 768-dimension vector column, added after the table exists
15
+ )
16
+ )
17
+ .then(() =>
18
+ knex.raw(
19
+ `CREATE INDEX entity_embedding_idx ON entity USING hnsw (embedding vector_cosine_ops)` // HNSW index with the cosine operator class
20
+ )
21
+ );
22
+ };
23
+
24
+ exports.down = function (knex) { // Migration "down": drops the `entity` table.
25
+ return knex.schema.dropTable('entity');
26
+ };
@@ -0,0 +1,27 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `relation` table linking two `entity` rows by a typed edge.
2
+ return knex.schema.createTable('relation', (table) => {
3
+ table.increments('id').primary();
4
+ table
5
+ .integer('source_id')
6
+ .notNullable()
7
+ .references('id')
8
+ .inTable('entity')
9
+ .onDelete('CASCADE'); // relation is removed when its source entity is deleted
10
+ table
11
+ .integer('target_id')
12
+ .notNullable()
13
+ .references('id')
14
+ .inTable('entity')
15
+ .onDelete('CASCADE'); // relation is removed when its target entity is deleted
16
+ table.text('relation_type').notNullable().index();
17
+ table.integer('source_fact_id').references('id').inTable('fact'); // nullable link to a fact; no ON DELETE action is set
18
+ table.integer('mention_count').defaultTo(1);
19
+ table.timestamp('valid_at');
20
+ table.timestamp('invalid_at'); // NOTE(review): presumably NULL means the relation is still valid — confirm against callers
21
+ table.timestamps(false, true); // created_at / updated_at
22
+ });
23
+ };
24
+
25
+ exports.down = function (knex) { // Migration "down": drops the `relation` table.
26
+ return knex.schema.dropTable('relation');
27
+ };
@@ -0,0 +1,16 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `history` table.
2
+ return knex.schema.createTable('history', (table) => {
3
+ table.increments('id').primary();
4
+ table.text('target_type').notNullable();
5
+ table.integer('target_id').notNullable(); // no foreign key is declared; NOTE(review): presumably target_type names the referenced table — confirm
6
+ table.text('event').notNullable();
7
+ table.text('old_content');
8
+ table.text('new_content');
9
+ table.text('triggered_by');
10
+ table.timestamps(false, true); // created_at / updated_at
11
+ });
12
+ };
13
+
14
+ exports.down = function (knex) { // Migration "down": drops the `history` table.
15
+ return knex.schema.dropTable('history');
16
+ };
@@ -0,0 +1,32 @@
1
+ exports.up = function (knex) { // Migration "up": adds namespace/external_id to `entity` plus a composite unique key, then creates two partial indexes on `relation`.
2
+ return knex.schema
3
+ .alterTable('entity', (table) => {
4
+ table.text('namespace').defaultTo('product/lms').index(); // nullable column with a hard-coded default of 'product/lms'
5
+ table.text('external_id');
6
+ table.unique(['name', 'entity_type', 'namespace']);
7
+ })
8
+ .then(() =>
9
+ knex.raw(`
10
+ CREATE INDEX relation_source_type_idx
11
+ ON relation (source_id, relation_type)
12
+ WHERE invalid_at IS NULL;
13
+ CREATE INDEX relation_target_type_idx
14
+ ON relation (target_id, relation_type)
15
+ WHERE invalid_at IS NULL;
16
+ `) // both statements are sent in one raw call; each index covers only rows whose invalid_at is NULL
17
+ );
18
+ };
19
+
20
+ exports.down = function (knex) { // Migration "down": drops the two relation indexes, then removes the unique key and the two columns from `entity`.
21
+ return knex.raw(`
22
+ DROP INDEX IF EXISTS relation_target_type_idx;
23
+ DROP INDEX IF EXISTS relation_source_type_idx;
24
+ `) // indexes are dropped in the reverse of their creation order
25
+ .then(() =>
26
+ knex.schema.alterTable('entity', (table) => {
27
+ table.dropUnique(['name', 'entity_type', 'namespace']); // same column list as the unique key added in `up`
28
+ table.dropColumn('external_id');
29
+ table.dropColumn('namespace');
30
+ })
31
+ );
32
+ };
@@ -0,0 +1,22 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `fact_entity` join table and its three indexes.
2
+ return knex.schema
3
+ .createTable('fact_entity', (table) => {
4
+ table.increments('id').primary();
5
+ table.integer('fact_id').notNullable().references('id').inTable('fact').onDelete('CASCADE'); // link row is removed with its fact
6
+ table.integer('entity_id').notNullable().references('id').inTable('entity').onDelete('CASCADE'); // link row is removed with its entity
7
+ table.text('mention_type').defaultTo('content');
8
+ table.integer('mention_count').defaultTo(1);
9
+ table.timestamps(false, true); // created_at / updated_at
10
+ })
11
+ .then(() =>
12
+ knex.raw(`
13
+ CREATE INDEX fact_entity_fact_id_idx ON fact_entity (fact_id);
14
+ CREATE INDEX fact_entity_entity_id_idx ON fact_entity (entity_id);
15
+ CREATE UNIQUE INDEX fact_entity_unique_idx ON fact_entity (fact_id, entity_id, mention_type);
16
+ `) // one lookup index per foreign key, plus uniqueness on (fact_id, entity_id, mention_type)
17
+ );
18
+ };
19
+
20
+ exports.down = function (knex) { // Migration "down": drops the `fact_entity` table.
21
+ return knex.schema.dropTable('fact_entity');
22
+ };
@@ -0,0 +1,15 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `api_key` table.
2
+ return knex.schema.createTable('api_key', (table) => {
3
+ table.increments('id').primary();
4
+ table.text('key_hash').notNullable().unique(); // column holds a hash value, unique per key
5
+ table.text('name').notNullable();
6
+ table.specificType('namespaces', 'text[]').notNullable().defaultTo('{}'); // text array, empty by default
7
+ table.text('role').notNullable().defaultTo('reader');
8
+ table.boolean('active').notNullable().defaultTo(true);
9
+ table.timestamps(false, true); // created_at / updated_at
10
+ });
11
+ };
12
+
13
+ exports.down = function (knex) { // Migration "down": drops the `api_key` table.
14
+ return knex.schema.dropTable('api_key');
15
+ };
@@ -0,0 +1,13 @@
1
+ exports.up = function (knex) { // Migration "up": adds the nullable `entity_types` and `merged_with` columns to `entity`.
2
+ return knex.schema.table('entity', (table) => {
3
+ table.text('entity_types').nullable(); // stored as plain text; NOTE(review): encoding of multiple types is not visible here — confirm
4
+ table.integer('merged_with').nullable().references('id').inTable('entity'); // self-reference to another entity row; no ON DELETE action is set
5
+ });
6
+ };
7
+
8
+ exports.down = function (knex) { // Migration "down": removes the two columns added by `up`.
9
+ return knex.schema.table('entity', (table) => {
10
+ table.dropColumn('entity_types');
11
+ table.dropColumn('merged_with');
12
+ });
13
+ };
@@ -0,0 +1,46 @@
1
+ exports.up = function (knex) { // Migration "up": creates `connection`, then `sync_run` and `sync_schedule`, which both reference it.
2
+ return knex.schema
3
+ .createTable('connection', (table) => {
4
+ table.increments('id').primary();
5
+ table.text('uid').notNullable().unique();
6
+ table.text('name').notNullable();
7
+ table.text('connector_type').notNullable();
8
+ table.jsonb('config').notNullable().defaultTo('{}'); // JSONB, empty object by default
9
+ table.binary('credentials_encrypted'); // binary column; encryption itself is not performed by this migration
10
+ table.text('namespace').notNullable();
11
+ table.text('status').notNullable().defaultTo('pending');
12
+ table.timestamp('last_check_at');
13
+ table.timestamps(false, true); // created_at / updated_at
14
+ })
15
+ .createTable('sync_run', (table) => {
16
+ table.increments('id').primary();
17
+ table.text('uid').notNullable().unique();
18
+ table.integer('connection_id').notNullable().references('id').inTable('connection').onDelete('CASCADE'); // runs are removed with their connection
19
+ table.text('pipeline_type').notNullable();
20
+ table.text('sync_type').notNullable().defaultTo('full');
21
+ table.text('status').notNullable().defaultTo('pending');
22
+ table.jsonb('state_before');
23
+ table.jsonb('state_after');
24
+ table.integer('records_read').defaultTo(0);
25
+ table.integer('records_written').defaultTo(0);
26
+ table.text('error_message');
27
+ table.timestamp('started_at');
28
+ table.timestamp('completed_at');
29
+ table.timestamps(false, true); // created_at / updated_at
30
+ })
31
+ .createTable('sync_schedule', (table) => {
32
+ table.increments('id').primary();
33
+ table.integer('connection_id').notNullable().references('id').inTable('connection').onDelete('CASCADE'); // schedules are removed with their connection
34
+ table.text('cron_expression');
35
+ table.text('sync_type').notNullable().defaultTo('incremental'); // differs from sync_run, whose default is 'full'
36
+ table.boolean('enabled').notNullable().defaultTo(true);
37
+ table.timestamps(false, true); // created_at / updated_at
38
+ });
39
+ };
40
+
41
+ exports.down = function (knex) { // Migration "down": drops the three connector tables, dependents before `connection`.
42
+ return knex.schema
43
+ .dropTableIfExists('sync_schedule')
44
+ .dropTableIfExists('sync_run')
45
+ .dropTableIfExists('connection'); // dropped last because the other two tables reference it
46
+ };
@@ -0,0 +1,11 @@
1
+ exports.up = function (knex) { // Migration "up": adds the nullable text column `contextual_prefix` to `chunk`.
2
+ return knex.schema.alterTable('chunk', (table) => {
3
+ table.text('contextual_prefix');
4
+ });
5
+ };
6
+
7
+ exports.down = function (knex) { // Migration "down": removes `contextual_prefix` from `chunk`.
8
+ return knex.schema.alterTable('chunk', (table) => {
9
+ table.dropColumn('contextual_prefix');
10
+ });
11
+ };
@@ -0,0 +1,15 @@
1
+ exports.up = function (knex) { // Migration "up": adds `valid_from` / `valid_until` to `fact` and backfills `valid_from`.
2
+ return knex.schema
3
+ .alterTable('fact', (table) => {
4
+ table.timestamp('valid_from');
5
+ table.timestamp('valid_until'); // left NULL for every existing row
6
+ })
7
+ .then(() => knex.raw('UPDATE fact SET valid_from = created_at')); // backfill: every existing fact gets valid_from = its created_at
8
+ };
9
+
10
+ exports.down = function (knex) { // Migration "down": removes the two validity columns from `fact`.
11
+ return knex.schema.alterTable('fact', (table) => {
12
+ table.dropColumn('valid_from');
13
+ table.dropColumn('valid_until');
14
+ });
15
+ };
@@ -0,0 +1,11 @@
1
+ exports.up = function (knex) { // Migration "up": adds the `importance` text column to `fact`.
2
+ return knex.schema.alterTable('fact', (table) => {
3
+ table.text('importance').defaultTo('supplementary'); // default is 'supplementary'; the column is nullable
4
+ });
5
+ };
6
+
7
+ exports.down = function (knex) { // Migration "down": removes `importance` from `fact`.
8
+ return knex.schema.alterTable('fact', (table) => {
9
+ table.dropColumn('importance');
10
+ });
11
+ };
@@ -0,0 +1,13 @@
1
+ exports.up = function (knex) { // Migration "up": adds access-tracking columns `access_count` and `last_accessed_at` to `fact`.
2
+ return knex.schema.alterTable('fact', (table) => {
3
+ table.integer('access_count').defaultTo(0);
4
+ table.timestamp('last_accessed_at');
5
+ });
6
+ };
7
+
8
+ exports.down = function (knex) { // Migration "down": removes the two access-tracking columns from `fact`.
9
+ return knex.schema.alterTable('fact', (table) => {
10
+ table.dropColumn('access_count');
11
+ table.dropColumn('last_accessed_at');
12
+ });
13
+ };
@@ -0,0 +1,58 @@
1
+ exports.up = async function (knex) {
2
+ // Count duplicates before deleting so the operator knows what was removed
3
+ const docDupes = await knex.raw(`
4
+ SELECT COUNT(*) AS cnt FROM document
5
+ WHERE id NOT IN (SELECT MAX(id) FROM document GROUP BY source_path)
6
+ `);
7
+ const docCount = parseInt(docDupes.rows?.[0]?.cnt ?? 0, 10);
8
+ if (docCount > 0) {
9
+ console.warn(`[migration] Removing ${docCount} duplicate document rows (keeping latest per source_path)`);
10
+ }
11
+
12
+ await knex.raw(`
13
+ DELETE FROM document
14
+ WHERE id NOT IN (
15
+ SELECT MAX(id) FROM document GROUP BY source_path
16
+ )
17
+ `);
18
+
19
+ await knex.schema.alterTable('document', (table) => {
20
+ table.unique('source_path');
21
+ });
22
+
23
+ const relDupes = await knex.raw(`
24
+ SELECT COUNT(*) AS cnt FROM relation
25
+ WHERE id NOT IN (
26
+ SELECT MAX(id) FROM relation
27
+ WHERE invalid_at IS NULL
28
+ GROUP BY source_id, target_id, relation_type
29
+ )
30
+ `);
31
+ const relCount = parseInt(relDupes.rows?.[0]?.cnt ?? 0, 10);
32
+ if (relCount > 0) {
33
+ console.warn(`[migration] Removing ${relCount} duplicate relation rows (keeping latest per source/target/type)`);
34
+ }
35
+
36
+ await knex.raw(`
37
+ DELETE FROM relation
38
+ WHERE id NOT IN (
39
+ SELECT MAX(id) FROM relation
40
+ WHERE invalid_at IS NULL
41
+ GROUP BY source_id, target_id, relation_type
42
+ )
43
+ `);
44
+
45
+ await knex.schema.alterTable('relation', (table) => {
46
+ table.unique(['source_id', 'target_id', 'relation_type']);
47
+ });
48
+ };
49
+
50
+ exports.down = async function (knex) { // Migration "down": drops both unique constraints; rows deleted by `up` are not restored.
51
+ await knex.schema.alterTable('relation', (table) => {
52
+ table.dropUnique(['source_id', 'target_id', 'relation_type']);
53
+ });
54
+
55
+ await knex.schema.alterTable('document', (table) => {
56
+ table.dropUnique('source_path');
57
+ });
58
+ };
@@ -0,0 +1,21 @@
1
+ exports.up = function (knex) { // Migration "up": creates the `llm_log` table.
2
+ return knex.schema.createTable('llm_log', (table) => {
3
+ table.increments('id').primary();
4
+ table.text('provider').notNullable().index();
5
+ table.text('model').notNullable().index();
6
+ table.text('caller').index();
7
+ table.text('input');
8
+ table.text('response');
9
+ table.integer('input_tokens').defaultTo(0);
10
+ table.integer('output_tokens').defaultTo(0);
11
+ table.decimal('cost', 10, 6).defaultTo(0); // precision 10, scale 6; NOTE(review): currency unit is not visible here — confirm
12
+ table.integer('duration_ms').defaultTo(0);
13
+ table.text('status').defaultTo('success').index();
14
+ table.text('error');
15
+ table.timestamp('created_at').defaultTo(knex.fn.now()); // only created_at is defined; this table has no updated_at
16
+ });
17
+ };
18
+
19
+ exports.down = function (knex) { // Migration "down": drops the `llm_log` table.
20
+ return knex.schema.dropTable('llm_log');
21
+ };
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Split volatile lifecycle state (access_count, last_accessed_at) off the fact row
3
+ * into a dedicated fact_lifecycle table.
4
+ *
5
+ * Reason: Postgres HNSW indexes cannot do HOT updates. When a column on a row with
6
+ * an HNSW index is UPDATEd — even a column unrelated to the embedding — Postgres
7
+ * creates a new tuple and rewrites the index entry. At high search volume, this
8
+ * causes catastrophic HNSW index bloat and autovacuum pressure.
9
+ *
10
+ * Fix: keep the fact row read-mostly. Lifecycle state (access_count, last_accessed_at)
11
+ * lives in fact_lifecycle with a FK and a trigger for auto-insert on new facts.
12
+ */
13
+
14
+ exports.up = async function (knex) { // Migration "up": creates `fact_lifecycle`, backfills it from `fact`, installs an insert trigger, then drops the moved columns from `fact`.
15
+ await knex.schema.createTable('fact_lifecycle', (table) => {
16
+ table.bigInteger('fact_id').primary().references('id').inTable('fact').onDelete('CASCADE'); // primary key and foreign key at once: at most one lifecycle row per fact, removed with the fact
17
+ table.integer('access_count').notNullable().defaultTo(0);
18
+ table.timestamp('last_accessed_at');
19
+ table.string('stage').notNullable().defaultTo('fresh'); // lifecycle stage: fresh | stable | editing
20
+ table.timestamp('stage_entered_at').notNullable().defaultTo(knex.fn.now());
21
+ table.timestamp('created_at').notNullable().defaultTo(knex.fn.now());
22
+
23
+ table.index('last_accessed_at');
24
+ table.index(['stage', 'stage_entered_at']);
25
+ });
26
+
27
+ // Backfill existing facts into fact_lifecycle
28
+ await knex.raw(`
29
+ INSERT INTO fact_lifecycle (fact_id, access_count, last_accessed_at, stage, stage_entered_at, created_at)
30
+ SELECT
31
+ id,
32
+ COALESCE(access_count, 0),
33
+ last_accessed_at,
34
+ 'stable' AS stage,
35
+ COALESCE(created_at, NOW()) AS stage_entered_at,
36
+ COALESCE(created_at, NOW()) AS created_at
37
+ FROM fact
38
+ ON CONFLICT (fact_id) DO NOTHING
39
+ `); // backfilled rows are marked 'stable' and take their timestamps from the fact's created_at
40
+
41
+ // Trigger to auto-insert a lifecycle row when a new fact is inserted.
42
+ await knex.raw(`
43
+ CREATE OR REPLACE FUNCTION fact_init_lifecycle() RETURNS trigger AS $$
44
+ BEGIN
45
+ INSERT INTO fact_lifecycle (fact_id, access_count, last_accessed_at, stage, stage_entered_at, created_at)
46
+ VALUES (NEW.id, 0, NULL, 'fresh', NOW(), NOW())
47
+ ON CONFLICT (fact_id) DO NOTHING;
48
+ RETURN NEW;
49
+ END;
50
+ $$ LANGUAGE plpgsql;
51
+ `); // rows created by the trigger start in stage 'fresh' with a zero access count
52
+
53
+ await knex.raw(`
54
+ DROP TRIGGER IF EXISTS fact_init_lifecycle_trigger ON fact;
55
+ CREATE TRIGGER fact_init_lifecycle_trigger
56
+ AFTER INSERT ON fact
57
+ FOR EACH ROW EXECUTE FUNCTION fact_init_lifecycle();
58
+ `); // drop-then-create keeps this step re-runnable
59
+
60
+ // Drop the old columns from fact — these have moved to fact_lifecycle.
61
+ await knex.schema.alterTable('fact', (table) => {
62
+ table.dropColumn('access_count'); // done last, after the backfill has copied the values
63
+ table.dropColumn('last_accessed_at');
64
+ });
65
+ };
66
+
67
+ exports.down = async function (knex) { // Migration "down": restores the two columns on `fact`, copies the values back, then removes the trigger, function and `fact_lifecycle`.
68
+ // Re-add columns to fact
69
+ await knex.schema.alterTable('fact', (table) => {
70
+ table.integer('access_count').defaultTo(0);
71
+ table.timestamp('last_accessed_at');
72
+ });
73
+
74
+ // Copy data back
75
+ await knex.raw(`
76
+ UPDATE fact f
77
+ SET access_count = fl.access_count,
78
+ last_accessed_at = fl.last_accessed_at
79
+ FROM fact_lifecycle fl
80
+ WHERE f.id = fl.fact_id
81
+ `); // stage and stage_entered_at have no column on `fact`, so they are discarded
82
+
83
+ await knex.raw('DROP TRIGGER IF EXISTS fact_init_lifecycle_trigger ON fact'); // trigger is dropped before the function it calls
84
+ await knex.raw('DROP FUNCTION IF EXISTS fact_init_lifecycle()');
85
+ await knex.schema.dropTable('fact_lifecycle');
86
+ };