@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Module: deep-validator — LLM re-verification of lowest-confidence Tier 3 entities
|
|
2
|
+
|
|
3
|
+
import type { BatchResult } from "@/decay/types";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
import { invalidateEntity, updateEntity } from "@/graph/entities";
|
|
6
|
+
import type { LlmClient } from "@/shared/llm-client";
|
|
7
|
+
|
|
8
|
+
/**
 * Re-verify a batch of low-confidence Tier 3 entities with an LLM.
 *
 * Selects up to `batchSize` rows from `graph_nodes` that are Tier 3, still
 * active (`t_valid_until IS NULL`), not archived, and whose `last_accessed`
 * is more than 24 hours old, ordered by ascending confidence. For each row
 * the LLM is asked for a YES/NO verdict:
 *
 * - `"YES"` raises confidence by 0.1, capped at 0.9.
 * - `"NO"` invalidates the entity.
 * - Any other verdict is treated as inconclusive: the entity is left
 *   untouched and is not counted as processed, so a malformed classifier
 *   response can never destroy knowledge.
 *
 * Rate limiting is the caller's responsibility (the maintenance scheduler
 * gates this unit on `config.deepValidationRateMs`).
 *
 * @param db - Graph database holding `graph_nodes`.
 * @param llmClient - Client used to classify each entity as YES/NO.
 * @param batchSize - Maximum number of entities to examine in this call.
 * @returns `processed` is the number of entities that were boosted or
 *   invalidated; `remaining` is true only when a full, non-empty batch was
 *   acted on (more work may exist).
 */
export async function deepValidateBatch(
	db: SiaDb,
	llmClient: LlmClient,
	batchSize: number,
): Promise<BatchResult> {
	const cutoff = Date.now() - 86_400_000; // 24h ago

	const { rows } = await db.execute(
		`SELECT id, name, type, content, confidence FROM graph_nodes
		 WHERE trust_tier = 3
		   AND t_valid_until IS NULL
		   AND archived_at IS NULL
		   AND last_accessed < ?
		 ORDER BY confidence ASC
		 LIMIT ?`,
		[cutoff, batchSize],
	);

	let processed = 0;

	for (const row of rows) {
		const entityId = row.id as string;
		const name = row.name as string;
		const content = row.content as string;
		const currentConfidence = row.confidence as number;

		const prompt = `Is the following knowledge entity still likely to be accurate and relevant?\n\nName: ${name}\nContent: ${content}\n\nRespond with YES if it appears valid, or NO if it appears outdated or incorrect.`;

		const verdict = await llmClient.classify(prompt, ["YES", "NO"]);

		if (verdict === "YES") {
			// Boost confidence slightly (capped at 0.9 for Tier 3)
			const newConfidence = Math.min(currentConfidence + 0.1, 0.9);
			await updateEntity(db, entityId, { confidence: newConfidence });
		} else if (verdict === "NO") {
			// Invalidate — the LLM explicitly judged this as no longer valid
			await invalidateEntity(db, entityId);
		} else {
			// Inconclusive answer: invalidation is destructive, so do nothing
			// rather than guess. Not counting it keeps `remaining` false for
			// this batch, which prevents callers from re-looping on it.
			continue;
		}

		processed++;
	}

	// A batch of size 0 is trivially "full"; never report more work for it,
	// otherwise a `while (remaining)` caller would loop forever.
	return { processed, remaining: batchSize > 0 && processed === batchSize };
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// Module: episodic-promoter — re-process failed/incomplete sessions
|
|
2
|
+
|
|
3
|
+
import type { CandidateFact } from "@/capture/types";
|
|
4
|
+
import type { BatchResult } from "@/decay/types";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
|
|
7
|
+
/**
 * Collect the IDs of sessions that still need to be (re-)processed.
 *
 * Two sources are merged and de-duplicated, failed sessions first:
 *   - rows in `sessions_processed` whose `processing_status` is 'failed'
 *   - session IDs present in `episodes` with no `sessions_processed` row
 *
 * @param episodicDb - Database containing `episodes` and `sessions_processed`.
 * @returns Unique session IDs in first-seen order.
 */
async function findUnprocessedSessions(episodicDb: SiaDb): Promise<string[]> {
	const failedResult = await episodicDb.execute(
		"SELECT session_id FROM sessions_processed WHERE processing_status = 'failed'",
	);
	const orphanResult = await episodicDb.execute(
		"SELECT DISTINCT session_id FROM episodes WHERE session_id NOT IN (SELECT session_id FROM sessions_processed)",
	);

	// A Set keeps insertion order, so failed sessions stay ahead of orphans.
	const unique = new Set<string>(
		[...failedResult.rows, ...orphanResult.rows].map((entry) => entry.session_id as string),
	);

	return Array.from(unique);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Convert raw episode rows into CandidateFact instances.
 *
 * Uses a simplified extraction approach: each episode with usable text
 * becomes a single Concept candidate. Full Track A + Track B extraction is
 * not invoked here because this is a maintenance sweep, not a real-time
 * pipeline.
 *
 * Episodes are skipped when `content` is missing, is not a string, or is
 * blank. Skipping non-string content (instead of calling `.trim()` on it)
 * keeps one malformed row from throwing and aborting the whole sweep.
 *
 * @param episodes - Rows read from the `episodes` table.
 * @returns One candidate per usable episode, in input order. `name` is the
 *   first 50 characters of the content, `summary` the first 80,
 *   `trust_tier` defaults to 2 when the row has none, and `confidence` is
 *   fixed at 0.5.
 */
function episodesToCandidates(episodes: Record<string, unknown>[]): CandidateFact[] {
	const candidates: CandidateFact[] = [];

	for (const ep of episodes) {
		const content = ep.content;
		if (typeof content !== "string" || content.trim().length === 0) continue;

		// Only a non-empty string path is recorded.
		const filePaths: string[] = [];
		if (ep.file_path && typeof ep.file_path === "string") {
			filePaths.push(ep.file_path);
		}

		const trustTier = (ep.trust_tier as 1 | 2 | 3 | 4) ?? 2;

		candidates.push({
			type: "Concept",
			name: content.slice(0, 50),
			content,
			summary: content.slice(0, 80),
			tags: [],
			file_paths: filePaths,
			trust_tier: trustTier,
			confidence: 0.5,
			extraction_method: "episodic-promoter",
		});
	}

	return candidates;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Process up to `batchSize` failed or orphaned sessions, promoting their
 * episodic content into the semantic graph via consolidation.
 *
 * For each selected session the episodes are loaded in timestamp order,
 * converted to candidates and consolidated into `graphDb`. On success the
 * session is recorded in `sessions_processed` as 'complete'; if conversion
 * or consolidation throws, it is recorded as 'failed' so a later sweep
 * retries it, and the batch continues with the next session.
 *
 * @param graphDb - Semantic graph database that receives the candidates.
 * @param episodicDb - Episodic database holding `episodes` and
 *   `sessions_processed`.
 * @param batchSize - Maximum number of sessions to attempt. A non-positive
 *   (or NaN) value performs no work.
 * @returns `{ processed, remaining }` where `processed` counts sessions
 *   promoted successfully and `remaining` is true when every slot of the
 *   batch succeeded (more work may exist).
 */
export async function promoteBatch(
	graphDb: SiaDb,
	episodicDb: SiaDb,
	batchSize: number,
): Promise<BatchResult> {
	// Guard before touching the database: with 0 the old result was
	// `remaining: true` with nothing done (callers looping on `remaining`
	// would spin), and a negative value made `slice(0, -k)` silently process
	// all but the last k sessions.
	if (!(batchSize > 0)) {
		return { processed: 0, remaining: false };
	}

	const sessionIds = await findUnprocessedSessions(episodicDb);
	const toProcess = sessionIds.slice(0, batchSize);

	// Dynamic import so capture internals are only loaded when actually needed
	const { consolidate } = await import("@/capture/consolidate");

	let processed = 0;

	for (const sessionId of toProcess) {
		const { rows: episodes } = await episodicDb.execute(
			"SELECT content, trust_tier, tool_name, file_path FROM episodes WHERE session_id = ? ORDER BY ts ASC",
			[sessionId],
		);

		try {
			// Conversion lives inside the try so a malformed episode marks only
			// this session as failed instead of aborting the whole batch.
			const candidates = episodesToCandidates(episodes);

			await consolidate(graphDb, candidates);

			await episodicDb.execute(
				"INSERT OR REPLACE INTO sessions_processed (session_id, processing_status, processed_at, entity_count, pipeline_version) VALUES (?, 'complete', ?, ?, 'maintenance-sweep')",
				[sessionId, Date.now(), candidates.length],
			);

			processed++;
		} catch (_err) {
			// Mark as failed so next sweep retries
			await episodicDb.execute(
				"INSERT OR REPLACE INTO sessions_processed (session_id, processing_status, processed_at, entity_count, pipeline_version) VALUES (?, 'failed', ?, 0, 'maintenance-sweep')",
				[sessionId, Date.now()],
			);
		}
	}

	return { processed, remaining: processed === batchSize };
}
|
|
117
|
+
|
|
118
|
+
/**
 * Drain failed and orphaned sessions by repeatedly promoting a batch of one.
 *
 * The loop continues while `promoteBatch` reports `remaining`, which for a
 * batch size of 1 means the single attempted session succeeded. It therefore
 * stops as soon as nothing is left or a session fails to promote.
 *
 * @param graphDb - Semantic graph database that receives promoted content.
 * @param episodicDb - Episodic database to read sessions from.
 * @returns Total number of sessions successfully promoted.
 */
export async function promoteFailedSessions(graphDb: SiaDb, episodicDb: SiaDb): Promise<number> {
	let promoted = 0;
	let keepGoing = true;

	while (keepGoing) {
		const outcome = await promoteBatch(graphDb, episodicDb, 1);
		promoted += outcome.processed;
		keepGoing = outcome.remaining;
	}

	return promoted;
}
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
// Module: maintenance-scheduler — central orchestrator for decay/lifecycle work units
|
|
2
|
+
//
|
|
3
|
+
// Three trigger modes:
|
|
4
|
+
// 1. Startup Catchup — if > maintenanceInterval since last sweep, run full sweep
|
|
5
|
+
// 2. Idle Opportunistic — on 60s idle gap, run ONE batch from highest-priority unit
|
|
6
|
+
// 3. Session-End Sweep — targeted dedup of current session's entities
|
|
7
|
+
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
import { archiveBatch } from "@/decay/archiver";
|
|
11
|
+
import { bridgeOrphanBatch } from "@/decay/bridge-orphan-cleanup";
|
|
12
|
+
import { consolidationSweepBatch } from "@/decay/consolidation-sweep";
|
|
13
|
+
import { decayBatch } from "@/decay/decay";
|
|
14
|
+
import { deepValidateBatch } from "@/decay/deep-validator";
|
|
15
|
+
import { promoteBatch } from "@/decay/episodic-promoter";
|
|
16
|
+
import { sweepSession } from "@/decay/session-sweeper";
|
|
17
|
+
import type { BatchResult } from "@/decay/types";
|
|
18
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
19
|
+
import { SIA_HOME, type SiaConfig } from "@/shared/config";
|
|
20
|
+
import type { LlmClient } from "@/shared/llm-client";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// State file persistence
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/** Scheduler bookkeeping persisted as JSON, one file per repository. */
interface MaintenanceState {
	/** Epoch-ms time the last full startup sweep finished (0 = never). */
	lastSweepAt: number;
	/** Epoch-ms time of the last session-end sweep — presumably; confirm against its writer. */
	lastSessionSweepAt: number;
	/** Batch offset carried between runs; reset to 0 after a full sweep. */
	pendingBatchOffset: number;
}

/** State used when no (usable) state file exists. Always copied, never returned directly. */
const DEFAULT_STATE: MaintenanceState = {
	lastSweepAt: 0,
	lastSessionSweepAt: 0,
	pendingBatchOffset: 0,
};

/** Resolve `<siaHome or SIA_HOME>/repos/<repoHash>/maintenance.json`. */
function stateFilePath(repoHash: string, siaHome?: string): string {
	return join(siaHome ?? SIA_HOME, "repos", repoHash, "maintenance.json");
}

/** Return `value` when it is a finite number, otherwise `fallback`. */
function finiteNumberOr(value: unknown, fallback: number): number {
	return typeof value === "number" && Number.isFinite(value) ? value : fallback;
}

/**
 * Load the persisted maintenance state for a repository.
 *
 * Never throws for a missing, unreadable or malformed file. The parsed JSON
 * is validated instead of being blindly cast: a non-object payload (e.g.
 * `null` or an array) yields the defaults, and any field that is missing or
 * not a finite number falls back to its default. Without this, a damaged
 * file could make the scheduler dereference `null` or compute `NaN` elapsed
 * time. Unknown extra keys in the file are preserved.
 *
 * @param repoHash - Repository identifier used as the directory name.
 * @param siaHome - Optional override for the SIA home directory.
 * @returns A fresh state object the caller may mutate.
 */
export function loadMaintenanceState(repoHash: string, siaHome?: string): MaintenanceState {
	const path = stateFilePath(repoHash, siaHome);
	if (!existsSync(path)) return { ...DEFAULT_STATE };

	try {
		const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
		if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
			return { ...DEFAULT_STATE };
		}

		const raw = parsed as Record<string, unknown>;
		return {
			...raw,
			lastSweepAt: finiteNumberOr(raw.lastSweepAt, DEFAULT_STATE.lastSweepAt),
			lastSessionSweepAt: finiteNumberOr(raw.lastSessionSweepAt, DEFAULT_STATE.lastSessionSweepAt),
			pendingBatchOffset: finiteNumberOr(raw.pendingBatchOffset, DEFAULT_STATE.pendingBatchOffset),
		};
	} catch {
		// Unreadable file or invalid JSON: start from a clean state.
		return { ...DEFAULT_STATE };
	}
}
|
|
51
|
+
|
|
52
|
+
/**
 * Persist the maintenance state for a repository as pretty-printed JSON.
 *
 * Creates `<siaHome or SIA_HOME>/repos/<repoHash>` (recursively) when it does
 * not exist yet, then overwrites `maintenance.json` inside it.
 *
 * @param repoHash - Repository identifier used as the directory name.
 * @param state - State to write.
 * @param siaHome - Optional override for the SIA home directory.
 */
export function saveMaintenanceState(
	repoHash: string,
	state: MaintenanceState,
	siaHome?: string,
): void {
	const repoDir = join(siaHome ?? SIA_HOME, "repos", repoHash);
	mkdirSync(repoDir, { recursive: true });

	const serialized = JSON.stringify(state, null, 2);
	writeFileSync(stateFilePath(repoHash, siaHome), serialized, "utf-8");
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Work unit registry
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/** Runs one batch of at most `batchSize` items and reports whether more work may remain. */
type WorkUnitFn = (batchSize: number) => Promise<BatchResult>;

/** A named maintenance task that can be executed batch by batch. */
interface WorkUnit {
	name: string;
	fn: WorkUnitFn;
}

/**
 * Assemble the ordered list of maintenance work units for a scheduler.
 *
 * Order is: decay, archival, consolidation, then — only when the matching
 * dependency is supplied — episodic-promotion (needs `episodicDb`),
 * deep-validation (needs `llmClient`) and bridge-orphan (needs `bridgeDb`).
 *
 * @param graphDb - Semantic graph database used by most units.
 * @param episodicDb - Episodic database, or null to omit episodic promotion.
 * @param bridgeDb - Bridge database, or null to omit bridge-orphan cleanup.
 * @param config - Configuration forwarded to the decay and archival units.
 * @param llmClient - LLM client, or null to omit deep validation.
 * @returns Work units in execution order.
 */
function buildWorkUnits(
	graphDb: SiaDb,
	episodicDb: SiaDb | null,
	bridgeDb: SiaDb | null,
	config: SiaConfig,
	llmClient: LlmClient | null,
): WorkUnit[] {
	const registry: WorkUnit[] = [];
	const register = (name: string, fn: WorkUnitFn): void => {
		registry.push({ name, fn });
	};

	// Always-available units.
	// NOTE(review): decay is always invoked with offset 0 — confirm decayBatch
	// does not depend on an advancing offset to make progress across batches.
	register("decay", (size) => decayBatch(graphDb, config, size, 0));
	register("archival", (size) => archiveBatch(graphDb, config, size));
	register("consolidation", (size) => consolidationSweepBatch(graphDb, size));

	// Optional units, enabled by the presence of their dependency.
	if (episodicDb) {
		register("episodic-promotion", (size) => promoteBatch(graphDb, episodicDb, size));
	}
	if (llmClient) {
		register("deep-validation", (size) => deepValidateBatch(graphDb, llmClient, size));
	}
	if (bridgeDb) {
		register("bridge-orphan", (size) => bridgeOrphanBatch(bridgeDb, size));
	}

	return registry;
}
|
|
119
|
+
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
// Scheduler implementation
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
/** Dependencies and settings accepted by `createMaintenanceScheduler`. */
export interface MaintenanceSchedulerOpts {
	/** Semantic graph database; required by the core work units. */
	graphDb: SiaDb;
	/** Episodic database. When absent or null, episodic promotion is not scheduled. */
	episodicDb?: SiaDb | null;
	/** Bridge database. When absent or null, bridge-orphan cleanup is not scheduled. */
	bridgeDb?: SiaDb | null;
	/** Runtime configuration (e.g. `maintenanceInterval`, `deepValidationRateMs`). */
	config: SiaConfig;
	/** Repository identifier; selects the per-repository maintenance state file. */
	repoHash: string;
	/** LLM client. When absent or null, deep validation is not scheduled. */
	llmClient?: LlmClient | null;
	/** Overrides the default SIA home directory for the state file. */
	siaHome?: string;
}
|
|
133
|
+
|
|
134
|
+
/** Lifecycle hooks exposed by a maintenance scheduler instance. */
export interface MaintenanceScheduler {
	/**
	 * Startup catch-up: runs a full sweep when the last one is older than the
	 * configured maintenance interval.
	 * NOTE(review): the implementation in this file declares no parameter and
	 * uses the `repoHash` from the scheduler options — confirm the argument is
	 * intentionally ignored.
	 */
	onStartup(repoHash: string): Promise<void>;
	/** Notifies the scheduler of tool activity; presumably (re)arms the idle timer — confirm. */
	onPostToolUse(): void;
	/** Session-end hook; per the module header, a targeted dedup of the session's entities. */
	onSessionEnd(sessionId: string): Promise<void>;
	/** Stops the scheduler; presumably cancels pending idle work — confirm. */
	stop(): void;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Create a maintenance scheduler instance.
 *
 * The scheduler manages three trigger modes:
 * - Startup: full sweep if overdue (elapsed >= config.maintenanceInterval)
 * - Idle: one batch per idle cycle (config.idleTimeoutMs after the last PostToolUse event)
 * - Session-end: targeted dedup of session entities
 *
 * The idle timer, the stop flag and the last deep-validation timestamp live in this closure,
 * so every call returns an independent scheduler.
 *
 * @param opts - Databases, configuration and identifiers the scheduler operates on.
 * @returns The scheduler's lifecycle hooks.
 */
export function createMaintenanceScheduler(opts: MaintenanceSchedulerOpts): MaintenanceScheduler {
	const { graphDb, config, repoHash, siaHome } = opts;
	const episodicDb = opts.episodicDb ?? null;
	const bridgeDb = opts.bridgeDb ?? null;
	const llmClient = opts.llmClient ?? null;

	let idleTimer: ReturnType<typeof setTimeout> | null = null;
	let stopped = false;
	let lastDeepValidation = 0;

	const workUnits = buildWorkUnits(graphDb, episodicDb, bridgeDb, config, llmClient);

	// -----------------------------------------------------------------------
	// Startup catchup
	// -----------------------------------------------------------------------

	/**
	 * Run every work unit to completion if the last recorded sweep is older than
	 * `config.maintenanceInterval`. Errors thrown by a work unit propagate to the caller
	 * and leave the persisted state untouched.
	 */
	async function onStartup(): Promise<void> {
		if (stopped) return;

		const state = loadMaintenanceState(repoHash, siaHome);
		const elapsed = Date.now() - state.lastSweepAt;

		if (elapsed < config.maintenanceInterval) return;

		// Run full sweep with large batches (500)
		const STARTUP_BATCH = 500;

		for (const unit of workUnits) {
			if (stopped) break;

			// Rate-limit deep validation
			if (unit.name === "deep-validation") {
				const sinceLast = Date.now() - lastDeepValidation;
				if (sinceLast < config.deepValidationRateMs) continue;
			}

			let hasMore = true;
			while (hasMore && !stopped) {
				if (unit.name === "deep-validation") {
					lastDeepValidation = Date.now();
				}

				const result = await unit.fn(STARTUP_BATCH);
				hasMore = result.remaining;

				// Yield to event loop between batches
				await new Promise((r) => setTimeout(r, 0));
			}
		}

		// stop() interrupted the sweep, so it is incomplete. Recording it as finished would
		// suppress the catch-up on the next startup, so leave the persisted state untouched.
		if (stopped) return;

		// FTS5 optimization after full sweep
		try {
			await graphDb.execute("INSERT INTO graph_nodes_fts(graph_nodes_fts) VALUES('optimize')");
		} catch {
			// FTS5 table may not exist in all configurations
		}

		state.lastSweepAt = Date.now();
		state.pendingBatchOffset = 0;
		saveMaintenanceState(repoHash, state, siaHome);
	}

	// -----------------------------------------------------------------------
	// Idle opportunistic
	// -----------------------------------------------------------------------

	/** Run ONE small batch from the first work unit (in list order) that reports processed items. */
	async function runIdleBatch(): Promise<void> {
		for (const unit of workUnits) {
			if (stopped) break;

			// Rate-limit deep validation
			if (unit.name === "deep-validation") {
				const sinceLast = Date.now() - lastDeepValidation;
				if (sinceLast < config.deepValidationRateMs) continue;
				lastDeepValidation = Date.now();
			}

			const result = await unit.fn(50);
			if (result.processed > 0) break; // Did some work, yield back
		}
	}

	/** Arm the idle timer unless the scheduler is stopped or a timer is already pending. */
	function scheduleIdleCheck(): void {
		if (stopped || idleTimer) return;

		idleTimer = setTimeout(() => {
			idleTimer = null;
			if (stopped) return;

			// Nothing awaits a timer callback, so a rejection here would surface as an
			// unhandled promise rejection. Log it instead; the next idle cycle retries.
			runIdleBatch().catch((err: unknown) => {
				console.error("[maintenance] idle batch: failed", err);
			});
		}, config.idleTimeoutMs);
	}

	/** Restart the idle countdown on each tool use. */
	function onPostToolUse(): void {
		if (idleTimer) {
			clearTimeout(idleTimer);
			idleTimer = null;
		}
		scheduleIdleCheck();
	}

	// -----------------------------------------------------------------------
	// Session-end sweep
	// -----------------------------------------------------------------------

	/** Sweep the given session's entities, then persist the time of this session sweep. */
	async function onSessionEnd(sessionId: string): Promise<void> {
		if (stopped) return;

		await sweepSession(graphDb, sessionId);

		const state = loadMaintenanceState(repoHash, siaHome);
		state.lastSessionSweepAt = Date.now();
		saveMaintenanceState(repoHash, state, siaHome);
	}

	// -----------------------------------------------------------------------
	// Stop
	// -----------------------------------------------------------------------

	/** Mark the scheduler stopped and cancel any pending idle timer. */
	function stop(): void {
		stopped = true;
		if (idleTimer) {
			clearTimeout(idleTimer);
			idleTimer = null;
		}
	}

	return {
		// The interface passes a repoHash, but the one from `opts` is always used.
		onStartup: () => onStartup(),
		onPostToolUse,
		onSessionEnd,
		stop,
	};
}
|
|
281
|
+
|
|
282
|
+
// ---------------------------------------------------------------------------
|
|
283
|
+
// Standalone full sweep (for backward compatibility / CLI usage)
|
|
284
|
+
// ---------------------------------------------------------------------------
|
|
285
|
+
|
|
286
|
+
/**
 * Execute every maintenance work unit to completion, one after another.
 *
 * This keeps the legacy "runNightlyJobs" entry point available for CLI invocation.
 * A failure inside one unit is logged and the remaining units still run.
 *
 * @param config - Configuration forwarded to the work-unit builder.
 * @param graphDb - Graph database; also the target of the final FTS5 optimize.
 * @param episodicDb - Optional episodic database (forwarded as `null` when omitted).
 * @param bridgeDb - Optional bridge database (forwarded as `null` when omitted).
 * @param llmClient - Optional LLM client (forwarded as `null` when omitted).
 */
export async function runMaintenanceJobs(
	config: SiaConfig,
	graphDb: SiaDb,
	episodicDb?: SiaDb,
	bridgeDb?: SiaDb,
	llmClient?: LlmClient,
): Promise<void> {
	const jobs = buildWorkUnits(graphDb, episodicDb ?? null, bridgeDb ?? null, config, llmClient ?? null);

	for (const job of jobs) {
		try {
			// Keep pulling batches of 500 until the unit reports nothing remaining.
			for (;;) {
				const { remaining } = await job.fn(500);
				if (!remaining) break;
			}
			console.log(`[maintenance] ${job.name}: complete`);
		} catch (err) {
			console.error(`[maintenance] ${job.name}: failed`, err);
		}
	}

	// Best-effort FTS5 optimize; failures are ignored because the table may not exist.
	try {
		await graphDb.execute("INSERT INTO graph_nodes_fts(graph_nodes_fts) VALUES('optimize')");
	} catch {
		// FTS5 may not exist
	}
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Module: scheduler — re-exports from maintenance-scheduler for backward compatibility
|
|
2
|
+
//
|
|
3
|
+
// The old "nightly scheduler" model has been replaced by the maintenance-scheduler
|
|
4
|
+
// with startup-catchup + idle-opportunistic + session-end triggers.
|
|
5
|
+
|
|
6
|
+
export { runMaintenanceJobs } from "@/decay/maintenance-scheduler";
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// Module: session-sweeper — session-end focused dedup of current session's entities
|
|
2
|
+
|
|
3
|
+
import { wordJaccard } from "@/capture/consolidate";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
|
|
6
|
+
/**
 * Session-scoped dedup pass, run when a session ends.
 *
 * For every active, non-archived node whose `source_episode` equals `sessionId`:
 *   1. load all other active, non-archived nodes of the same type,
 *   2. skip any pair already present in `local_dedup_log`,
 *   3. score the pair with word-level Jaccard similarity,
 *   4. log the verdict: "merged" (> 0.92), "related" (> 0.5) or "different".
 *
 * This function only records verdicts; it does not modify `graph_nodes`.
 *
 * @param db - Database holding `graph_nodes` and `local_dedup_log`.
 * @param sessionId - Value matched against `graph_nodes.source_episode`.
 * @returns Number of pairs newly written to `local_dedup_log`.
 */
export async function sweepSession(db: SiaDb, sessionId: string): Promise<number> {
	const sessionQuery = await db.execute(
		`SELECT id, type, content FROM graph_nodes
		 WHERE source_episode = ?
		   AND t_valid_until IS NULL
		   AND archived_at IS NULL`,
		[sessionId],
	);
	const created = sessionQuery.rows;
	if (created.length === 0) {
		return 0;
	}

	let logged = 0;
	// One timestamp is shared by every row written during this sweep.
	const checkedAt = Date.now();

	for (const node of created) {
		const nodeId = node.id as string;
		const nodeType = node.type as string;
		const nodeText = node.content as string;

		// Same-type, still-active peers; the node itself is excluded.
		const peerQuery = await db.execute(
			`SELECT id, content FROM graph_nodes
			 WHERE type = ?
			   AND id != ?
			   AND t_valid_until IS NULL
			   AND archived_at IS NULL`,
			[nodeType, nodeId],
		);

		for (const peer of peerQuery.rows) {
			const peerId = peer.id as string;

			// Store each pair under a single canonical ordering (smaller id first).
			const nodeFirst = nodeId < peerId;
			const aId = nodeFirst ? nodeId : peerId;
			const bId = nodeFirst ? peerId : nodeId;

			// Pairs that already have a logged verdict are not re-evaluated.
			const prior = await db.execute(
				"SELECT 1 FROM local_dedup_log WHERE entity_a_id = ? AND entity_b_id = ?",
				[aId, bId],
			);
			if (prior.rows.length > 0) {
				continue;
			}

			const score = wordJaccard(nodeText, peer.content as string);
			const decision: string = score > 0.92 ? "merged" : score > 0.5 ? "related" : "different";

			await db.execute(
				"INSERT INTO local_dedup_log (entity_a_id, entity_b_id, decision, checked_at) VALUES (?, ?, ?, ?)",
				[aId, bId, decision, checkedAt],
			);
			logged += 1;
		}
	}

	return logged;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Module: decay/types — shared types for maintenance work units
|
|
2
|
+
|
|
3
|
+
/** Outcome reported by a work unit after it handles one batch. */
export interface BatchResult {
	/** How many items this batch handled. */
	processed: number;

	/** `true` while the work unit still has items left after this batch. */
	remaining: boolean;
}
|
|
10
|
+
|
|
11
|
+
/** Summary returned after a complete importance-decay run. */
export interface DecayResult {
	/** Count of entities the run processed. */
	processed: number;

	/** Elapsed wall-clock time of the run, in milliseconds. */
	durationMs: number;
}
|