@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// Module: scheduler — decides when to run community detection
|
|
2
|
+
|
|
3
|
+
import { detectCommunities } from "@/community/leiden";
|
|
4
|
+
import { buildSummaryTree } from "@/community/raptor";
|
|
5
|
+
import { summarizeCommunities } from "@/community/summarize";
|
|
6
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
7
|
+
import type { SiaConfig } from "@/shared/config";
|
|
8
|
+
import type { LlmClient } from "@/shared/llm-client";
|
|
9
|
+
|
|
10
|
+
/**
 * Count the rows of `graph_nodes` that are currently active.
 *
 * A row is counted when both `t_valid_until` and `archived_at` are NULL.
 *
 * @param db - Database handle the count query is executed on.
 * @returns The `count` column of the first result row coerced with `Number`;
 *   a null/undefined column value is treated as 0.
 */
async function countActiveEntities(db: SiaDb): Promise<number> {
	const { rows } = await db.execute(
		`SELECT COUNT(*) as count
		 FROM graph_nodes
		 WHERE t_valid_until IS NULL AND archived_at IS NULL`,
	);
	const firstRow = rows[0] as { count: number };
	const rawCount = firstRow.count ?? 0;
	return Number(rawCount);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Read the most recent `updated_at` value stored in the `communities` table.
 *
 * @param db - Database handle the query is executed on.
 * @returns The `MAX(updated_at)` value when it comes back as a JavaScript
 *   number; 0 for anything else (including a NULL aggregate).
 */
async function lastRunAt(db: SiaDb): Promise<number> {
	const { rows } = await db.execute("SELECT MAX(updated_at) as ts FROM communities");
	const latest = (rows[0] as { ts: number | null }).ts;
	if (typeof latest !== "number") {
		return 0;
	}
	return latest;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Count active `graph_nodes` rows added after a given timestamp.
 *
 * A row qualifies when `t_valid_until` and `archived_at` are both NULL and
 * either `created_at` or `t_created` is strictly greater than `since`.
 *
 * @param db - Database handle the count query is executed on.
 * @param since - Lower bound (exclusive) bound to both timestamp comparisons.
 * @returns The `count` column of the first result row coerced with `Number`;
 *   a null/undefined column value is treated as 0.
 */
async function newEntitiesSince(db: SiaDb, since: number): Promise<number> {
	// The same bound is supplied twice: once per `?` placeholder.
	const bindings = [since, since];
	const { rows } = await db.execute(
		`SELECT COUNT(*) as count
		 FROM graph_nodes
		 WHERE t_valid_until IS NULL
		   AND archived_at IS NULL
		   AND (created_at > ? OR t_created > ?)`,
		bindings,
	);
	const firstRow = rows[0] as { count: number };
	return Number(firstRow.count ?? 0);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Decide whether community detection should run now.
 *
 * Detection is skipped (with a console warning) while the number of active
 * entities is below `config.communityMinGraphSize`. Otherwise it runs only
 * when the number of entities added since the last community update exceeds
 * `config.communityTriggerNodeCount`.
 *
 * @param db - Database handle passed to the counting helpers.
 * @param config - Supplies `communityMinGraphSize` and `communityTriggerNodeCount`.
 * @returns `true` when detection should run, `false` otherwise.
 */
export async function shouldRunDetection(db: SiaDb, config: SiaConfig): Promise<boolean> {
	const activeCount = await countActiveEntities(db);

	const graphTooSmall = activeCount < config.communityMinGraphSize;
	if (graphTooSmall) {
		const message = `Graph has fewer than ${config.communityMinGraphSize} entities (${activeCount}) — skipping community detection`;
		console.warn(message);
		return false;
	}

	const previousRun = await lastRunAt(db);
	const addedSince = await newEntitiesSince(db, previousRun);
	return addedSince > config.communityTriggerNodeCount;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Runs the community pipeline (detection, summarization, summary tree) when
 * `shouldRunDetection` says it is due.
 */
export class CommunityScheduler {
	/**
	 * @param db - Database handle passed to every pipeline stage.
	 * @param config - Used for the run decision and for the `airGapped` flag.
	 * @param llmClient - Optional client forwarded to summarization and tree building.
	 */
	constructor(
		private readonly db: SiaDb,
		private readonly config: SiaConfig,
		private readonly llmClient?: LlmClient,
	) {}

	/** Report whether detection is due, by delegating to `shouldRunDetection`. */
	async check(): Promise<boolean> {
		return shouldRunDetection(this.db, this.config);
	}

	/**
	 * Run the pipeline if `check()` resolves to true; otherwise do nothing.
	 * The stages are awaited one after another: detect, summarize, build tree.
	 */
	async run(): Promise<void> {
		if (await this.check()) {
			await detectCommunities(this.db);
			const summarizeOptions = { airGapped: this.config.airGapped };
			await summarizeCommunities(this.db, summarizeOptions, this.llmClient);
			await buildSummaryTree(this.db, this.llmClient);
		}
	}

	/** Start `run()` without awaiting it; a rejection is logged via `console.error`. */
	runInBackground(): void {
		const pending = this.run();
		void pending.catch((err) => console.error("Community detection failed:", err));
	}
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// Module: summarize — community summary generation and caching
|
|
2
|
+
|
|
3
|
+
import { createHash } from "node:crypto";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
import type { LlmClient } from "@/shared/llm-client";
|
|
6
|
+
|
|
7
|
+
/** Row shape selected from the `communities` table by `loadCommunities`. */
interface CommunityRow {
	/** Community identifier; used for member lookups and as the UPDATE key. */
	id: string;
	/** Current member count; compared with the count at the last summary. */
	member_count: number;
	/** Member count stored when the summary was last written. */
	last_summary_member_count: number;
	/** Stored summary text; a falsy value means a summary must be generated. */
	summary: string | null;
}
|
|
13
|
+
|
|
14
|
+
/** Row shape selected from `graph_nodes` (joined via `community_members`) by `topEntities`. */
interface TopEntityRow {
	/** Identifier of the graph node. */
	id: string;
	/** Node name; used as the label in generated descriptions. */
	name: string;
	/** Node summary; a falsy value is rendered as "No summary available.". */
	summary: string;
	/** Importance value; `topEntities` orders by it, highest first. */
	importance: number;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param value - Text to hash.
 * @returns The digest as a hexadecimal string.
 */
function sha256(value: string): string {
	const hasher = createHash("sha256");
	hasher.update(value);
	return hasher.digest("hex");
}
|
|
24
|
+
|
|
25
|
+
/**
 * Produce the summary text for a community from its top entities.
 *
 * - No entities: returns a fixed "no active members" sentence.
 * - No LLM client: returns a "Top members — …" line listing each entity as
 *   `name: summary`, separated by "; ".
 * - Otherwise: sends a prompt containing the entity list to
 *   `llmClient.summarize` and returns its result.
 *
 * @param entities - Entities to describe; a falsy `summary` is rendered as
 *   "No summary available.".
 * @param llmClient - Optional client used to generate the paragraph.
 * @returns The summary text.
 */
async function generateSummary(entities: TopEntityRow[], llmClient?: LlmClient): Promise<string> {
	if (entities.length === 0) {
		return "Community has no active members (all entities invalidated or archived).";
	}

	const lines: string[] = [];
	for (const entity of entities) {
		lines.push(`${entity.name}: ${entity.summary || "No summary available."}`);
	}
	const entityDescriptions = lines.join("\n");

	if (llmClient) {
		const prompt = `Summarize this code community in a single coherent paragraph (2-4 sentences). Describe what the community does, how its members relate, and what purpose it serves in the codebase.\n\nMembers:\n${entityDescriptions}`;
		return llmClient.summarize(prompt);
	}

	// Fallback without an LLM: flatten every newline into "; ".
	return `Top members — ${entityDescriptions.replace(/\n/g, "; ")}`;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Load every row of the `communities` table with the columns needed to decide
 * whether a summary must be (re)generated.
 *
 * @param db - Database handle the query is executed on.
 * @returns The result rows, typed as `CommunityRow[]` without validation.
 */
async function loadCommunities(db: SiaDb): Promise<CommunityRow[]> {
	const { rows } = await db.execute(
		`SELECT id, member_count, last_summary_member_count, summary
		 FROM communities`,
	);
	return rows as unknown as CommunityRow[];
}
|
|
49
|
+
|
|
50
|
+
/**
 * Fetch up to five active members of a community, highest importance first.
 *
 * Members are joined from `community_members` to `graph_nodes` and filtered to
 * rows whose `t_valid_until` and `archived_at` are both NULL.
 *
 * @param db - Database (or transaction) handle the query is executed on.
 * @param communityId - Community whose members are selected.
 * @returns The result rows, typed as `TopEntityRow[]` without validation.
 */
async function topEntities(db: SiaDb, communityId: string): Promise<TopEntityRow[]> {
	const bindings = [communityId];
	const { rows } = await db.execute(
		`SELECT e.id, e.name, e.summary, e.importance
		 FROM community_members cm
		 JOIN graph_nodes e ON cm.entity_id = e.id
		 WHERE cm.community_id = ?
		   AND e.t_valid_until IS NULL
		   AND e.archived_at IS NULL
		 ORDER BY e.importance DESC
		 LIMIT 5`,
		bindings,
	);
	return rows as unknown as TopEntityRow[];
}
|
|
64
|
+
|
|
65
|
+
/**
 * List the entity ids recorded for a community, ordered by `entity_id`.
 *
 * Unlike `topEntities`, this query applies no liveness filter and no limit.
 *
 * @param db - Database (or transaction) handle the query is executed on.
 * @param communityId - Community whose member ids are selected.
 * @returns The `entity_id` value of each row, in result order.
 */
async function memberIds(db: SiaDb, communityId: string): Promise<string[]> {
	const { rows } = await db.execute(
		`SELECT entity_id
		 FROM community_members
		 WHERE community_id = ?
		 ORDER BY entity_id`,
		[communityId],
	);
	const ids: string[] = [];
	for (const row of rows as Array<{ entity_id: string }>) {
		ids.push(row.entity_id);
	}
	return ids;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Generate and store summaries for communities that need one.
 *
 * A community needs a summary when it has none yet, or when its member count
 * changed by more than 20% relative to the count at the last summary (with a
 * denominator of at least 1). For each such community the top entities are
 * summarized, a SHA-256 hash of the comma-joined member ids is computed, and
 * the `communities` row is updated.
 *
 * All reads of members and all updates run inside a single `db.transaction`
 * callback; note that summary generation (including any LLM call) is awaited
 * inside that callback as well.
 *
 * @param db - Database handle; communities are loaded on it before the transaction starts.
 * @param config - When `airGapped` is true nothing is done and 0 is returned.
 * @param llmClient - Optional client forwarded to `generateSummary`.
 * @returns The number of communities whose summary was written.
 */
export async function summarizeCommunities(
	db: SiaDb,
	config: { airGapped: boolean },
	llmClient?: LlmClient,
): Promise<number> {
	if (config.airGapped) {
		return 0;
	}

	const communities = await loadCommunities(db);
	// One timestamp is shared by every row updated in this run.
	const now = Date.now();
	let generated = 0;

	await db.transaction(async (tx) => {
		for (const community of communities) {
			const previousCount = community.last_summary_member_count;
			const drift =
				Math.abs(community.member_count - previousCount) / Math.max(previousCount, 1);
			const upToDate = Boolean(community.summary) && !(drift > 0.2);
			if (upToDate) {
				continue;
			}

			const members = await topEntities(tx, community.id);
			const text = await generateSummary(members, llmClient);
			const allIds = await memberIds(tx, community.id);
			const digest = sha256(allIds.join(","));

			await tx.execute(
				`UPDATE communities
				 SET summary = ?, summary_hash = ?, last_summary_member_count = ?, updated_at = ?
				 WHERE id = ?`,
				[text, digest, community.member_count, now, community.id],
			);
			generated += 1;
		}
	});

	return generated;
}
|
|
114
|
+
|
|
115
|
+
export type { TopEntityRow };
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// Module: archiver — soft-archive decayed entities
|
|
2
|
+
|
|
3
|
+
import type { BatchResult } from "@/decay/types";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
import { archiveEntity } from "@/graph/entities";
|
|
6
|
+
import type { SiaConfig } from "@/shared/config";
|
|
7
|
+
|
|
8
|
+
/** Batch size passed to archiveBatch by the archiveDecayedEntities loop. */
const DEFAULT_BATCH_SIZE = 50;

/** Inactivity window of 90 days, expressed in milliseconds. */
const INACTIVE_DAYS_MS = 90 * 24 * 60 * 60 * 1000;
|
|
13
|
+
|
|
14
|
+
/**
 * Archive a single batch of decayed entities.
 *
 * Selects up to `batchSize` rows from `graph_nodes` matching ALL conditions:
 *   - importance below `config.archiveThreshold`
 *   - `edge_count = 0` (isolated node)
 *   - `last_accessed` older than 90 days
 *   - not bi-temporally invalidated (`t_valid_until IS NULL`)
 *   - not already archived (`archived_at IS NULL`)
 *
 * Candidates are taken lowest-importance first and each id is passed to
 * `archiveEntity`, one after another. (Per the original note, archiveEntity is
 * expected to set `archived_at` only — confirm in `@/graph/entities`.)
 *
 * @param db - Database handle used for the selection and for `archiveEntity`.
 * @param config - Supplies `archiveThreshold`.
 * @param batchSize - Maximum number of rows to select; bound to `LIMIT ?`.
 * @returns `processed` is the number of rows handled. `remaining` is true only
 *   when a non-empty, completely full batch was handled, i.e. more candidates
 *   may exist. An empty batch never reports `remaining`, so a caller looping
 *   on it terminates even when `batchSize` is 0.
 */
export async function archiveBatch(
	db: SiaDb,
	config: SiaConfig,
	batchSize: number,
): Promise<BatchResult> {
	const cutoff = Date.now() - INACTIVE_DAYS_MS;

	const { rows } = await db.execute(
		`SELECT id FROM graph_nodes
		 WHERE importance < ?
		   AND edge_count = 0
		   AND last_accessed < ?
		   AND t_valid_until IS NULL
		   AND archived_at IS NULL
		 ORDER BY importance ASC
		 LIMIT ?`,
		[config.archiveThreshold, cutoff, batchSize],
	);

	// Archive sequentially so the writes are issued one at a time.
	for (const row of rows) {
		await archiveEntity(db, row.id as string);
	}

	const processed = rows.length;
	// Requiring progress prevents `remaining: true` on an empty batch
	// (batchSize 0 would otherwise satisfy `processed === batchSize` forever).
	const remaining = processed > 0 && processed === batchSize;
	return { processed, remaining };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Archive every decayed entity in the graph.
 *
 * Calls `archiveBatch` with `DEFAULT_BATCH_SIZE` over and over until a batch
 * reports that nothing remains.
 *
 * @param db - Database handle forwarded to `archiveBatch`.
 * @param config - Configuration forwarded to `archiveBatch`.
 * @returns The total number of entities processed across all batches.
 */
export async function archiveDecayedEntities(db: SiaDb, config: SiaConfig): Promise<number> {
	let archived = 0;

	for (;;) {
		const { processed, remaining } = await archiveBatch(db, config, DEFAULT_BATCH_SIZE);
		archived += processed;
		if (!remaining) {
			return archived;
		}
	}
}
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
// Module: bridge-orphan-cleanup — invalidate cross-repo edges where source/target no longer active
|
|
2
|
+
|
|
3
|
+
import type { BatchResult } from "@/decay/types";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
|
|
6
|
+
/**
 * Report whether a string may be interpolated into SQL as an ATTACH alias.
 *
 * Accepts only non-empty strings made up entirely of ASCII letters, digits
 * and underscores.
 */
function isSafeAlias(s: string): boolean {
	const aliasPattern = /^[a-zA-Z0-9_]+$/;
	return aliasPattern.exec(s) !== null;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Build an ATTACH alias from a repo id.
 *
 * Every character that is not an ASCII letter or digit is removed, the result
 * is cut to its first 16 characters, and `peer_` is prepended.
 */
function repoAlias(repoId: string): string {
	const alphanumericOnly = repoId.replace(/[^a-zA-Z0-9]/g, "");
	const shortened = alphanumericOnly.slice(0, 16);
	return "peer_" + shortened;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Check whether an entity is "live" in a peer graph database using ATTACH.
 *
 * The peer file is attached under `alias`, `graph_nodes` is probed for a row
 * with the given id whose `archived_at` and `t_valid_until` are both NULL,
 * and the database is detached again.
 *
 * The check is deliberately conservative: it answers `true` (do not
 * invalidate) when the alias fails `isSafeAlias`, and when any step throws
 * (for example because the file cannot be attached). In the failure case a
 * best-effort DETACH is attempted and its own error is ignored.
 *
 * @param rawSqlite - Handle exposing `prepare(sql)` with `get`/`run` statements.
 * @param graphDbPath - Path bound to the ATTACH statement's `?` parameter.
 * @param entityId - Id looked up in the attached `graph_nodes` table.
 * @param alias - Schema name for the attachment; interpolated into the SQL
 *   because it cannot be bound as a parameter.
 * @returns `true` if a live row was found or liveness could not be verified;
 *   `false` only when the lookup succeeded and returned no row.
 */
function checkEntityLivenessViaAttach(
	rawSqlite: {
		prepare: (sql: string) => {
			get: (...args: unknown[]) => unknown;
			run: (...args: unknown[]) => void;
		};
		exec?: (sql: string) => void;
	},
	graphDbPath: string,
	entityId: string,
	alias: string,
): boolean {
	if (!isSafeAlias(alias)) {
		// Unsafe alias: never interpolate it, and never invalidate because of it.
		return true;
	}

	const detachSql = `DETACH DATABASE ${alias}`;
	let found: unknown;

	try {
		rawSqlite.prepare(`ATTACH DATABASE ? AS ${alias}`).run(graphDbPath);

		const lookup = rawSqlite.prepare(
			`SELECT 1 FROM ${alias}.graph_nodes
			 WHERE id = ?
			   AND archived_at IS NULL
			   AND t_valid_until IS NULL`,
		);
		found = lookup.get(entityId);

		rawSqlite.prepare(detachSql).run();
	} catch {
		// Something failed (e.g. the file does not exist): clean up if possible.
		try {
			rawSqlite.prepare(detachSql).run();
		} catch {
			// ignore DETACH error
		}
		return true; // could not verify, so do not invalidate
	}

	return found != null;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Find and invalidate orphaned cross-repo edges in bridge.db, one page at a time.
 *
 * An edge is invalidated (its `t_valid_until` and `t_expired` are set to the
 * current time) when:
 *   - its source or target entity id is null/empty, or
 *   - ATTACH-based verification is available and `checkEntityLivenessViaAttach`
 *     reports the source or target entity as not live in the peer's graph.db.
 *
 * ATTACH-based verification is used only when `metaDb` is provided AND
 * `bridgeDb.rawSqlite()` returns a handle. The graph.db path is read from the
 * `path` column of metaDb's `repos` table (the path IS the graph.db file path
 * as registered). A repo with no registered path is not checked, so its edges
 * are kept. When ATTACH isn't available (e.g., LibSqlDb where rawSqlite()
 * returns null) only the null/empty-endpoint check runs.
 *
 * Edges that turn out to be live stay active, so a plain
 * `WHERE t_valid_until IS NULL LIMIT ?` would return the same rows on every
 * call. The scan is therefore ordered by `id` and can be resumed with
 * `afterId` (keyset pagination).
 *
 * @param bridgeDb  Handle to bridge.db.
 * @param batchSize Maximum number of edges to examine in this call.
 * @param metaDb    Optional meta database used to resolve repo paths.
 * @param afterId   Optional exclusive cursor: only edges whose `id` is greater
 *                  than this value are examined. Pass the previous call's
 *                  `lastId` to continue a scan. When omitted, the scan starts
 *                  from the first active edge.
 * @returns `processed` — number of edges examined; `remaining` — true when the
 *          page was full, so more edges may follow; `lastId` — id of the last
 *          edge examined (or the incoming cursor / null when the page is empty).
 */
export async function bridgeOrphanBatch(
  bridgeDb: SiaDb,
  batchSize: number,
  metaDb?: SiaDb,
  afterId?: string,
): Promise<BatchResult & { lastId: string | null }> {
  // Keyset pagination: a stable order plus an exclusive lower bound guarantees
  // forward progress even when no edge in the page gets invalidated.
  const cursorFilter = afterId === undefined ? "" : "AND id > ?";
  const { rows } = await bridgeDb.execute(
    `SELECT id, source_repo_id, source_entity_id, target_repo_id, target_entity_id
     FROM cross_repo_edges
     WHERE t_valid_until IS NULL ${cursorFilter}
     ORDER BY id
     LIMIT ?`,
    afterId === undefined ? [batchSize] : [afterId, batchSize],
  );

  if (rows.length === 0) {
    return { processed: 0, remaining: false, lastId: afterId ?? null };
  }

  const now = Date.now();

  // Determine if we can use ATTACH-based verification
  const rawSqlite = bridgeDb.rawSqlite();
  const canUseAttach = rawSqlite !== null && metaDb !== undefined;

  // Cache repo paths looked up from metaDb to avoid repeated queries
  const repoPathCache = new Map<string, string | null>();

  async function getRepoPath(repoId: string): Promise<string | null> {
    if (repoPathCache.has(repoId)) {
      return repoPathCache.get(repoId) ?? null;
    }
    const result = await metaDb?.execute("SELECT path FROM repos WHERE id = ?", [repoId]);
    const path = (result?.rows[0]?.path as string) ?? null;
    repoPathCache.set(repoId, path);
    return path;
  }

  // Close the edge's validity window at `now`.
  async function invalidateEdge(edgeId: string): Promise<void> {
    await bridgeDb.execute(
      "UPDATE cross_repo_edges SET t_valid_until = ?, t_expired = ? WHERE id = ?",
      [now, now, edgeId],
    );
  }

  let processed = 0;
  let lastId: string | null = afterId ?? null;

  for (const row of rows) {
    const edgeId = row.id as string;
    const sourceId = row.source_entity_id as string;
    const targetId = row.target_entity_id as string;
    const sourceRepoId = row.source_repo_id as string;
    const targetRepoId = row.target_repo_id as string;

    // Null or empty endpoints are definitely orphaned — no lookup needed.
    let isOrphan = !sourceId || !targetId;

    if (!isOrphan && canUseAttach) {
      // ATTACH-based liveness verification
      const sourceRepoPath = await getRepoPath(sourceRepoId);
      const targetRepoPath = await getRepoPath(targetRepoId);

      if (sourceRepoPath) {
        const sourceLive = checkEntityLivenessViaAttach(
          rawSqlite as Parameters<typeof checkEntityLivenessViaAttach>[0],
          sourceRepoPath,
          sourceId,
          repoAlias(sourceRepoId),
        );
        if (!sourceLive) {
          isOrphan = true;
        }
      }

      // Skip the target check when the source already condemned the edge.
      if (!isOrphan && targetRepoPath) {
        const targetAlias = repoAlias(targetRepoId);
        // If source and target are in the same repo, generate a distinct alias
        const targetAliasResolved =
          targetAlias === repoAlias(sourceRepoId) ? `${targetAlias}t` : targetAlias;

        const targetLive = checkEntityLivenessViaAttach(
          rawSqlite as Parameters<typeof checkEntityLivenessViaAttach>[0],
          targetRepoPath,
          targetId,
          targetAliasResolved,
        );
        if (!targetLive) {
          isOrphan = true;
        }
      }
    }

    if (isOrphan) {
      await invalidateEdge(edgeId);
    }

    // Every examined edge counts as processed, whether or not it was invalidated.
    processed++;
    lastId = edgeId;
  }

  return { processed, remaining: rows.length === batchSize, lastId };
}

/**
 * Full cleanup pass: invalidate all orphaned cross-repo edges.
 * Processes in batches of 50, resuming each batch after the last edge id of
 * the previous one so that live edges are examined exactly once per pass.
 *
 * Optionally accepts a `metaDb` to enable ATTACH-based entity liveness
 * verification against peer graph.db files.
 *
 * @returns The total number of edges examined (not only those invalidated).
 */
export async function cleanupBridgeOrphans(bridgeDb: SiaDb, metaDb?: SiaDb): Promise<number> {
  let total = 0;
  let cursor: string | undefined;

  for (;;) {
    const { processed, remaining, lastId } = await bridgeOrphanBatch(bridgeDb, 50, metaDb, cursor);
    total += processed;
    // Stop when the page was short, or when there is no cursor to advance from.
    if (!remaining || lastId === null) break;
    cursor = lastId;
  }

  return total;
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// Module: consolidation-sweep — maintenance dedup of similar entities
|
|
2
|
+
|
|
3
|
+
import { wordJaccard } from "@/capture/consolidate";
|
|
4
|
+
import type { BatchResult } from "@/decay/types";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
|
|
7
|
+
interface ActiveEntity {
|
|
8
|
+
id: string;
|
|
9
|
+
type: string;
|
|
10
|
+
name: string;
|
|
11
|
+
content: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
 * Put two ids into canonical order so a pair is always keyed the same way:
 * the id that compares smaller (string `<`) comes first.
 */
function canonicalPair(idA: string, idB: string): [string, string] {
  if (idB <= idA) {
    return [idB, idA];
  }
  return [idA, idB];
}
|
|
20
|
+
|
|
21
|
+
/**
 * Compare up to `batchSize` not-yet-checked pairs of active entities.
 *
 * Active entities (not invalidated, not archived) are loaded and bucketed by
 * type. Each same-type pair that has no row in local_dedup_log is scored with
 * `wordJaccard` on the two contents and logged with one decision:
 *   - similarity > 0.92 => 'merged'
 *   - similarity > 0.5  => 'related'
 *   - otherwise         => 'different'
 *
 * Only the log row is written here; no entity is modified.
 *
 * @returns `processed` — number of pairs logged in this call; `remaining` —
 *          true when the batch limit was reached, so more pairs may be pending.
 */
export async function consolidationSweepBatch(db: SiaDb, batchSize: number): Promise<BatchResult> {
  // Load every active entity, ordered so buckets fill in a stable order.
  const active = await db.execute(
    `SELECT id, type, name, content FROM graph_nodes
     WHERE t_valid_until IS NULL AND archived_at IS NULL
     ORDER BY type, name`,
  );

  // Bucket entities by type; only same-type entities are compared.
  const buckets = new Map<string, ActiveEntity[]>();
  for (const candidate of active.rows as unknown as ActiveEntity[]) {
    const bucket = buckets.get(candidate.type);
    if (bucket) {
      bucket.push(candidate);
    } else {
      buckets.set(candidate.type, [candidate]);
    }
  }

  let logged = 0;

  sweep: for (const members of buckets.values()) {
    for (let left = 0; left < members.length; left++) {
      for (let right = left + 1; right < members.length; right++) {
        // Stop the whole sweep as soon as the batch quota is used up.
        if (logged >= batchSize) break sweep;

        const [aId, bId] = canonicalPair(members[left].id, members[right].id);

        // Pairs already present in the log do not count against the quota.
        const seen = await db.execute(
          "SELECT 1 FROM local_dedup_log WHERE entity_a_id = ? AND entity_b_id = ?",
          [aId, bId],
        );
        if (seen.rows.length > 0) continue;

        const similarity = wordJaccard(members[left].content, members[right].content);
        const decision: string =
          similarity > 0.92 ? "merged" : similarity > 0.5 ? "related" : "different";

        await db.execute(
          "INSERT INTO local_dedup_log (entity_a_id, entity_b_id, decision, checked_at) VALUES (?, ?, ?, ?)",
          [aId, bId, decision, Date.now()],
        );

        logged += 1;
      }
    }
  }

  return { processed: logged, remaining: logged === batchSize };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Drive the consolidation sweep to completion.
 * Calls `consolidationSweepBatch` with a batch size of 50 until a batch
 * reports that nothing remains.
 *
 * @returns The total number of pairs processed across all batches.
 */
export async function runConsolidationSweep(db: SiaDb): Promise<number> {
  const pairsPerBatch = 50;
  let processedTotal = 0;

  for (;;) {
    const { processed, remaining } = await consolidationSweepBatch(db, pairsPerBatch);
    processedTotal += processed;
    if (!remaining) {
      return processedTotal;
    }
  }
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// Module: decay — importance decay batch processing
|
|
2
|
+
|
|
3
|
+
import type { BatchResult, DecayResult } from "@/decay/types";
|
|
4
|
+
import { computeConfidence } from "@/freshness/confidence-decay";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
import type { Entity } from "@/graph/entities";
|
|
7
|
+
import { updateEntity } from "@/graph/entities";
|
|
8
|
+
import type { DecayHalfLife, SiaConfig } from "@/shared/config";
|
|
9
|
+
|
|
10
|
+
export type { BatchResult, DecayResult };
|
|
11
|
+
|
|
12
|
+
const BATCH_SIZE = 500;
|
|
13
|
+
const MS_PER_DAY = 86_400_000;
|
|
14
|
+
|
|
15
|
+
/**
 * Compute the decayed importance of one entity at time `now`.
 *
 * Steps:
 *   idleDays  = (now - entity.last_accessed) / MS_PER_DAY
 *   halfLife  = config.decayHalfLife[entity.type] ?? config.decayHalfLife.default
 *   retained  = entity.base_importance * 0.5 ^ (idleDays / halfLife)
 *   bonus     = min(entity.edge_count * 0.02, 0.3)
 *   result    = max(retained + bonus, floor)
 *
 * The floor is 0.25 for highly-connected entities (edge_count > 20) and 0.01
 * for everything else.
 */
function computeDecayedImportance(entity: Entity, config: SiaConfig, now: number): number {
  const halfLifeTable = config.decayHalfLife;
  const halfLifeDays = halfLifeTable[entity.type as keyof DecayHalfLife] ?? halfLifeTable.default;

  const idleDays = (now - entity.last_accessed) / MS_PER_DAY;
  const retained = entity.base_importance * 0.5 ** (idleDays / halfLifeDays);
  const connectivityBonus = Math.min(entity.edge_count * 0.02, 0.3);

  // With more than 20 edges the bonus is already capped at 0.3, so the 0.25
  // floor only takes effect when the retained part is negative.
  const floor = entity.edge_count > 20 ? 0.25 : 0.01;

  return Math.max(retained + connectivityBonus, floor);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Process a single page of entities for importance decay.
 *
 * Selects active entities (not archived, not invalidated) ordered by
 * least-recently accessed, applies `computeDecayedImportance`, and writes the
 * new importance through `updateEntity`. Confidence is recomputed with
 * `computeConfidence` and written only when it differs from the stored value
 * by more than 0.01.
 *
 * This function does not archive anything: it only updates `importance` and,
 * when it changed enough, `confidence`.
 *
 * @param db        Graph database handle.
 * @param config    Configuration passed through to `computeDecayedImportance`.
 * @param batchSize Page size (SQL LIMIT).
 * @param offset    Number of rows to skip (SQL OFFSET).
 * @returns `processed` — number of entities updated; `remaining` — true when
 *          the page was full, so another page may exist.
 */
export async function decayBatch(
  db: SiaDb,
  config: SiaConfig,
  batchSize: number,
  offset: number,
): Promise<BatchResult> {
  // `id` is a tie-breaker: `last_accessed` is not unique, and without a total
  // order rows with equal timestamps may land on different LIMIT/OFFSET pages
  // between queries, so entities could be skipped or decayed twice.
  const result = await db.execute(
    `SELECT * FROM graph_nodes
     WHERE archived_at IS NULL AND t_valid_until IS NULL
     ORDER BY last_accessed ASC, id ASC
     LIMIT ? OFFSET ?`,
    [batchSize, offset],
  );

  const entities = result.rows as unknown as Entity[];
  // One timestamp for the whole page so every entity decays against the same instant.
  const now = Date.now();

  for (const entity of entities) {
    const newImportance = computeDecayedImportance(entity, config, now);
    const daysSinceAccess = (now - entity.last_accessed) / MS_PER_DAY;

    const newConfidence = computeConfidence(
      entity.base_confidence,
      entity.trust_tier as 1 | 2 | 3 | 4,
      entity.type,
      daysSinceAccess,
    );

    const updates: Record<string, unknown> = { importance: newImportance };
    // Skip negligible confidence changes to avoid writing the column needlessly.
    if (Math.abs(newConfidence - entity.confidence) > 0.01) {
      updates.confidence = newConfidence;
    }
    await updateEntity(db, entity.id, updates);
  }

  return {
    processed: entities.length,
    remaining: entities.length === batchSize,
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Apply importance decay to every active entity.
 *
 * Pages through the entities with `decayBatch`, BATCH_SIZE rows at a time,
 * advancing the offset by BATCH_SIZE after each page until a page reports
 * that nothing remains.
 *
 * @returns The total number of entities processed and the wall-clock duration
 *          of the run in milliseconds.
 */
export async function decayImportance(db: SiaDb, config: SiaConfig): Promise<DecayResult> {
  const startedAt = Date.now();
  let processed = 0;

  for (let offset = 0; ; offset += BATCH_SIZE) {
    const page = await decayBatch(db, config, BATCH_SIZE, offset);
    processed += page.processed;
    if (!page.remaining) break;
  }

  return { processed, durationMs: Date.now() - startedAt };
}
|