@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* confidence-decay.ts — Layer 4 of the freshness engine.
|
|
3
|
+
*
|
|
4
|
+
* Trust-tier-specific confidence decay with Bayesian re-observation reinforcement.
|
|
5
|
+
*
|
|
6
|
+
* Key insight:
|
|
7
|
+
* - Tier 2 (AST-derived): event-driven invalidation only — no time decay.
|
|
8
|
+
 * - Tier 3 (LLM-inferred): exponential decay capped by the Beta(α,β) Bayesian confidence, i.e. min(decayed, α / (α + β)).
|
|
9
|
+
* - Tier 1 (User-stated): slow exponential decay, no Bayesian adjustment.
|
|
10
|
+
* - Tier 4 (External): fast exponential decay, no Bayesian adjustment.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Tuning values that control how a node's confidence fades over time and
 * how strongly a re-observation reinforces it.
 */
export interface DecayParams {
	/** Half-life of the exponential decay, in days. */
	halfLifeDays: number;
	/** Factor multiplied into the decay rate (0 disables time decay). */
	decayMultiplier: number;
	/** How much α increments per re-observation. */
	reObservationBoost: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Decay parameter table.
 *
 * Keys are either tier entries (`tier2`, `tier1_user`, `tier4_external`) or
 * names looked up by entity type for tier-3 nodes (`Decision`, `Convention`,
 * `Bug`, `Solution`, `Concept`, `event`).
 */
export const DECAY_PARAMS: Record<string, DecayParams> = {
	// Event-driven only: no time decay and no re-observation boost.
	tier2: {
		halfLifeDays: Number.POSITIVE_INFINITY,
		decayMultiplier: 0,
		reObservationBoost: 0,
	},

	// Entries keyed by entity type.
	Decision: { halfLifeDays: 14, decayMultiplier: 1.0, reObservationBoost: 1 },
	Convention: { halfLifeDays: 21, decayMultiplier: 1.0, reObservationBoost: 1 },
	Bug: { halfLifeDays: 7, decayMultiplier: 1.5, reObservationBoost: 1 },
	Solution: { halfLifeDays: 7, decayMultiplier: 1.5, reObservationBoost: 1 },
	Concept: { halfLifeDays: 14, decayMultiplier: 1.0, reObservationBoost: 1 },

	// Entries keyed by trust tier.
	tier1_user: { halfLifeDays: 30, decayMultiplier: 0.5, reObservationBoost: 2 },
	tier4_external: { halfLifeDays: 7, decayMultiplier: 3.0, reObservationBoost: 1 },

	// Half-life of one hour, expressed in days.
	event: { halfLifeDays: 1 / 24, decayMultiplier: 1.0, reObservationBoost: 0 },
};
|
|
31
|
+
|
|
32
|
+
/**
 * Fallback for tier-3 nodes whose entity type has no entry of its own:
 * the `Decision` parameters.
 */
const TIER3_DEFAULT: DecayParams = DECAY_PARAMS["Decision"] as DecayParams;
|
|
34
|
+
|
|
35
|
+
/**
 * Evidence counters behind the Bayesian confidence α / (α + β).
 */
export interface BayesianState {
	/** Successful re-observations (starts at 1). */
	alpha: number;
	/** Contradictions (starts at 0). */
	beta: number;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Resolve the decay parameters for a node from its trust tier and entity type.
 *
 * Tiers 1, 2 and 4 ignore `entityType` and always map to the `tier1_user`,
 * `tier2` and `tier4_external` entries of `DECAY_PARAMS`. Tier 3 is looked up
 * by entity type and falls back to the `Decision` parameters when the type
 * has no entry of its own.
 *
 * @param trustTier - Trust tier of the node (1–4).
 * @param entityType - Entity type name; only consulted for tier 3.
 * @returns The matching `DECAY_PARAMS` entry, or the tier-3 default.
 */
export function getDecayParams(trustTier: 1 | 2 | 3 | 4, entityType: string): DecayParams {
	if (trustTier === 2) return DECAY_PARAMS.tier2 as DecayParams;
	if (trustTier === 1) return DECAY_PARAMS.tier1_user as DecayParams;
	if (trustTier === 4) return DECAY_PARAMS.tier4_external as DecayParams;

	// Tier 3 — look up by entity type, fall back to Decision.
	// Only own keys count: a bare `DECAY_PARAMS[entityType]` also resolves
	// inherited members such as "constructor", "toString" or "__proto__" and
	// would hand back a function/object that is not a DecayParams at all.
	if (Object.prototype.hasOwnProperty.call(DECAY_PARAMS, entityType)) {
		return DECAY_PARAMS[entityType] ?? TIER3_DEFAULT;
	}
	return TIER3_DEFAULT;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Compute the current confidence of a node from its trust tier, entity type
 * and age.
 *
 * - Tier 2: binary — 1.0 when `sourceUnchanged` is exactly `true`, otherwise 0.0
 *   (including when the argument is omitted). No time decay.
 * - Tier 3: exponential decay, capped by the Bayesian confidence when a
 *   `bayesian` state is supplied; plain exponential decay otherwise.
 * - Tier 1 / Tier 4: exponential decay only.
 *
 * Formula:
 *   λ        = ln(2) / halfLifeDays
 *   decayed  = baseConfidence × e^(-λ × decayMultiplier × daysSinceAccess)
 *   bayesian = α / (α + β)
 *   final (tier 3, with state) = min(decayed, bayesian)
 *   final (tier 1/3/4 otherwise) = decayed
 *   final (tier 2) = sourceUnchanged ? 1.0 : 0.0
 *
 * @param baseConfidence - Confidence before decay is applied.
 * @param trustTier - Trust tier of the node (1–4).
 * @param entityType - Entity type name, used to select tier-3 parameters.
 * @param daysSinceAccess - Age in days. Negative values are treated as 0.
 * @param bayesian - Optional evidence counters; only used for tier 3.
 * @param sourceUnchanged - Only used for tier 2.
 * @returns The decayed confidence.
 */
export function computeConfidence(
	baseConfidence: number,
	trustTier: 1 | 2 | 3 | 4,
	entityType: string,
	daysSinceAccess: number,
	bayesian?: BayesianState,
	sourceUnchanged?: boolean,
): number {
	// Tier 2: binary, event-driven
	if (trustTier === 2) {
		return sourceUnchanged === true ? 1.0 : 0.0;
	}

	const params = getDecayParams(trustTier, entityType);

	// A negative age (clock skew, timestamp from the future) would flip the sign
	// of the exponent and push the result above baseConfidence, so clamp it.
	const elapsedDays = Math.max(0, daysSinceAccess);

	// Compute exponential decay
	let decayed: number;
	if (params.halfLifeDays === Infinity || params.decayMultiplier === 0 || elapsedDays === 0) {
		decayed = baseConfidence;
	} else {
		const lambda = Math.LN2 / params.halfLifeDays;
		decayed = baseConfidence * Math.exp(-lambda * params.decayMultiplier * elapsedDays);
	}

	// Tier 3: the Bayesian confidence acts as an upper bound on the decayed value
	if (trustTier === 3 && bayesian !== undefined) {
		const bayes = bayesianConfidence(bayesian);
		return Math.min(decayed, bayes);
	}

	return decayed;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Register a successful re-observation by raising α.
 *
 * The input state is left untouched; a fresh object is returned.
 *
 * @param state - Current evidence counters.
 * @param boost - Amount added to α (defaults to 1).
 * @returns A new state with the increased α and the same β.
 */
export function recordReObservation(state: BayesianState, boost = 1): BayesianState {
	const { alpha, beta } = state;
	const reinforcedAlpha = alpha + boost;
	return { alpha: reinforcedAlpha, beta };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Register a contradiction by raising β by one.
 *
 * The input state is left untouched; a fresh object is returned.
 *
 * @param state - Current evidence counters.
 * @returns A new state with the same α and β + 1.
 */
export function recordContradiction(state: BayesianState): BayesianState {
	const { alpha, beta } = state;
	const raisedBeta = beta + 1;
	return { alpha, beta: raisedBeta };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Bayesian confidence of a state: the share of supporting evidence,
 * α / (α + β).
 *
 * @param state - Evidence counters.
 * @returns α divided by the sum of α and β.
 */
export function bayesianConfidence(state: BayesianState): number {
	const { alpha, beta } = state;
	const totalEvidence = alpha + beta;
	return alpha / totalEvidence;
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
// Module: cuckoo-filter — Probabilistic set membership with deletion support
|
|
2
|
+
//
|
|
3
|
+
// Real fingerprint-based Cuckoo filter (Fan et al. 2014):
|
|
4
|
+
// - 16-bit fingerprints in Uint16Array buckets (4 slots/bucket)
|
|
5
|
+
// - FNV-1a hash: lower bits → bucket index, upper 16 bits → fingerprint
|
|
6
|
+
// - Alternate bucket via XOR with prime-multiplied fingerprint
|
|
7
|
+
// - Auxiliary Set<number> of 31-bit hashes for exact size tracking
|
|
8
|
+
// - Max 500 eviction kicks before declaring full
|
|
9
|
+
//
|
|
10
|
+
// Memory: ~328KB for 50K paths (128KB filter + 200KB hash set)
|
|
11
|
+
// FP rate: ~0.012% theoretical (16-bit fingerprints, 4 slots/bucket)
|
|
12
|
+
|
|
13
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
14
|
+
|
|
15
|
+
/** Fingerprint slots per bucket. */
const BUCKET_SIZE = 4;
/** Eviction kicks attempted before an insertion is declared failed. */
const MAX_KICKS = 500;
/** Upper bound on the bucket count a filter may be built with. */
const MAX_BUCKETS = 1 << 16;
|
|
18
|
+
|
|
19
|
+
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `str`.
 *
 * @param str - Text to hash.
 * @returns The hash as an unsigned 32-bit integer.
 */
function fnv1a(str: string): number {
	let hash = 0x811c9dc5; // FNV offset basis
	for (let i = 0; i < str.length; i++) {
		hash ^= str.charCodeAt(i);
		// Math.imul performs an exact 32-bit multiply. A plain `hash * prime`
		// can exceed 2^53, where doubles round away the low bits — the very
		// bits later used to pick a bucket.
		hash = Math.imul(hash, 0x01000193) >>> 0;
	}
	return hash;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Smallest power of two that is greater than or equal to `n`.
 *
 * @param n - Lower bound; values of 1 or less yield 1.
 * @returns A power of two `p` with `p >= n`.
 */
function nextPowerOfTwo(n: number): number {
	if (n <= 1) return 1;
	let p = 1;
	// Double arithmetically rather than with `<<=`: the shift works on signed
	// 32-bit integers, so for n > 2^30 it wraps to a negative value and then to
	// 0, and the loop would never terminate.
	while (p < n) p *= 2;
	return p;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Fingerprint-based cuckoo filter over path strings.
 *
 * Each path is hashed with `fnv1a`; the low bits of the hash select the
 * primary bucket, the upper 16 bits (forced non-zero) form the fingerprint,
 * and the alternate bucket is derived by XOR-ing the bucket index with a
 * multiplied fingerprint. A slot value of 0 means "empty". In addition to the
 * fingerprint table, a `Set` of 31-bit hashes records which hashes have been
 * added so that repeated adds are no-ops and `size` stays exact.
 */
export class CuckooFilter {
	private readonly data: Uint16Array;
	private readonly numBuckets: number;
	private readonly hashes: Set<number>;
	private _size: number;

	/**
	 * @param capacity - Number of entries the filter is sized for.
	 * @throws Error when the capacity needs more than `MAX_BUCKETS` buckets.
	 */
	constructor(capacity = 65536) {
		this.numBuckets = nextPowerOfTwo(Math.ceil(Math.max(capacity, 1) / BUCKET_SIZE / 0.95));
		if (this.numBuckets > MAX_BUCKETS) {
			throw new Error(
				`CuckooFilter capacity too large: ${capacity} requires ${this.numBuckets} buckets (max ${MAX_BUCKETS})`,
			);
		}
		this.data = new Uint16Array(this.numBuckets * BUCKET_SIZE);
		this.hashes = new Set();
		this._size = 0;
	}

	/**
	 * Insert a path.
	 *
	 * @returns `true` when the path is stored (or its 31-bit hash was already
	 * recorded), `false` when no slot could be freed within `MAX_KICKS`
	 * evictions. A failed insertion leaves the filter exactly as it was.
	 */
	add(path: string): boolean {
		const { smiHash, fp, h1, h2 } = this._locate(path);
		if (this.hashes.has(smiHash)) return true;

		if (this._bucketInsert(h1, fp) || this._bucketInsert(h2, fp)) {
			this.hashes.add(smiHash);
			this._size++;
			return true;
		}

		// Both candidate buckets are full: displace resident fingerprints until
		// one of them finds a free slot in its alternate bucket. Every overwrite
		// is recorded so the table can be restored if the chain fails; without
		// that, the last displaced fingerprint would be silently dropped and an
		// already-accepted path would start reporting "not contained".
		const touchedOffsets: number[] = [];
		const previousValues: number[] = [];

		let evictBucket = Math.random() < 0.5 ? h1 : h2;
		let evictFp = fp;

		for (let kick = 0; kick < MAX_KICKS; kick++) {
			const slotIdx = Math.floor(Math.random() * BUCKET_SIZE);
			const offset = evictBucket * BUCKET_SIZE + slotIdx;
			const evicted = this.data[offset] ?? 0;

			touchedOffsets.push(offset);
			previousValues.push(evicted);

			this.data[offset] = evictFp;
			evictFp = evicted;

			const altBucket = this._altBucket(evictBucket, evictFp);
			if (this._bucketInsert(altBucket, evictFp)) {
				this.hashes.add(smiHash);
				this._size++;
				return true;
			}
			evictBucket = altBucket;
		}

		// Undo in reverse order so slots that were overwritten more than once
		// end up with their original content.
		for (let i = touchedOffsets.length - 1; i >= 0; i--) {
			this.data[touchedOffsets[i] as number] = previousValues[i] as number;
		}
		return false;
	}

	/**
	 * Remove a path.
	 *
	 * @returns `true` when a matching fingerprint was cleared; `false` when the
	 * path's hash was never recorded or no matching fingerprint was found.
	 */
	remove(path: string): boolean {
		const { smiHash, fp, h1, h2 } = this._locate(path);
		if (!this.hashes.has(smiHash)) return false;

		if (this._bucketRemove(h1, fp) || this._bucketRemove(h2, fp)) {
			this.hashes.delete(smiHash);
			this._size--;
			return true;
		}
		return false;
	}

	/**
	 * Probabilistic membership test: looks for the path's fingerprint in its
	 * two candidate buckets. May report `true` for a path that was never added
	 * when another entry shares fingerprint and bucket.
	 */
	contains(path: string): boolean {
		const { fp, h1, h2 } = this._locate(path);
		return this._bucketContains(h1, fp) || this._bucketContains(h2, fp);
	}

	/** Empty the filter. */
	clear(): void {
		this.data.fill(0);
		this.hashes.clear();
		this._size = 0;
	}

	/** Number of entries currently stored. */
	get size(): number {
		return this._size;
	}

	/** Derive the set key, fingerprint and both candidate buckets for a path. */
	private _locate(path: string): { smiHash: number; fp: number; h1: number; h2: number } {
		const hash = fnv1a(path);
		const smiHash = hash >>> 1;
		// 0 marks an empty slot, so a zero fingerprint is remapped to 1.
		const fp = hash >>> 16 || 1;
		const h1 = hash & (this.numBuckets - 1);
		const h2 = this._altBucket(h1, fp);
		return { smiHash, fp, h1, h2 };
	}

	/**
	 * Partner bucket of `bucket` for fingerprint `fp`. Applying it twice with
	 * the same fingerprint returns the starting bucket.
	 */
	private _altBucket(bucket: number, fp: number): number {
		const mask = this.numBuckets - 1;
		return (bucket ^ (((fp * 0x5bd1e995) >>> 0) & mask)) & mask;
	}

	/** Whether any slot of `bucket` holds `fp`. */
	private _bucketContains(bucket: number, fp: number): boolean {
		const base = bucket * BUCKET_SIZE;
		for (let i = 0; i < BUCKET_SIZE; i++) {
			if (this.data[base + i] === fp) return true;
		}
		return false;
	}

	/** Store `fp` in the first empty slot of `bucket`; `false` when the bucket is full. */
	private _bucketInsert(bucket: number, fp: number): boolean {
		const base = bucket * BUCKET_SIZE;
		for (let i = 0; i < BUCKET_SIZE; i++) {
			if (this.data[base + i] === 0) {
				this.data[base + i] = fp;
				return true;
			}
		}
		return false;
	}

	/** Clear the first slot of `bucket` holding `fp`; `false` when none matches. */
	private _bucketRemove(bucket: number, fp: number): boolean {
		const base = bucket * BUCKET_SIZE;
		for (let i = 0; i < BUCKET_SIZE; i++) {
			if (this.data[base + i] === fp) {
				this.data[base + i] = 0;
				return true;
			}
		}
		return false;
	}

	/**
	 * Build a default-capacity filter from the distinct `source_path` values of
	 * the `source_deps` table.
	 *
	 * NOTE(review): the result of `add` is not checked here, so paths that do
	 * not fit into the default capacity are silently left out — confirm that
	 * this best-effort behaviour is intended.
	 */
	static async fromDatabase(db: SiaDb): Promise<CuckooFilter> {
		const filter = new CuckooFilter();
		const { rows } = await db.execute("SELECT DISTINCT source_path FROM source_deps");
		for (const row of rows) {
			filter.add(row.source_path as string);
		}
		return filter;
	}
}
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deep-validation.ts — Layer 5 of the freshness engine.
|
|
3
|
+
*
|
|
4
|
+
* Periodic deep validation pipeline (nightly / weekly).
|
|
5
|
+
* Catches anything that real-time layers (1–4) missed:
|
|
6
|
+
*
|
|
7
|
+
* (a) Documentation-vs-code cross-validation
|
|
8
|
+
* (b) Low-confidence LLM-inferred claim re-verification
|
|
9
|
+
* (c) PageRank importance score recomputation
|
|
10
|
+
* (d) Version compaction (archived entity purge + FTS5 optimize)
|
|
11
|
+
*
|
|
12
|
+
* Must be run in a separate DB connection — never blocks the MCP server.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { existsSync, statSync } from "node:fs";
|
|
16
|
+
import { computePageRank } from "@/ast/pagerank-builder";
|
|
17
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
18
|
+
import { invalidateEntity } from "@/graph/entities";
|
|
19
|
+
import { updateImportanceScores } from "@/retrieval/pagerank";
|
|
20
|
+
|
|
21
|
+
// ─── Public types ───────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
/**
 * Summary returned by one run of the deep validation pipeline.
 */
export interface DeepValidationResult {
	// Documentation cross-validation
	documentsChecked: number;
	staleDocsFound: number;

	// Low-confidence claim re-verification
	claimsReVerified: number;
	claimsInvalidated: number;
	claimsConfirmed: number;

	/** Nodes scored by the PageRank step. */
	nodesScored: number;

	// Version compaction
	versionsCompacted: number;
	ftsOptimized: boolean;

	/** Wall-clock duration of the whole run, in milliseconds. */
	durationMs: number;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Settings accepted by the deep validation pipeline.
 */
export interface DeepValidationConfig {
	/** Default: 20. */
	maxClaimsToVerify: number;
	/** Default: 90. */
	retentionDays: number;
	/** Default: 30. */
	eventRetentionDays: number;
	/** Default: 0.05. */
	archiveThreshold: number;
}
|
|
41
|
+
|
|
42
|
+
/** Values used for every setting the caller does not override. */
const DEFAULT_CONFIG: DeepValidationConfig = {
	maxClaimsToVerify: 20,
	retentionDays: 90,
	eventRetentionDays: 30,
	archiveThreshold: 0.05,
};
|
|
48
|
+
|
|
49
|
+
// ─── Main pipeline ──────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
/**
 * Execute the four deep-validation steps one after another and report
 * their combined outcome.
 *
 * Steps, in order: documentation cross-validation, low-confidence claim
 * re-verification, PageRank recomputation, version compaction.
 * Intended to be invoked nightly by the decay scheduler on its own DB
 * connection so the MCP server is never blocked.
 *
 * @param db - Database handle used by every step.
 * @param repoRoot - Repository root passed to the documentation step.
 * @param config - Optional overrides merged over the default settings.
 * @returns Counters from each step plus the total duration in milliseconds.
 */
export async function runDeepValidation(
	db: SiaDb,
	repoRoot: string,
	config?: Partial<DeepValidationConfig>,
): Promise<DeepValidationResult> {
	const settings: DeepValidationConfig = Object.assign({}, DEFAULT_CONFIG, config);
	const startedAt = Date.now();

	// (a) Documentation cross-validation
	const docs = await validateDocumentation(db, repoRoot);

	// (b) Low-confidence claim re-verification
	const claims = await identifyLowConfidenceClaims(db, settings.maxClaimsToVerify);

	// (c) PageRank recomputation
	const ranking = await recomputePageRank(db);

	// (d) Version compaction
	const compaction = await compactVersions(db, settings);

	const elapsedMs = Date.now() - startedAt;

	const summary: DeepValidationResult = {
		documentsChecked: docs.checked,
		staleDocsFound: docs.staleFound,
		claimsReVerified: claims.verified,
		claimsInvalidated: claims.invalidated,
		claimsConfirmed: claims.confirmed,
		nodesScored: ranking.nodesScored,
		versionsCompacted: compaction.compacted,
		ftsOptimized: compaction.ftsOptimized,
		durationMs: elapsedMs,
	};
	return summary;
}
|
|
93
|
+
|
|
94
|
+
// ─── Sub-task (a): Documentation-vs-code cross-validation ──────────────────
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Sub-task (a): Documentation-vs-code cross-validation.
|
|
98
|
+
*
|
|
99
|
+
* For each entity with trust_tier 1 and type in ('CodeEntity','Convention','Decision')
|
|
100
|
+
* that has a non-empty file_paths array, stat() the referenced files.
|
|
101
|
+
* If any file was modified (mtime) after the entity's t_created, tag it as potentially-stale
|
|
102
|
+
* and reduce its confidence.
|
|
103
|
+
*/
|
|
104
|
+
export async function validateDocumentation(
|
|
105
|
+
db: SiaDb,
|
|
106
|
+
repoRoot: string,
|
|
107
|
+
): Promise<{ checked: number; staleFound: number }> {
|
|
108
|
+
const { rows } = await db.execute(
|
|
109
|
+
`SELECT id, file_paths, t_created, confidence, tags
|
|
110
|
+
FROM graph_nodes
|
|
111
|
+
WHERE type IN ('CodeEntity', 'Convention', 'Decision')
|
|
112
|
+
AND trust_tier = 1
|
|
113
|
+
AND file_paths IS NOT NULL
|
|
114
|
+
AND file_paths != '[]'
|
|
115
|
+
AND t_valid_until IS NULL
|
|
116
|
+
AND archived_at IS NULL`,
|
|
117
|
+
);
|
|
118
|
+
|
|
119
|
+
let checked = 0;
|
|
120
|
+
let staleFound = 0;
|
|
121
|
+
|
|
122
|
+
for (const row of rows) {
|
|
123
|
+
const entityId = row.id as string;
|
|
124
|
+
const tCreated = row.t_created as number;
|
|
125
|
+
const filePathsRaw = row.file_paths as string;
|
|
126
|
+
const currentTagsRaw = row.tags as string;
|
|
127
|
+
const currentConfidence = row.confidence as number;
|
|
128
|
+
|
|
129
|
+
let filePaths: string[];
|
|
130
|
+
try {
|
|
131
|
+
filePaths = JSON.parse(filePathsRaw) as string[];
|
|
132
|
+
} catch {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if (!Array.isArray(filePaths) || filePaths.length === 0) continue;
|
|
137
|
+
|
|
138
|
+
checked++;
|
|
139
|
+
|
|
140
|
+
let isStale = false;
|
|
141
|
+
|
|
142
|
+
for (const rawPath of filePaths) {
|
|
143
|
+
if (typeof rawPath !== "string" || rawPath.length === 0) continue;
|
|
144
|
+
|
|
145
|
+
// Resolve relative paths against repoRoot
|
|
146
|
+
const absPath = rawPath.startsWith("/") ? rawPath : `${repoRoot}/${rawPath}`;
|
|
147
|
+
|
|
148
|
+
if (!existsSync(absPath)) continue;
|
|
149
|
+
|
|
150
|
+
try {
|
|
151
|
+
const stat = statSync(absPath);
|
|
152
|
+
const mtimeMs = stat.mtimeMs;
|
|
153
|
+
if (mtimeMs > tCreated) {
|
|
154
|
+
isStale = true;
|
|
155
|
+
break;
|
|
156
|
+
}
|
|
157
|
+
} catch {
|
|
158
|
+
// Stat failed — skip this path
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (isStale) {
|
|
163
|
+
staleFound++;
|
|
164
|
+
|
|
165
|
+
// Tag entity as potentially-stale and reduce confidence
|
|
166
|
+
let tags: string[];
|
|
167
|
+
try {
|
|
168
|
+
tags = JSON.parse(currentTagsRaw) as string[];
|
|
169
|
+
} catch {
|
|
170
|
+
tags = [];
|
|
171
|
+
}
|
|
172
|
+
if (!tags.includes("potentially-stale")) {
|
|
173
|
+
tags.push("potentially-stale");
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
const newConfidence = Math.max(0.01, currentConfidence - 0.1);
|
|
177
|
+
await db.execute("UPDATE graph_nodes SET tags = ?, confidence = ? WHERE id = ?", [
|
|
178
|
+
JSON.stringify(tags),
|
|
179
|
+
newConfidence,
|
|
180
|
+
entityId,
|
|
181
|
+
]);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
return { checked, staleFound };
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// ─── Sub-task (b): Low-confidence claim re-verification ────────────────────
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Sub-task (b): Sample lowest-confidence LLM-inferred (tier-3) entities
|
|
192
|
+
* and flag them for re-verification.
|
|
193
|
+
*
|
|
194
|
+
* In production this would call an LLM (e.g. Haiku), but for now we identify
|
|
195
|
+
* candidates and update their Bayesian state based on whether source files
|
|
196
|
+
* still exist on disk.
|
|
197
|
+
*
|
|
198
|
+
* - If the entity has no file_paths or all referenced files still exist → confirmed
|
|
199
|
+
* - If any referenced file has been deleted → invalidated
|
|
200
|
+
*/
|
|
201
|
+
export async function identifyLowConfidenceClaims(
|
|
202
|
+
db: SiaDb,
|
|
203
|
+
maxClaims = 20,
|
|
204
|
+
): Promise<{ verified: number; invalidated: number; confirmed: number }> {
|
|
205
|
+
const { rows } = await db.execute(
|
|
206
|
+
`SELECT id, file_paths, confidence
|
|
207
|
+
FROM graph_nodes
|
|
208
|
+
WHERE trust_tier = 3
|
|
209
|
+
AND t_valid_until IS NULL
|
|
210
|
+
AND archived_at IS NULL
|
|
211
|
+
ORDER BY confidence ASC
|
|
212
|
+
LIMIT ?`,
|
|
213
|
+
[maxClaims],
|
|
214
|
+
);
|
|
215
|
+
|
|
216
|
+
let verified = 0;
|
|
217
|
+
let invalidated = 0;
|
|
218
|
+
let confirmed = 0;
|
|
219
|
+
|
|
220
|
+
for (const row of rows) {
|
|
221
|
+
const entityId = row.id as string;
|
|
222
|
+
const filePathsRaw = row.file_paths as string;
|
|
223
|
+
|
|
224
|
+
let filePaths: string[];
|
|
225
|
+
try {
|
|
226
|
+
filePaths = JSON.parse(filePathsRaw) as string[];
|
|
227
|
+
} catch {
|
|
228
|
+
filePaths = [];
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
verified++;
|
|
232
|
+
|
|
233
|
+
if (!Array.isArray(filePaths) || filePaths.length === 0) {
|
|
234
|
+
// No source file to check — treat as confirmed
|
|
235
|
+
confirmed++;
|
|
236
|
+
await db.execute(
|
|
237
|
+
"UPDATE graph_nodes SET confidence = MIN(1.0, confidence + 0.05) WHERE id = ?",
|
|
238
|
+
[entityId],
|
|
239
|
+
);
|
|
240
|
+
continue;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// Check if any source file has been deleted
|
|
244
|
+
const hasDeletedFile = filePaths.some(
|
|
245
|
+
(p) => typeof p === "string" && p.length > 0 && !existsSync(p),
|
|
246
|
+
);
|
|
247
|
+
|
|
248
|
+
if (hasDeletedFile) {
|
|
249
|
+
invalidated++;
|
|
250
|
+
await invalidateEntity(db, entityId);
|
|
251
|
+
} else {
|
|
252
|
+
confirmed++;
|
|
253
|
+
// Bump confidence slightly as a re-observation
|
|
254
|
+
await db.execute(
|
|
255
|
+
"UPDATE graph_nodes SET confidence = MIN(1.0, confidence + 0.05) WHERE id = ?",
|
|
256
|
+
[entityId],
|
|
257
|
+
);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return { verified, invalidated, confirmed };
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// ─── Sub-task (c): PageRank recomputation ──────────────────────────────────
|
|
265
|
+
|
|
266
|
+
/**
 * Sub-task (c): Recompute PageRank importance scores.
 *
 * Delegates the computation to `computePageRank` (ast/pagerank-builder.ts). If it
 * reports at least one scored node, the current `importance` of every live node
 * (not invalidated, not archived) is read back and passed to
 * `updateImportanceScores` as an id → importance map.
 *
 * NOTE(review): per the original author's note, `computePageRank` already persists
 * the scores and the second call exists to emit an audit log entry — confirm
 * against those helpers.
 *
 * @param db - Graph database handle.
 * @returns The node count reported by `computePageRank`.
 */
export async function recomputePageRank(db: SiaDb): Promise<{ nodesScored: number }> {
	const { nodesScored } = await computePageRank(db);

	// Skip the follow-up entirely when nothing was scored
	const anythingScored = nodesScored > 0;
	if (!anythingScored) {
		return { nodesScored };
	}

	const live = await db.execute(
		"SELECT id, importance FROM graph_nodes WHERE t_valid_until IS NULL AND archived_at IS NULL",
	);

	const importanceById = new Map<string, number>();
	for (const node of live.rows) {
		importanceById.set(node.id as string, node.importance as number);
	}

	await updateImportanceScores(db, importanceById);

	return { nodesScored };
}
|
|
289
|
+
|
|
290
|
+
// ─── Sub-task (d): Version compaction ──────────────────────────────────────
|
|
291
|
+
|
|
292
|
+
/**
 * Sub-task (d): Version compaction.
 *
 * Performs, in order:
 * 1. Hard-delete of every archived entity whose `archived_at` is older than
 *    `retentionDays`.
 * 2. Hard-delete of archived entities whose type ends in `Event`, whose importance
 *    is below `archiveThreshold`, that have zero edges, and whose `archived_at`
 *    is older than `eventRetentionDays`.
 * 3. An FTS5 `optimize` command on `graph_nodes_fts`; a failure here is
 *    swallowed and reported through `ftsOptimized: false`.
 *
 * @param db - Graph database handle.
 * @param config - Optional overrides layered over the module defaults.
 * @returns `compacted`: total rows deleted by steps 1 and 2; `ftsOptimized`: whether step 3 succeeded.
 */
export async function compactVersions(
	db: SiaDb,
	config?: Partial<DeepValidationConfig>,
): Promise<{ compacted: number; ftsOptimized: boolean }> {
	const settings: DeepValidationConfig = { ...DEFAULT_CONFIG, ...config };

	// Both cutoffs are derived from a single clock reading
	const msPerDay = 86_400_000;
	const nowMs = Date.now();
	const archivedBefore = nowMs - settings.retentionDays * msPerDay;
	const eventsArchivedBefore = nowMs - settings.eventRetentionDays * msPerDay;

	// Step 1: archived entities past the main retention window
	const expired = await db.execute(
		`DELETE FROM graph_nodes
		 WHERE archived_at IS NOT NULL
		   AND archived_at < ?
		 RETURNING id`,
		[archivedBefore],
	);

	// Step 2: archived, unconnected, low-importance event entities past the
	// event retention window
	const expiredEvents = await db.execute(
		`DELETE FROM graph_nodes
		 WHERE type LIKE '%Event'
		   AND importance < ?
		   AND edge_count = 0
		   AND archived_at IS NOT NULL
		   AND archived_at < ?
		 RETURNING id`,
		[settings.archiveThreshold, eventsArchivedBefore],
	);

	const compacted = expired.rows.length + expiredEvents.rows.length;

	// Step 3: FTS5 optimize — best effort
	let ftsOptimized: boolean;
	try {
		await db.execute("INSERT INTO graph_nodes_fts(graph_nodes_fts) VALUES('optimize')", []);
		ftsOptimized = true;
	} catch {
		// FTS5 table may not exist in all test environments — treat as non-fatal
		ftsOptimized = false;
	}

	return { compacted, ftsOptimized };
}
|