@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { cpus } from "node:os";
|
|
4
|
+
import { join, relative, resolve } from "node:path";
|
|
5
|
+
import type { WorkerMessage, WorkerResult } from "@/ast/index-worker";
|
|
6
|
+
import { parseFileWithRetry } from "@/ast/index-worker";
|
|
7
|
+
import { getLanguageForFile } from "@/ast/languages";
|
|
8
|
+
import { createIgnoreMatcher, toPosixPath } from "@/ast/path-utils";
|
|
9
|
+
import type { CandidateFact } from "@/capture/types";
|
|
10
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
11
|
+
import { insertEdge } from "@/graph/edges";
|
|
12
|
+
import { insertEntity, updateEntity } from "@/graph/entities";
|
|
13
|
+
import type { SiaConfig } from "@/shared/config";
|
|
14
|
+
|
|
15
|
+
/**
 * Counters describing an indexing run.
 *
 * The same shape is used for the final return value of `indexRepository` and
 * (extended with `file` / `error`) for every progress callback.
 */
export interface IndexResult {
	/**
	 * In the final result: every source file considered during the walk,
	 * cache hits included. In progress callbacks: files parsed so far.
	 */
	filesProcessed: number;
	/** Entities newly inserted (updates of existing entities are not counted). */
	entitiesCreated: number;
	/** Edges written; progress callbacks always report 0 here. */
	edgesCreated: number;
	/** Files skipped because their cached mtime matched the one on disk. */
	cacheHits: number;
	/** Milliseconds elapsed since the run started. */
	durationMs: number;
	/** Paths that could not be indexed, with the reported error; omitted when there are none. */
	skippedFiles?: { path: string; error: string }[];
}
|
|
23
|
+
|
|
24
|
+
/** Optional knobs accepted by `indexRepository`. */
export interface IndexOptions {
	/** When true, files are still parsed but nothing is written to the database or the cache file. */
	dryRun?: boolean;
	/** Called after each parsed (or failed) file with the running counters. */
	onProgress?: (progress: IndexResult & { file?: string; error?: string }) => void;
	/** Cache sub-directory name; defaults to the SHA-256 hex digest of the resolved repo root. */
	repoHash?: string;
	/** Save cache every N files (default: 500). */
	cacheSaveInterval?: number;
	/** Number of worker threads (default: cpus - 1, 0 = sequential). */
	workerCount?: number;
}
|
|
31
|
+
|
|
32
|
+
/** What is remembered about a file between runs: its modification time when last indexed. */
type CacheEntry = {
	mtimeMs: number;
};

/** The on-disk cache: one entry per repo-relative path. */
type CacheMap = { [relPath: string]: CacheEntry };
|
|
37
|
+
|
|
38
|
+
/**
 * Read the persisted index cache from `cachePath`.
 *
 * Never throws: a missing file, an unreadable file, malformed JSON, or JSON
 * whose top-level value is not a plain object all yield an empty cache, which
 * simply causes every file to be re-indexed.
 */
function loadCache(cachePath: string): CacheMap {
	if (!existsSync(cachePath)) return {};
	try {
		const parsed: unknown = JSON.parse(readFileSync(cachePath, "utf-8"));
		// JSON.parse accepts `null`, arrays and primitives. Returning those as
		// a CacheMap would make later `cache[relPath]` lookups throw (null) or
		// behave oddly (arrays), so only a real object is accepted.
		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
			return {};
		}
		return parsed as CacheMap;
	} catch {
		// Unreadable or corrupt cache: start fresh rather than fail the run.
		return {};
	}
}
|
|
47
|
+
|
|
48
|
+
/** Serialise `cache` as 2-space-indented JSON and write it to `cachePath` as UTF-8. */
function saveCache(cachePath: string, cache: CacheMap): void {
	const serialized = JSON.stringify(cache, null, 2);
	writeFileSync(cachePath, serialized, "utf-8");
}
|
|
51
|
+
|
|
52
|
+
/** Number of pending facts at which `processResult` flushes the batch to the database. */
const BATCH_SIZE = 100;

/** A parsed fact waiting to be written, together with the file it came from. */
type PendingFact = {
	fact: CandidateFact;
	relPath: string;
	packagePath: string | null;
	/** Set by `flushBatch` when the fact is inserted as a new entity; stays unset for updates. */
	entityId?: string;
};

/** Outcome of one `flushBatch` call. */
type FlushResult = {
	/** How many entities were newly inserted. */
	created: number;
	/** IDs of those newly inserted entities, in insertion order. */
	insertedIds: string[];
};
|
|
65
|
+
|
|
66
|
+
/**
 * Flush a batch of facts to the database.
 *
 * Live nodes (not invalidated, not archived) sharing a name with any fact in
 * the batch are fetched up front; each fact then either UPDATEs the node it
 * matches or is INSERTed as a new entity.
 *
 * A fact matches an existing node when the names are equal and either
 *   1. the node's stored `file_paths` string equals the fact's serialised
 *      paths, or
 *   2. the node's decoded `file_paths` list contains the fact's `relPath`
 *      as an exact element.
 * Rule 2 deliberately compares whole paths: a substring test on the JSON
 * text would treat `src/a.ts` as matching `src/a.tsx` or `lib/src/a.ts`
 * and overwrite an unrelated entity.
 *
 * Only inserted facts get `entityId` set; updated facts are left untouched.
 *
 * @param db      Database handle.
 * @param batch   Facts to persist. Not mutated apart from `entityId`.
 * @param dryRun  When true nothing is read or written.
 * @returns Count of new entities and their IDs (for later edge creation).
 */
async function flushBatch(db: SiaDb, batch: PendingFact[], dryRun: boolean): Promise<FlushResult> {
	if (batch.length === 0 || dryRun) return { created: 0, insertedIds: [] };

	// Upper bound on bind parameters per lookup statement. A single file can
	// push the batch well past BATCH_SIZE, and SQL engines cap the number of
	// placeholders (older SQLite builds: 999) — presumably relevant here,
	// TODO confirm against the SiaDb backend.
	const LOOKUP_CHUNK = 500;

	type ExistingRow = { id: string; name: string; file_paths: string; paths: string[] };

	/** Decode a stored file_paths value into individual paths. */
	const decodePaths = (raw: string): string[] => {
		try {
			const parsed: unknown = JSON.parse(raw);
			if (Array.isArray(parsed)) {
				return parsed.filter((p): p is string => typeof p === "string");
			}
			if (typeof parsed === "string") return [parsed];
		} catch {
			// Not JSON — fall through and treat the raw text as one path.
		}
		return raw === "" ? [] : [raw];
	};

	// Keyed by name + stored file_paths so same-named symbols in different
	// files stay distinct.
	const exactIndex = new Map<string, ExistingRow>();
	const uniqueNames = [...new Set(batch.map((p) => p.fact.name))];

	for (let offset = 0; offset < uniqueNames.length; offset += LOOKUP_CHUNK) {
		const chunk = uniqueNames.slice(offset, offset + LOOKUP_CHUNK);
		const placeholders = chunk.map(() => "?").join(", ");
		const existing = await db.execute(
			`SELECT id, name, file_paths FROM graph_nodes
			WHERE name IN (${placeholders})
			AND t_valid_until IS NULL AND archived_at IS NULL`,
			chunk,
		);
		for (const row of existing.rows) {
			const name = row.name as string;
			const filePaths = (row.file_paths as string) ?? "";
			exactIndex.set(`${name}::${filePaths}`, {
				id: row.id as string,
				name,
				file_paths: filePaths,
				paths: decodePaths(filePaths),
			});
		}
	}

	// Secondary index for the path-membership fallback: name -> candidate rows.
	const byName = new Map<string, ExistingRow[]>();
	for (const row of exactIndex.values()) {
		const bucket = byName.get(row.name);
		if (bucket) bucket.push(row);
		else byName.set(row.name, [row]);
	}

	let created = 0;
	const insertedIds: string[] = [];

	for (const pending of batch) {
		const { fact, relPath } = pending;
		const serializedPaths = JSON.stringify(fact.file_paths ?? [relPath]);

		// The DB may hold file_paths in a different serialisation than we
		// produce, hence the membership fallback after the exact lookup.
		const match =
			exactIndex.get(`${fact.name}::${serializedPaths}`) ??
			byName.get(fact.name)?.find((row) => row.paths.includes(relPath));

		if (match) {
			await updateEntity(db, match.id, {
				content: fact.content,
				summary: fact.summary,
				tags: JSON.stringify(fact.tags ?? []),
			});
			continue;
		}

		const entity = await insertEntity(db, {
			type: fact.type,
			name: fact.name,
			content: fact.content,
			summary: fact.summary,
			tags: JSON.stringify(fact.tags ?? []),
			file_paths: serializedPaths,
			trust_tier: fact.trust_tier,
			confidence: fact.confidence,
			package_path: pending.packagePath,
			extraction_method: fact.extraction_method ?? null,
		});
		pending.entityId = entity.id;
		insertedIds.push(entity.id);
		created++;
	}

	return { created, insertedIds };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Fold one parse result into the running index state.
 *
 * For a failed parse the file is recorded in `ctx.skippedFiles`, progress is
 * reported with the error, and the counters are returned unchanged (the file
 * is not cached, so it is retried on the next run).
 *
 * For a successful parse the file's facts are queued on `ctx.pendingBatch`
 * and `ctx.allProcessedFacts`, the batch is flushed once it holds at least
 * BATCH_SIZE facts, the file's mtime is recorded in `ctx.cache`, and every
 * `ctx.cacheInterval` files the cache is checkpointed to disk.
 *
 * The counters in `ctx` are read by value; the updated values are returned
 * and the caller is expected to carry them into the next call.
 *
 * @returns Updated `filesProcessed` and `entitiesCreated`.
 */
async function processResult(
	result: WorkerResult,
	ctx: {
		db: SiaDb;
		cache: CacheMap;
		pendingBatch: PendingFact[];
		allProcessedFacts: PendingFact[];
		allInsertedIds: string[];
		skippedFiles: Array<{ path: string; error: string }>;
		filesProcessed: number;
		entitiesCreated: number;
		cacheHits: number;
		dryRun: boolean;
		cacheInterval: number;
		cacheDir: string;
		cachePath: string;
		start: number;
		onProgress?: IndexOptions["onProgress"];
	},
): Promise<{ filesProcessed: number; entitiesCreated: number }> {
	let { filesProcessed, entitiesCreated } = ctx;

	// Emits the current counters; `error` is only present on failure reports.
	const reportProgress = (error?: string): void => {
		ctx.onProgress?.({
			filesProcessed,
			entitiesCreated,
			edgesCreated: 0, // edges are created after all files are processed
			cacheHits: ctx.cacheHits,
			durationMs: Date.now() - ctx.start,
			file: result.relPath,
			...(error === undefined ? {} : { error }),
		});
	};

	// Writes the pending batch and empties it in place (the array is shared
	// with the caller, so it must not be replaced).
	const flushPending = async (): Promise<void> => {
		const flushed = await flushBatch(ctx.db, ctx.pendingBatch, ctx.dryRun);
		entitiesCreated += flushed.created;
		ctx.allInsertedIds.push(...flushed.insertedIds);
		ctx.pendingBatch.length = 0;
	};

	if (result.error) {
		ctx.skippedFiles.push({ path: result.relPath, error: result.error });
		reportProgress(result.error);
		return { filesProcessed, entitiesCreated };
	}

	for (const fact of result.facts) {
		const pending: PendingFact = {
			fact,
			relPath: result.relPath,
			packagePath: result.packagePath,
		};
		ctx.pendingBatch.push(pending);
		ctx.allProcessedFacts.push(pending);
	}

	if (ctx.pendingBatch.length >= BATCH_SIZE) {
		await flushPending();
	}

	if (!ctx.dryRun) {
		ctx.cache[result.relPath] = { mtimeMs: result.mtimeMs };
	}

	filesProcessed++;
	if (!ctx.dryRun && filesProcessed % ctx.cacheInterval === 0) {
		// The cache marks files as "done". Persisting it while some of those
		// files' facts are still only in memory would, after a crash, leave
		// them skipped on every later run — so write the facts first.
		if (ctx.pendingBatch.length > 0) {
			await flushPending();
		}
		mkdirSync(ctx.cacheDir, { recursive: true });
		saveCache(ctx.cachePath, ctx.cache);
	}

	reportProgress();

	return { filesProcessed, entitiesCreated };
}
|
|
214
|
+
|
|
215
|
+
/**
 * Resolve `proposed_relationships` on inserted entities into graph edges.
 *
 * Only facts that were inserted during this run (those with an `entityId`)
 * act as edge sources; facts that merely updated an existing entity are
 * skipped. Each relationship's target is looked up by name among live nodes;
 * relationships whose target cannot be found are silently dropped.
 *
 * Target lookups are memoised per name for the duration of the call, so a
 * target referenced by many facts is queried once. This assumes inserting
 * edges does not add or retire nodes — NOTE(review): confirm against
 * `insertEdge`.
 *
 * @returns Number of edges inserted.
 */
async function createEdgesFromRelationships(
	db: SiaDb,
	allProcessedFacts: PendingFact[],
): Promise<number> {
	let edgesCreated = 0;

	// name -> node id, or null when no live node carries that name.
	const targetIdByName = new Map<string, string | null>();

	const resolveTarget = async (name: string): Promise<string | null> => {
		const known = targetIdByName.get(name);
		if (known !== undefined) return known;

		const found = await db.execute(
			`SELECT id FROM graph_nodes
			WHERE name = ? AND t_valid_until IS NULL AND archived_at IS NULL
			LIMIT 1`,
			[name],
		);
		const id = found.rows.length > 0 ? (found.rows[0].id as string) : null;
		targetIdByName.set(name, id);
		return id;
	};

	for (const pending of allProcessedFacts) {
		const relationships = pending.fact.proposed_relationships;
		if (!relationships?.length) continue;
		if (!pending.entityId) continue; // was an update, not an insert

		for (const rel of relationships) {
			const targetId = await resolveTarget(rel.target_name);
			if (targetId === null) continue;

			await insertEdge(db, {
				from_id: pending.entityId,
				to_id: targetId,
				type: rel.type,
				weight: rel.weight,
				confidence: 0.8,
				trust_tier: 2,
				extraction_method: "ast",
			});
			edgesCreated++;
		}
	}

	return edgesCreated;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Parse files in parallel on a pool of worker threads.
 *
 * Each worker holds at most one file at a time and is handed the next one
 * when it posts its result, so faster workers naturally take more files.
 * Results are returned in completion order, not input order.
 *
 * The pool is always torn down before returning or throwing. The call
 * rejects if a worker raises an error or exits before every file has been
 * reported.
 *
 * @param filesToProcess  Files to parse.
 * @param numWorkers      Requested pool size; clamped to [1, number of files].
 * @returns One result per input file.
 */
async function dispatchToWorkerPool(
	filesToProcess: Array<{ absPath: string; relPath: string }>,
	numWorkers: number,
): Promise<WorkerResult[]> {
	const total = filesToProcess.length;
	if (total === 0) return [];

	const { Worker } = await import("node:worker_threads");
	// Hand the URL object to Worker as-is: `.pathname` is percent-encoded and
	// is not a valid filesystem path on Windows.
	const workerUrl = new URL("./index-worker.ts", import.meta.url);

	// Never spawn more workers than there are files, and never zero (which
	// would leave the promise below pending forever).
	const poolSize = Math.max(1, Math.min(numWorkers, total));
	const workers: InstanceType<typeof Worker>[] = [];
	const results: WorkerResult[] = [];

	try {
		for (let i = 0; i < poolSize; i++) {
			workers.push(new Worker(workerUrl));
		}

		await new Promise<void>((done, fail) => {
			let nextIndex = 0;
			let completed = 0;

			const feed = (worker: InstanceType<typeof Worker>): void => {
				const msg: WorkerMessage = filesToProcess[nextIndex++];
				worker.postMessage(msg);
			};

			for (const worker of workers) {
				worker.on("message", (result: WorkerResult) => {
					results.push(result);
					completed++;

					if (nextIndex < total) {
						feed(worker);
					} else if (completed === total) {
						done();
					}
				});

				worker.on("error", fail);

				// A worker that dies without an 'error' event would otherwise
				// leave its in-flight file unreported and this promise hanging.
				worker.on("exit", (code) => {
					if (completed < total) {
						fail(new Error(`Index worker exited with code ${code} before all files were parsed`));
					}
				});

				// poolSize <= total, so there is a file for every worker.
				feed(worker);
			}
		});
	} finally {
		// Runs on success and on failure so no thread outlives the call.
		await Promise.all(workers.map((worker) => worker.terminate()));
	}

	return results;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Walk the repository, extract AST facts, and write CodeEntity nodes.
 *
 * Phases:
 *   1. Walk the tree under `repoRoot`, honouring the ignore matcher, and
 *      collect every file with a recognised language whose mtime differs
 *      from the cached one.
 *   2. Parse those files — on a worker pool when more than 10 files need
 *      work and workers are enabled, otherwise sequentially — and write the
 *      resulting facts in batches.
 *   3. Unless `dryRun`, create edges from proposed relationships, run edge
 *      inference over the newly inserted entities, and persist the cache.
 *
 * Sub-directories that cannot be listed and files that cannot be stat'ed
 * (permission errors, dangling symlinks, files removed mid-walk) are recorded
 * in `skippedFiles` and do not abort the run. Failure to list `repoRoot`
 * itself is still thrown.
 *
 * @param repoRoot  Repository root; resolved to an absolute path.
 * @param db        Graph database handle.
 * @param config    Supplies `excludePaths` and `astCacheDir`.
 * @param opts      See {@link IndexOptions}.
 * @returns Totals for the run. `filesProcessed` counts every candidate file
 *          seen during the walk, cache hits included.
 */
export async function indexRepository(
	repoRoot: string,
	db: SiaDb,
	config: SiaConfig,
	opts: IndexOptions = {},
): Promise<IndexResult> {
	const start = Date.now();
	const root = resolve(repoRoot);
	const repoHash = opts.repoHash ?? createHash("sha256").update(root).digest("hex");
	const dryRun = opts.dryRun ?? false;

	const ignoreMatcher = createIgnoreMatcher(root, config.excludePaths ?? []);
	const cacheDir = join(config.astCacheDir, repoHash);
	const cachePath = join(cacheDir, "index-cache.json");
	const cache = loadCache(cachePath);

	const cacheInterval = opts.cacheSaveInterval ?? 500;
	let totalFilesScanned = 0; // candidates seen during the walk, cache hits included
	let filesProcessed = 0; // files actually parsed in phase 2
	let entitiesCreated = 0;
	let cacheHits = 0;
	const pendingBatch: PendingFact[] = [];
	const allProcessedFacts: PendingFact[] = [];
	const allInsertedIds: string[] = [];
	const skippedFiles: Array<{ path: string; error: string }> = [];

	const describeError = (err: unknown): string =>
		err instanceof Error ? err.message : String(err);
	const listDir = (dir: string) => readdirSync(dir, { withFileTypes: true });

	// Phase 1: Walk file tree, collect all files to process
	const filesToProcess: Array<{ absPath: string; relPath: string }> = [];
	const stack: string[] = [root];

	while (stack.length > 0) {
		const current = stack.pop();
		if (!current) continue;

		let entries: ReturnType<typeof listDir>;
		try {
			entries = listDir(current);
		} catch (err) {
			// An unusable root means the caller passed a bad path: surface it.
			if (current === root) throw err;
			skippedFiles.push({
				path: toPosixPath(relative(root, current)),
				error: describeError(err),
			});
			continue;
		}

		for (const entry of entries) {
			const absPath = join(current, entry.name);
			const isDir = entry.isDirectory();
			const relPath = toPosixPath(relative(root, absPath));

			if (ignoreMatcher.shouldIgnore(absPath, isDir)) continue;

			if (isDir) {
				stack.push(absPath);
				continue;
			}

			if (!getLanguageForFile(absPath)) continue;

			let mtimeMs: number;
			try {
				mtimeMs = statSync(absPath).mtimeMs;
			} catch (err) {
				// statSync follows symlinks, so a dangling link lands here.
				skippedFiles.push({ path: relPath, error: describeError(err) });
				continue;
			}
			totalFilesScanned += 1;

			const cached = cache[relPath];
			if (cached && cached.mtimeMs === mtimeMs) {
				cacheHits += 1;
				continue;
			}

			filesToProcess.push({ absPath, relPath });
		}
	}

	// Phase 2: Process files
	// Use worker threads when enabled and there are enough files to justify
	// the start-up cost; otherwise parse sequentially.
	const numWorkers = opts.workerCount ?? Math.max(1, cpus().length - 1);
	const useWorkers = numWorkers > 0 && filesToProcess.length > 10;

	let workerResults: WorkerResult[] | null = null;
	if (useWorkers) {
		try {
			workerResults = await dispatchToWorkerPool(filesToProcess, numWorkers);
		} catch {
			// Worker pool unavailable or failed (e.g. in test/Node
			// environments) — deliberately fall back to sequential parsing of
			// the full file list.
			workerResults = null;
		}
	}

	// Single place that feeds a parse result through processResult and
	// carries the by-value counters forward.
	const handleResult = async (result: WorkerResult): Promise<void> => {
		const updated = await processResult(result, {
			db,
			cache,
			pendingBatch,
			allProcessedFacts,
			allInsertedIds,
			skippedFiles,
			filesProcessed,
			entitiesCreated,
			cacheHits,
			dryRun,
			cacheInterval,
			cacheDir,
			cachePath,
			start,
			onProgress: opts.onProgress,
		});
		filesProcessed = updated.filesProcessed;
		entitiesCreated = updated.entitiesCreated;
	};

	if (workerResults) {
		for (const result of workerResults) {
			await handleResult(result);
		}
	} else {
		for (const file of filesToProcess) {
			await handleResult(await parseFileWithRetry(file.absPath, file.relPath));
		}
	}

	// Final batch flush
	const finalFlush = await flushBatch(db, pendingBatch, dryRun);
	entitiesCreated += finalFlush.created;
	allInsertedIds.push(...finalFlush.insertedIds);

	// Phase 3: Create edges from proposed_relationships
	let edgesCreated = 0;
	if (!dryRun) {
		edgesCreated += await createEdgesFromRelationships(db, allProcessedFacts);

		// Run inferEdges for semantic proximity edges
		try {
			const { inferEdges } = await import("@/capture/edge-inferrer");
			if (allInsertedIds.length > 0) {
				edgesCreated += await inferEdges(db, allInsertedIds);
			}
		} catch {
			// inferEdges failure is non-fatal: entities and explicit edges are
			// already written, so the run still counts as successful.
		}

		mkdirSync(cacheDir, { recursive: true });
		saveCache(cachePath, cache);
	}

	return {
		// Reported total includes cache hits, not just files parsed this run.
		filesProcessed: totalFilesScanned,
		entitiesCreated,
		edgesCreated,
		cacheHits,
		durationMs: Date.now() - start,
		skippedFiles: skippedFiles.length > 0 ? skippedFiles : undefined,
	};
}
|