@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
// Module: vector-search — ONNX embedder + cosine similarity (VSS fallback)
|
|
2
|
+
|
|
3
|
+
import type { Embedder } from "@/capture/embedder";
|
|
4
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
5
|
+
|
|
6
|
+
/**
 * One hit produced by a vector search.
 *
 * Both search paths in this module (sqlite-vss and the brute-force cosine
 * scan) return their matches in this shape.
 */
export interface VectorResult {
	/** The `id` column of the matching `graph_nodes` row. */
	entityId: string;
	/**
	 * Similarity score; higher means more similar. The VSS path derives it
	 * from a distance, the brute-force path uses cosine similarity, so the
	 * two scales are not directly comparable.
	 */
	score: number;
}
|
|
11
|
+
|
|
12
|
+
/** Optional tuning knobs accepted by `vectorSearch`. */
export interface VectorSearchOpts {
	/** Maximum number of results to return. `vectorSearch` falls back to 15 when omitted. */
	limit?: number;
	/** When true, entities whose `trust_tier` is 4 are excluded from the results. */
	paranoid?: boolean;
	/** When set, only entities whose `package_path` equals this value are returned. */
	packagePath?: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Minimum score a candidate must reach to be kept.
 * Applied to both the VSS-derived score and the brute-force cosine score.
 */
const SIMILARITY_THRESHOLD = 0.3;

/**
 * Upper bound on the number of `graph_nodes` rows fetched (via SQL `LIMIT`)
 * when the brute-force cosine fallback is used.
 */
const BRUTE_FORCE_LIMIT = 1000;
|
|
24
|
+
|
|
25
|
+
/**
 * Compute the cosine similarity between two embedding vectors.
 *
 * Degenerate inputs all yield 0 rather than NaN, because callers filter with
 * `score < threshold` and NaN would slip through that comparison:
 *   - vectors of different length (dimension mismatch),
 *   - either vector having zero magnitude,
 *   - any non-finite intermediate result (e.g. NaN components).
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a` to be comparable.
 * @returns A value in [-1, 1] for comparable vectors, otherwise 0.
 */
function cosineSim(a: Float32Array, b: Float32Array): number {
	// Vectors of different dimensionality are not comparable; indexing past the
	// end of the shorter one would produce `undefined` and poison the sums.
	if (a.length !== b.length) return 0;

	let dot = 0;
	let normA = 0;
	let normB = 0;

	for (let i = 0; i < a.length; i++) {
		const x = a[i];
		const y = b[i];
		dot += x * y;
		normA += x * x;
		normB += y * y;
	}

	const denom = Math.sqrt(normA) * Math.sqrt(normB);
	if (denom === 0) return 0;

	const similarity = dot / denom;
	return Number.isFinite(similarity) ? similarity : 0;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Find entities whose stored embedding is close to the embedding of `query`.
 *
 * Pipeline:
 *   1. The query text is embedded with the supplied embedder; if that yields
 *      nothing, the search returns an empty list.
 *   2. The sqlite-vss path (`vss_search` on `graph_nodes_vss`) is tried first.
 *   3. When the VSS path reports itself unavailable (returns `null`), a
 *      brute-force cosine scan over rows with a non-NULL embedding is used.
 *
 * Both paths honour `opts.paranoid` (drops trust tier 4) and
 * `opts.packagePath`, sort by descending score and cap the output at
 * `opts.limit` (15 when not given).
 *
 * @param db Graph database to search.
 * @param query Free-text query to embed.
 * @param embedder Embedder used to turn `query` into a vector.
 * @param opts Optional limit and filters.
 * @returns Matching entity IDs with their scores, best first.
 */
export async function vectorSearch(
	db: SiaDb,
	query: string,
	embedder: Embedder,
	opts?: VectorSearchOpts,
): Promise<VectorResult[]> {
	const maxResults = opts?.limit ?? 15;

	const queryVector = await embedder.embed(query);
	if (!queryVector) {
		return [];
	}

	// `null` from the VSS path means "not available" and triggers the fallback scan.
	const viaVss = tryVssSearch(db, queryVector, maxResults, opts);
	return viaVss ?? bruteForceCosineSearch(db, queryVector, maxResults, opts);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Attempt a nearest-neighbour lookup through the sqlite-vss extension.
 *
 * Over-fetches `limit * 2` candidates from `graph_nodes_vss`, converts each
 * distance into a score with `1 / (1 + distance)`, discards candidates below
 * SIMILARITY_THRESHOLD, and resolves every surviving rowid against
 * `graph_nodes` so that invalidated / archived rows and the optional
 * `paranoid` and `packagePath` filters can be applied.
 *
 * @param db Database wrapper; its raw SQLite handle is required for VSS.
 * @param queryEmbedding Embedded query vector.
 * @param limit Maximum number of results to return.
 * @param opts Optional filters (`paranoid`, `packagePath`).
 * @returns Matches sorted by descending score and capped at `limit`, or `null`
 *   when the caller should fall back to the brute-force scan: no raw handle,
 *   the VSS query returned no rows, or any statement threw (extension not
 *   loaded, table missing, etc.).
 */
function tryVssSearch(
	db: SiaDb,
	queryEmbedding: Float32Array,
	limit: number,
	opts?: VectorSearchOpts,
): VectorResult[] | null {
	const raw = db.rawSqlite();
	if (!raw) return null;

	try {
		// vss_search takes the query vector serialized as a JSON array.
		const embeddingJson = JSON.stringify(Array.from(queryEmbedding));

		const vssRows = raw
			.prepare(
				`SELECT rowid, distance
				 FROM vss_search(graph_nodes_vss, ?, ?)`,
			)
			.all(embeddingJson, limit * 2) as Array<{ rowid: number; distance: number }>;

		if (!vssRows || vssRows.length === 0) return null;

		// Prepared once and reused for every candidate instead of being
		// re-compiled on each loop iteration.
		const entityLookup = raw.prepare(
			`SELECT id, trust_tier, package_path
			 FROM graph_nodes
			 WHERE rowid = ?
			   AND t_valid_until IS NULL
			   AND archived_at IS NULL`,
		);

		const results: VectorResult[] = [];
		for (const vssRow of vssRows) {
			// Map distance (treated as L2 by this module) into (0, 1]; smaller distance => higher score.
			const score = 1 / (1 + vssRow.distance);
			if (score < SIMILARITY_THRESHOLD) continue;

			const entity = entityLookup.get(vssRow.rowid) as
				| { id: string; trust_tier: number; package_path: string | null }
				| undefined;

			// No row means the entity is gone, invalidated, or archived.
			if (!entity) continue;
			if (opts?.paranoid && entity.trust_tier === 4) continue;
			if (opts?.packagePath && entity.package_path !== opts.packagePath) continue;

			results.push({ entityId: entity.id, score });
		}

		results.sort((a, b) => b.score - a.score);
		return results.slice(0, limit);
	} catch {
		// VSS extension not loaded or a table doesn't exist — signal the caller to fall back.
		return null;
	}
}
|
|
139
|
+
|
|
140
|
+
/**
 * Reinterpret a stored embedding BLOB as a Float32Array.
 *
 * Accepts a Uint8Array (which includes Node `Buffer`, a Uint8Array subclass)
 * or an ArrayBuffer. Trailing bytes that do not form a whole 32-bit float are
 * ignored. When the view's byte offset is not 4-byte aligned the bytes are
 * copied first, because constructing a Float32Array on an unaligned offset
 * throws a RangeError.
 *
 * @returns The decoded vector, or `null` for unsupported types and blobs too
 *   short to contain a single float.
 */
function decodeEmbeddingBlob(blob: unknown): Float32Array | null {
	let bytes: Uint8Array;
	if (blob instanceof Uint8Array) {
		bytes = blob;
	} else if (blob instanceof ArrayBuffer) {
		bytes = new Uint8Array(blob);
	} else {
		return null;
	}

	const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
	const floatCount = Math.floor(bytes.byteLength / bytesPerFloat);
	if (floatCount === 0) return null;

	if (bytes.byteOffset % bytesPerFloat === 0) {
		// Aligned: view the existing memory without copying.
		return new Float32Array(bytes.buffer, bytes.byteOffset, floatCount);
	}

	// Unaligned: copy into a fresh (aligned) buffer before reinterpreting.
	const aligned = new Uint8Array(floatCount * bytesPerFloat);
	aligned.set(bytes.subarray(0, aligned.byteLength));
	return new Float32Array(aligned.buffer);
}

/**
 * Brute-force cosine similarity scan used when sqlite-vss is unavailable.
 *
 * Fetches up to BRUTE_FORCE_LIMIT rows from `graph_nodes` that have a non-NULL
 * embedding and are neither invalidated nor archived (plus the optional
 * `paranoid` / `packagePath` filters, applied in SQL), scores each against the
 * query embedding, and keeps those at or above SIMILARITY_THRESHOLD.
 *
 * Rows are skipped when their embedding cannot be decoded, has a different
 * dimensionality than the query, or produces a non-finite score.
 *
 * @param db Graph database to scan.
 * @param queryEmbedding Embedded query vector.
 * @param limit Maximum number of results to return.
 * @param opts Optional filters (`paranoid`, `packagePath`).
 * @returns Matches sorted by descending score and capped at `limit`.
 */
async function bruteForceCosineSearch(
	db: SiaDb,
	queryEmbedding: Float32Array,
	limit: number,
	opts?: VectorSearchOpts,
): Promise<VectorResult[]> {
	const clauses: string[] = [
		"embedding IS NOT NULL",
		"t_valid_until IS NULL",
		"archived_at IS NULL",
	];
	const params: unknown[] = [];

	if (opts?.paranoid) {
		clauses.push("trust_tier != 4");
	}
	if (opts?.packagePath) {
		clauses.push("package_path = ?");
		params.push(opts.packagePath);
	}

	// The LIMIT placeholder is always the last bound parameter.
	params.push(BRUTE_FORCE_LIMIT);

	const sql = `SELECT id, embedding FROM graph_nodes WHERE ${clauses.join(" AND ")} LIMIT ?`;
	const { rows } = await db.execute(sql, params);

	const results: VectorResult[] = [];

	for (const row of rows) {
		const storedEmbedding = decodeEmbeddingBlob(row.embedding);
		if (!storedEmbedding) continue;

		// Embeddings of a different dimensionality are not comparable to the query.
		if (storedEmbedding.length !== queryEmbedding.length) continue;

		const score = cosineSim(queryEmbedding, storedEmbedding);
		// Written as a negated >= so that a NaN score is rejected as well.
		if (!(score >= SIMILARITY_THRESHOLD)) continue;

		results.push({ entityId: row.id as string, score });
	}

	results.sort((a, b) => b.score - a.score);
	return results.slice(0, limit);
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Module: workspace-search — Workspace-scoped search via ATTACH
|
|
2
|
+
|
|
3
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
4
|
+
import type { SiaSearchResult } from "@/mcp/tools/sia-search";
|
|
5
|
+
import { SIA_HOME } from "@/shared/config";
|
|
6
|
+
import { getPeerRepos } from "@/workspace/cross-repo";
|
|
7
|
+
|
|
8
|
+
/** Input bundle for `workspaceSearch`. */
export interface WorkspaceSearchOpts {
	/** Database of the primary repo; all queries and peer ATTACH/DETACH statements run on it. */
	primaryDb: SiaDb;
	/** Meta database handed to `getPeerRepos` to discover the workspace's peer repos. */
	metaDb: SiaDb;
	/** Bridge database. NOTE(review): not read by `workspaceSearch` itself — confirm whether it is still needed. */
	bridgeDb: SiaDb;
	/** Workspace whose peer repos should be searched. */
	workspaceId: string;
	/** Repo ID of the primary repo, passed to `getPeerRepos`. */
	primaryRepoId: string;
	/** Search text. NOTE(review): `workspaceSearch` does not use this value; results are ranked by importance only. */
	query: string;
	/** Sia home directory; defaults to `SIA_HOME` when omitted. */
	siaHome?: string;
	/** Maximum number of entities returned (and fetched per database); defaults to 15. */
	limit?: number;
	/** When true, rows with `trust_tier` 4 are excluded. */
	paranoid?: boolean;
	/** When non-empty, only rows whose `type` is one of these values are returned. */
	node_types?: string[];
	/** When set, only rows whose `package_path` equals this value are returned. */
	package_path?: string;
}
|
|
22
|
+
|
|
23
|
+
/** Outcome of `workspaceSearch`. */
export interface WorkspaceSearchResult {
	/** Merged entities from the primary and peer repos, highest importance first. */
	entities: SiaSearchResult[];
	/**
	 * Names (or repo IDs when no name is available) of peers that were not
	 * searched: those beyond the peer cap and those whose query failed.
	 */
	missingRepos: string[];
}
|
|
28
|
+
|
|
29
|
+
/**
 * Upper bound on the number of peer repos that are searched.
 * Derived by the author as: SQLite attach limit of 10, minus main, minus bridge = 8.
 */
const MAX_PEERS = 8;
|
|
31
|
+
|
|
32
|
+
/**
 * Perform a workspace-scoped search across the primary repo and its peers.
 *
 * The primary database is queried first. Each peer is then handled one at a
 * time (ATTACH as `peer_db`, query, DETACH) so at most one extra database is
 * attached at any moment. Every database contributes up to `limit` rows
 * ordered by importance; the merged list is re-sorted by importance and
 * capped at `limit`.
 *
 * Peers beyond MAX_PEERS, and peers whose ATTACH or query fails, are reported
 * in `missingRepos` instead of raising an error. A failure of the cleanup
 * DETACH alone does not mark a peer as missing, since its rows have already
 * been merged.
 *
 * Does NOT set WAL pragma on attached read-only databases.
 *
 * NOTE(review): `opts.query` and `opts.bridgeDb` are not used here; ranking is
 * by importance only.
 *
 * @param opts Databases, workspace identity and filters.
 * @returns The merged entities plus the list of peers that were not searched.
 */
export async function workspaceSearch(opts: WorkspaceSearchOpts): Promise<WorkspaceSearchResult> {
	const siaHome = opts.siaHome ?? SIA_HOME;
	const limit = opts.limit ?? 15;

	// Discover peers via meta.db; anything beyond the cap is reported, not searched.
	const allPeers = await getPeerRepos(opts.metaDb, opts.workspaceId, opts.primaryRepoId, siaHome);
	const peers = allPeers.slice(0, MAX_PEERS);
	const missingRepos: string[] = allPeers
		.slice(MAX_PEERS)
		.map((peer) => peer.name ?? peer.repoId);

	// Build the WHERE clause and its bind parameters together so that the
	// placeholder order and the parameter order cannot drift apart.
	const clauses: string[] = ["t_valid_until IS NULL", "archived_at IS NULL"];
	const filterParams: unknown[] = [];
	if (opts.paranoid) {
		clauses.push("trust_tier != 4");
	}
	if (opts.node_types && opts.node_types.length > 0) {
		const placeholders = opts.node_types.map(() => "?").join(", ");
		clauses.push(`type IN (${placeholders})`);
		filterParams.push(...opts.node_types);
	}
	if (opts.package_path) {
		clauses.push("package_path = ?");
		filterParams.push(opts.package_path);
	}
	const whereClause = clauses.join(" AND ");
	const queryParams = [...filterParams, limit];

	// Query primary
	const allEntities: SiaSearchResult[] = [];
	const primarySql = `SELECT * FROM graph_nodes WHERE ${whereClause} ORDER BY importance DESC LIMIT ?`;
	const primaryResult = await opts.primaryDb.execute(primarySql, queryParams);
	for (const row of primaryResult.rows) {
		allEntities.push(mapRow(row, null));
	}

	// Query each peer via ATTACH
	const peerSql = `SELECT * FROM peer_db.graph_nodes WHERE ${whereClause} ORDER BY importance DESC LIMIT ?`;
	for (const peer of peers) {
		let attached = false;
		try {
			await opts.primaryDb.execute("ATTACH DATABASE ? AS peer_db", [peer.graphDbPath]);
			attached = true;

			const peerResult = await opts.primaryDb.execute(peerSql, queryParams);
			for (const row of peerResult.rows) {
				allEntities.push(mapRow(row, peer.name));
			}
		} catch {
			// ATTACH or the query failed: record the peer as not searched.
			missingRepos.push(peer.name ?? peer.repoId);
		} finally {
			// Only detach what was actually attached.
			if (attached) {
				try {
					await opts.primaryDb.execute("DETACH DATABASE peer_db", []);
				} catch {
					/* best-effort cleanup; the peer's outcome is already recorded */
				}
			}
		}
	}

	// Sort all by importance DESC, take top `limit`
	allEntities.sort((a, b) => b.importance - a.importance);
	const capped = allEntities.slice(0, limit);

	return { entities: capped, missingRepos };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Convert a raw `graph_nodes` row into a `SiaSearchResult`.
 *
 * Nullish text columns fall back to defaults: `summary` and `content` to the
 * empty string, `tags` and `file_paths` to the JSON text `"[]"`. Nullish
 * `conflict_group_id` and `t_valid_from` become `null`. All other columns are
 * passed through as-is.
 *
 * @param row Raw row as returned by the database driver.
 * @param sourceRepoName Name of the peer repo the row came from, or `null` for the primary repo.
 */
function mapRow(row: Record<string, unknown>, sourceRepoName: string | null): SiaSearchResult {
	const {
		id,
		type,
		name,
		summary,
		content,
		trust_tier,
		confidence,
		importance,
		tags,
		file_paths,
		conflict_group_id,
		t_valid_from,
	} = row;

	const mapped: SiaSearchResult = {
		id: id as string,
		type: type as string,
		name: name as string,
		summary: (summary as string) ?? "",
		content: (content as string) ?? "",
		trust_tier: trust_tier as number,
		confidence: confidence as number,
		importance: importance as number,
		tags: (tags as string) ?? "[]",
		file_paths: (file_paths as string) ?? "[]",
		conflict_group_id: (conflict_group_id as string | null) ?? null,
		t_valid_from: (t_valid_from as number | null) ?? null,
		source_repo_name: sourceRepoName,
	};
	return mapped;
}
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
// Module: context-mode — Large output chunking with intent-based retrieval using strategy pattern
|
|
2
|
+
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import type { Embedder } from "@/capture/embedder";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Public interfaces
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
/** A piece of content produced by a chunking strategy, before it is stored. */
export interface RawChunk {
	/** The chunk's text. */
	text: string;
	/** Optional strategy-specific details; `headingChunker` records the owning heading line under `heading`. */
	metadata?: Record<string, unknown>;
}
|
|
15
|
+
|
|
16
|
+
/** A chunk after it has been persisted, together with its embedding. */
export interface StoredChunk {
	/** Identifier of the chunk. */
	id: string;
	/** The chunk's text. */
	text: string;
	/** Embedding vector for `text`. NOTE(review): producer is not visible here — confirm dimensionality and source. */
	embedding: number[];
	/** ContentChunk entity ID in graph. */
	nodeId: string;
}
|
|
22
|
+
|
|
23
|
+
/** Pluggable strategy that splits large content into chunks. */
export interface ChunkStrategy {
	/** Identifier of the strategy (e.g. `"headingChunker"`). */
	name: string;
	/** Split `content` into raw chunks. */
	chunk(content: string): RawChunk[];
	/**
	 * Optional asynchronous hook that receives a stored chunk and the database.
	 * NOTE(review): presumably used to add strategy-specific graph edges — confirm against the caller.
	 */
	extraEdges?(chunk: StoredChunk, db: SiaDb): Promise<void>;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Result reported by context mode.
 *
 * NOTE(review): the code that fills this in is outside the visible part of the
 * file; the field descriptions below are inferred from names and need confirmation.
 */
export interface ContextModeResult {
	/** Presumably whether context mode was actually applied to the output — TODO confirm. */
	applied: boolean;
	/** Chunk texts returned to the caller. */
	chunks: string[];
	/** Presumably the number of chunks that were indexed — TODO confirm. */
	totalIndexed: number;
	/** Presumably a measure of context saved by chunking; unit not visible here — TODO confirm. */
	contextSavings: number;
}
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// headingChunker strategy — splits markdown by heading lines outside code blocks
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Chunking strategy that splits markdown into one chunk per heading section.
 *
 * A new chunk starts at every ATX heading line (`#` … `######` followed by a space)
 * that is not inside a fenced code block. Each chunk carries the heading line that
 * opened it in `metadata.heading` (`""` for text that precedes the first heading).
 *
 * Fence handling:
 * - both backtick and tilde fences are recognised, with any leading indentation;
 * - a fence is only closed by a line using the same fence character, at least as
 *   many of them, and nothing else on the line;
 * - a backtick line whose remainder contains another backtick (inline code such as
 *   "```x```") does not open a fence.
 *
 * Sections that contain only whitespace are skipped. If nothing is left, the whole
 * content is returned as a single chunk with an empty heading.
 */
export const headingChunker: ChunkStrategy = {
	name: "headingChunker",

	chunk(content: string): RawChunk[] {
		// Hoisted so the patterns are compiled once per call rather than once per line.
		const headingPattern = /^#{1,6} /;
		const fencePattern = /^(`{3,}|~{3,})([\s\S]*)$/;

		const chunks: RawChunk[] = [];
		let openFence = ""; // marker of the currently open fence, "" when outside a code block
		let currentHeading = "";
		let currentLines: string[] = [];

		const flush = () => {
			const text = currentLines.join("\n");
			// Whitespace-only sections (e.g. blank lines before the first heading) carry no content.
			if (text.trim().length > 0) {
				chunks.push({ text, metadata: { heading: currentHeading } });
			}
		};

		for (const line of content.split("\n")) {
			// trim() also removes a trailing "\r" so CRLF input is handled.
			const fence = fencePattern.exec(line.trim());
			if (fence) {
				const marker = fence[1] ?? "";
				const rest = fence[2] ?? "";
				if (openFence === "") {
					// A backtick fence's info string may not contain backticks.
					const isInlineCode = marker.startsWith("`") && rest.includes("`");
					if (!isInlineCode) openFence = marker;
				} else if (
					marker[0] === openFence[0] &&
					marker.length >= openFence.length &&
					rest.trim() === ""
				) {
					openFence = "";
				}
				currentLines.push(line);
				continue;
			}

			// Only split on headings outside code blocks
			if (openFence === "" && headingPattern.test(line)) {
				flush();
				currentHeading = line;
				currentLines = [line];
			} else {
				currentLines.push(line);
			}
		}

		flush();

		// Nothing but whitespace: hand the content back as a single chunk.
		if (chunks.length === 0) {
			return [{ text: content, metadata: { heading: "" } }];
		}

		return chunks;
	},
};
|
|
87
|
+
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// lineChunker strategy — groups newline-delimited lines into ~512-token (~2048 char) paragraphs
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
/** Maximum chunk length in characters (~512 tokens). */
const LINE_CHUNK_SIZE = 2048;

/**
 * Chunking strategy that groups consecutive lines into chunks of at most
 * `LINE_CHUNK_SIZE` characters.
 *
 * Each chunk records the zero-based, inclusive line range it covers in
 * `metadata.startLine` / `metadata.endLine`. Blank lines are kept inside a chunk so
 * the text always matches its recorded line range. A single line longer than the
 * budget is cut into consecutive pieces that all report that line's index.
 * Whitespace-only chunks are skipped, so empty input yields `[]`.
 */
export const lineChunker: ChunkStrategy = {
	name: "lineChunker",

	chunk(content: string): RawChunk[] {
		const lines = content.split("\n");
		const chunks: RawChunk[] = [];
		let pending: string[] = []; // lines of the chunk being built
		let pendingLength = 0; // length of pending.join("\n")
		let pendingStart = 0;

		const flush = (endLine: number) => {
			if (pending.length === 0) return;
			const text = pending.join("\n");
			if (text.trim().length > 0) {
				chunks.push({ text, metadata: { startLine: pendingStart, endLine } });
			}
			pending = [];
			pendingLength = 0;
		};

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i] ?? "";
			const grown = pending.length === 0 ? line.length : pendingLength + 1 + line.length;

			// Adding this line would exceed the budget: close the chunk built so far.
			if (grown > LINE_CHUNK_SIZE && pending.length > 0) {
				flush(i - 1);
			}

			// A line that alone exceeds the budget is cut into budget-sized pieces.
			if (line.length > LINE_CHUNK_SIZE) {
				let offset = 0;
				while (offset < line.length) {
					let end = Math.min(offset + LINE_CHUNK_SIZE, line.length);
					const last = line.charCodeAt(end - 1);
					// Do not cut between the two halves of a surrogate pair.
					if (end < line.length && last >= 0xd800 && last <= 0xdbff) end -= 1;
					const piece = line.slice(offset, end);
					if (piece.trim().length > 0) {
						chunks.push({ text: piece, metadata: { startLine: i, endLine: i } });
					}
					offset = end;
				}
				continue;
			}

			if (pending.length === 0) pendingStart = i;
			pendingLength = pending.length === 0 ? line.length : pendingLength + 1 + line.length;
			pending.push(line);
		}

		flush(lines.length - 1);

		return chunks;
	},
};
|
|
129
|
+
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
// contentTypeChunker strategy — detects content type and delegates to the right chunker
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
/**
 * Chunking strategy that inspects the content and delegates to a concrete chunker.
 *
 * - Content that starts with `{` or `[` and parses as JSON is pretty-printed
 *   (2-space indent) and chunked by line.
 * - Content containing a markdown heading line is chunked by heading. If that
 *   does not actually split the content (a lone heading, or `#` lines that only
 *   occur inside code fences), line chunking is used instead whenever it produces
 *   more chunks, so a large document is never handed back as one giant chunk.
 * - Everything else is chunked by line.
 */
export const contentTypeChunker: ChunkStrategy = {
	name: "contentTypeChunker",

	chunk(content: string): RawChunk[] {
		const leading = content.trimStart();

		// Detect JSON (starts with { or [)
		if (leading.startsWith("{") || leading.startsWith("[")) {
			try {
				const parsed: unknown = JSON.parse(content);
				return lineChunker.chunk(JSON.stringify(parsed, null, 2));
			} catch {
				// Not valid JSON (possibly truncated by output cap) — fall through to heading/line detection
			}
		}

		// Check for markdown headings
		if (/^#{1,6} /m.test(content)) {
			const sections = headingChunker.chunk(content);
			if (sections.length > 1) {
				return sections;
			}
			// Heading chunking did not split anything; prefer line chunks if they do.
			const byLine = lineChunker.chunk(content);
			return byLine.length > sections.length ? byLine : sections;
		}

		// Default: lineChunker
		return lineChunker.chunk(content);
	},
};
|
|
160
|
+
|
|
161
|
+
// ---------------------------------------------------------------------------
|
|
162
|
+
// Cosine similarity between two number arrays
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
/**
 * Cosine similarity of two vectors.
 *
 * When the vectors differ in length only the shared leading components are used.
 *
 * @returns A value in [-1, 1], or 0 when either vector is empty, either has zero
 *   magnitude, or the result is not a finite number (NaN or overflowing
 *   components). Returning 0 instead of NaN keeps callers that sort by score
 *   working with a consistent ordering.
 */
function cosineSimilarity(a: number[], b: number[]): number {
	const len = Math.min(a.length, b.length);
	if (len === 0) return 0;

	let dot = 0;
	let normA = 0;
	let normB = 0;
	for (let i = 0; i < len; i++) {
		const x = a[i] ?? 0;
		const y = b[i] ?? 0;
		dot += x * y;
		normA += x * x;
		normB += y * y;
	}

	const denom = Math.sqrt(normA) * Math.sqrt(normB);
	if (denom === 0) return 0;

	const score = dot / denom;
	return Number.isFinite(score) ? score : 0;
}
|
|
179
|
+
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// applyContextMode — main entry point
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
/**
 * Apply context mode to a large content string.
 *
 * If `content.length <= config.threshold`, `intent` is undefined, or the strategy
 * produces no chunks, the raw content is returned unchanged (`applied: false`).
 *
 * Otherwise the content is chunked via the strategy; each chunk is embedded and
 * inserted into `graph_nodes` as a `ContentChunk` row, and `strategy.extraEdges`
 * (when defined) is awaited for it. The intent is then embedded and the `topK`
 * chunks with the highest cosine similarity to it are returned. If the intent
 * cannot be embedded, the first `topK` chunks in document order are returned.
 *
 * @param content - Text to reduce.
 * @param intent - What the caller is looking for; drives chunk ranking.
 * @param strategy - Chunking strategy to apply.
 * @param db - Graph database the chunks are written to.
 * @param embedder - Produces embeddings for chunks and the intent.
 * @param sessionId - Used to name the stored chunks (`chunk-<sessionId>-<index>`).
 * @param config - `threshold`: maximum length that is passed through untouched;
 *   `topK`: number of chunks to return (negative or NaN values are treated as 0).
 * @returns The selected chunks plus indexing statistics; `contextSavings` is never negative.
 */
export async function applyContextMode(
	content: string,
	intent: string | undefined,
	strategy: ChunkStrategy,
	db: SiaDb,
	embedder: Embedder,
	sessionId: string,
	config: { threshold: number; topK: number },
): Promise<ContextModeResult> {
	const passthrough = (): ContextModeResult => ({
		applied: false,
		chunks: [content],
		totalIndexed: 0,
		contextSavings: 0,
	});

	// Short-circuit: below threshold or no intent
	if (content.length <= config.threshold || intent === undefined) {
		return passthrough();
	}

	// 1. Chunk content via strategy
	const rawChunks = strategy.chunk(content);
	if (rawChunks.length === 0) {
		// Nothing to index or rank — returning the content beats returning nothing.
		return passthrough();
	}

	// slice(0, negative) would mean "all but the last N", so clamp topK to >= 0.
	const topK = Math.max(0, Math.trunc(config.topK) || 0);
	const nowStr = String(Date.now());

	// 2. Embed each chunk and store as ContentChunk entity in the graph
	const storedChunks: StoredChunk[] = [];

	for (const [i, raw] of rawChunks.entries()) {
		const nodeId = randomUUID();
		const chunkName = `chunk-${sessionId}-${i}`;

		// Embed the chunk text; a missing embedding becomes [] and later scores 0.
		const rawEmb = await embedder.embed(raw.text);
		const embedding: number[] = rawEmb ? Array.from(rawEmb) : [];

		// Store entity in graph DB (table is 'graph_nodes' after v5 migration)
		await db.execute(
			`INSERT INTO graph_nodes (id, type, name, summary, content, trust_tier, confidence, base_confidence, importance, base_importance, access_count, edge_count, tags, file_paths, t_created, t_valid_from, created_by, created_at, last_accessed)
VALUES (?, 'ContentChunk', ?, ?, ?, 3, 0.8, 0.8, 0.5, 0.5, 0, 0, '[]', '[]', ?, ?, 'sia-context-mode', ?, ?)`,
			[nodeId, chunkName, raw.text.slice(0, 100), raw.text, nowStr, nowStr, nowStr, nowStr],
		);

		const stored: StoredChunk = {
			id: randomUUID(),
			text: raw.text,
			embedding,
			nodeId,
		};
		storedChunks.push(stored);

		// Call extraEdges if defined
		if (strategy.extraEdges) {
			await strategy.extraEdges(stored, db);
		}
	}

	// Shared by both return paths so the savings figure is clamped consistently.
	const buildResult = (texts: string[]): ContextModeResult => {
		const returnedLength = texts.reduce((sum, text) => sum + text.length, 0);
		return {
			applied: true,
			chunks: texts,
			totalIndexed: storedChunks.length,
			contextSavings: Math.max(0, content.length - returnedLength),
		};
	};

	// 3. Embed the intent
	const intentEmbRaw = await embedder.embed(intent);
	if (!intentEmbRaw) {
		// Embedder failed — cannot rank by intent, return the first topK chunks in order
		return buildResult(storedChunks.slice(0, topK).map((s) => s.text));
	}
	const intentEmbedding: number[] = Array.from(intentEmbRaw);

	// 4. Cosine similarity between intent embedding and each stored chunk embedding
	const scored = storedChunks.map((chunk) => ({
		chunk,
		score: cosineSimilarity(intentEmbedding, chunk.embedding),
	}));

	// Sort by similarity descending, take top-K
	scored.sort((a, b) => b.score - a.score);

	return buildResult(scored.slice(0, topK).map((s) => s.chunk.text));
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// Module: credential-pass — Build allowlisted env for sandbox subprocesses
|
|
2
|
+
|
|
3
|
+
/** Exact env var names that always pass through. */
const EXACT_ALLOWLIST = [
	"PATH",
	"HOME",
	"USER",
	"SHELL",
	"LANG",
	"TERM",
	"KUBECONFIG",
	"GH_TOKEN",
	"GITHUB_TOKEN",
	"NODE_PATH",
	"BUN_INSTALL",
] as const;

/** Glob prefixes — any env var starting with these passes through. */
const PREFIX_ALLOWLIST = ["AWS_", "GOOGLE_", "GCLOUD_", "CLOUDSDK_", "DOCKER_", "GITHUB_"] as const;

/** Exported for test assertions. */
export const ENV_ALLOWLIST = { exact: EXACT_ALLOWLIST, prefixes: PREFIX_ALLOWLIST };

/**
 * Decide whether an environment variable may be passed to a sandbox subprocess.
 *
 * A name is allowed when it equals an entry of `EXACT_ALLOWLIST` or starts with an
 * entry of `PREFIX_ALLOWLIST`. On Windows, environment variable names are
 * case-insensitive and keep whatever casing they were defined with (e.g. `Path`),
 * so there the comparison ignores case; every allowlist entry is upper-case.
 *
 * @param key - Environment variable name.
 * @param platform - Platform identifier; defaults to the current `process.platform`.
 * @returns `true` when the variable is allowlisted.
 */
function isAllowlisted(key: string, platform: string = process.platform): boolean {
	const name = platform === "win32" ? key.toUpperCase() : key;
	if ((EXACT_ALLOWLIST as readonly string[]).includes(name)) return true;
	return PREFIX_ALLOWLIST.some((prefix) => name.startsWith(prefix));
}
|
|
28
|
+
|
|
29
|
+
/**
 * Assemble the environment handed to a sandbox subprocess.
 *
 * Starts from an empty object, copies every allowlisted variable that is defined in
 * `process.env`, then layers `overrides` on top so caller-supplied values replace
 * inherited ones. Overrides whose name is not allowlisted are dropped with a warning
 * that mentions only the variable name — values are never logged.
 *
 * @param overrides - Optional caller-supplied variables; subject to the same allowlist.
 * @returns A new object holding only allowlisted variables.
 */
export function buildSandboxEnv(overrides?: Record<string, string>): Record<string, string> {
	const sandboxEnv: Record<string, string> = {};

	// Inherit allowlisted variables from the parent process.
	for (const [key, value] of Object.entries(process.env)) {
		if (value === undefined || !isAllowlisted(key)) continue;
		sandboxEnv[key] = value;
	}

	if (!overrides) {
		return sandboxEnv;
	}

	// Caller-supplied values go last so they win over inherited ones.
	for (const [key, value] of Object.entries(overrides)) {
		if (!isAllowlisted(key)) {
			console.warn(`[sia-sandbox] env override "${key}" dropped: not in allowlist`);
			continue;
		}
		sandboxEnv[key] = value;
	}

	return sandboxEnv;
}
|