@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
// Module: sia-fetch-and-index — Fetch a URL, convert to markdown, and index via contentTypeChunker
|
|
2
|
+
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import * as dns from "node:dns/promises";
|
|
5
|
+
import TurndownService from "turndown";
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import type { Embedder } from "@/capture/embedder";
|
|
8
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
9
|
+
import { contentTypeChunker } from "@/sandbox/context-mode";
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Input / Output types
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
/** Zod schema describing the input accepted by `handleSiaFetchAndIndex`. */
export const SiaFetchAndIndexInput = z.object({
	// URL to fetch. The handler parses it with `new URL` and rejects any scheme
	// other than http: / https:.
	url: z.string(),
	// Optional free-form intent. The handler in this module does not read it.
	intent: z.string().optional(),
	// Optional tags; serialized as a JSON array onto every node the handler inserts.
	tags: z.array(z.string()).optional(),
});
|
|
20
|
+
|
|
21
|
+
/**
 * Result of `handleSiaFetchAndIndex`.
 *
 * On failure only `error` is set; on success the other four fields are set and
 * `error` is absent.
 */
export interface SiaFetchAndIndexResult {
	// Number of ContentChunk nodes inserted for the fetched document.
	indexed?: number;
	// Response MIME type: lower-cased, parameters (e.g. charset) stripped,
	// "text/plain" when the server sent no Content-Type header.
	contentType?: string;
	// The URL exactly as supplied in the input.
	sourceUrl?: string;
	// Id of the ExternalRef node created for the URL.
	externalRefId?: string;
	// Human-readable failure description (invalid URL, blocked host, HTTP error, ...).
	error?: string;
}
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// SSRF Protection
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/**
 * Returns true if the given IP address must be treated as private and therefore
 * must not be fetched (SSRF guard).
 *
 * Blocked IPv4 ranges: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
 * 172.16.0.0/12 and 192.168.0.0/16. IPv4-mapped IPv6 addresses
 * (`::ffff:a.b.c.d`, in any letter case) are unwrapped and checked as IPv4.
 * The IPv6 loopback `::1` is blocked, and so is every input that is not a
 * well-formed dotted-quad IPv4 address: the function fails closed.
 *
 * @param ip - Textual IP address, typically a DNS resolution result.
 * @returns `true` when the address is private, loopback, link-local, or cannot
 *   be parsed as a dotted-quad IPv4 address; `false` otherwise.
 */
export function isPrivateIp(ip: string): boolean {
	// IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1). Hex digits in an IPv6 literal are
	// case-insensitive, so the prefix must be matched case-insensitively too.
	const mappedPrefix = "::ffff:";
	if (ip.toLowerCase().startsWith(mappedPrefix)) {
		return isPrivateIp(ip.slice(mappedPrefix.length));
	}

	// IPv6 loopback
	if (ip === "::1" || ip === "0:0:0:0:0:0:0:1") {
		return true;
	}

	// Parse IPv4 octets
	const parts = ip.split(".");
	if (parts.length !== 4) {
		// Non-IPv4 that isn't ::1 — treat as potentially private to be safe
		return true;
	}

	// Every octet must be 1-3 decimal digits in 0..255. Without this check a
	// malformed octet becomes NaN, fails every comparison below, and the address
	// would be reported as public.
	const octets = parts.map((part) => (/^\d{1,3}$/.test(part) ? Number(part) : Number.NaN));
	if (octets.some((octet) => Number.isNaN(octet) || octet > 255)) {
		return true;
	}

	const [a, b] = octets;

	// 127.0.0.0/8 — loopback
	if (a === 127) return true;
	// 10.0.0.0/8 — private
	if (a === 10) return true;
	// 172.16.0.0/12 — private (172.16.x.x through 172.31.x.x)
	if (a === 172 && b >= 16 && b <= 31) return true;
	// 192.168.0.0/16 — private
	if (a === 192 && b === 168) return true;
	// 169.254.0.0/16 — link-local
	if (a === 169 && b === 254) return true;
	// 0.0.0.0/8 — this network
	if (a === 0) return true;

	return false;
}
|
|
73
|
+
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// handleSiaFetchAndIndex
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
/** Outcome of the network half of the tool: decoded body plus MIME type, or an error message. */
type RemoteDocument = { contentType: string; body: string } | { error: string };

/**
 * Resolve `hostname` and refuse it when any resolved address is private.
 *
 * Tries `dns.resolve` first and falls back to `dns.lookup` when that throws.
 * An empty answer is treated as a resolution failure so callers can safely use
 * the first address.
 */
async function resolvePublicIps(hostname: string): Promise<{ ips: string[] } | { error: string }> {
	let ips: string[];
	try {
		ips = await dns.resolve(hostname);
	} catch {
		// If DNS resolution fails entirely, attempt lookup as fallback
		try {
			const lookupResult = await dns.lookup(hostname);
			ips = [lookupResult.address];
		} catch (lookupErr) {
			const reason = lookupErr instanceof Error ? lookupErr.message : String(lookupErr);
			return { error: `DNS resolution failed for host: ${hostname} (${reason})` };
		}
	}

	if (ips.length === 0) {
		return { error: `DNS resolution failed for host: ${hostname} (no addresses returned)` };
	}

	const privateIp = ips.find((ip) => isPrivateIp(ip));
	if (privateIp !== undefined) {
		return { error: `Blocked: ${hostname} resolves to private IP ${privateIp}` };
	}
	return { ips };
}

/**
 * Read and UTF-8 decode a response body, giving up as soon as more than
 * `maxBytes` bytes have arrived so an oversized body is never fully buffered.
 */
async function readBodyWithCap(
	response: Response,
	maxBytes: number,
): Promise<{ text: string } | { error: string }> {
	const reader = response.body?.getReader();
	if (!reader) {
		// No body stream at all: equivalent to an empty body.
		return { text: "" };
	}

	const decoder = new TextDecoder();
	let received = 0;
	let text = "";
	for (;;) {
		const chunk = await reader.read();
		if (chunk.done) break;
		received += chunk.value.byteLength;
		if (received > maxBytes) {
			// Stop the transfer; a failure to cancel changes nothing for the caller.
			await reader.cancel().catch(() => undefined);
			return { error: `Response too large: more than ${maxBytes} bytes (max ${maxBytes})` };
		}
		text += decoder.decode(chunk.value, { stream: true });
	}
	text += decoder.decode();
	return { text };
}

/**
 * Fetch `start`, following at most `maxRedirects` redirects by hand.
 *
 * Every hop (including each redirect target) goes through the scheme check and
 * the private-IP check before any request is sent; letting `fetch` follow
 * redirects itself would skip those checks for the redirect target.
 *
 * The request is sent to the resolved IP with the original host in the `Host`
 * header so the address that was validated is the address that is contacted
 * (DNS rebinding / TOCTOU).
 * NOTE(review): for https: URLs this means the TLS handshake targets an IP
 * literal — confirm certificate validation behaves as intended in the runtime.
 */
async function fetchRemoteDocument(
	start: URL,
	signal: AbortSignal,
	maxBytes: number,
	maxRedirects: number,
): Promise<RemoteDocument> {
	const redirectStatuses = new Set([301, 302, 303, 307, 308]);
	let current = start;

	for (let hop = 0; hop <= maxRedirects; hop++) {
		// Block non-HTTP(S) schemes
		if (current.protocol !== "http:" && current.protocol !== "https:") {
			return { error: `Only HTTP and HTTPS URLs are supported. Got: ${current.protocol}` };
		}

		// Resolve hostname via DNS → block private IPs
		const resolution = await resolvePublicIps(current.hostname);
		if ("error" in resolution) return resolution;

		const target = new URL(current.toString());
		target.hostname = resolution.ips[0];

		let response: Response;
		try {
			response = await fetch(target.toString(), {
				signal,
				redirect: "manual",
				headers: {
					// `host` (not `hostname`) keeps a non-default port in the header.
					Host: current.host,
					"User-Agent": "Sia/1.0 (knowledge graph indexer)",
					Accept: "text/html,text/markdown,application/json,text/plain,*/*",
				},
			});
		} catch (err) {
			return { error: `Fetch failed: ${err instanceof Error ? err.message : String(err)}` };
		}

		if (redirectStatuses.has(response.status)) {
			const location = response.headers.get("location");
			// The redirect body is never used; release the connection.
			await response.body?.cancel().catch(() => undefined);
			if (!location) {
				return { error: `HTTP ${response.status}: redirect without Location header` };
			}
			try {
				// Relative targets are resolved against the hostname URL, not the IP URL.
				current = new URL(location, current);
			} catch {
				return { error: `Invalid redirect target: ${location}` };
			}
			continue;
		}

		if (!response.ok) {
			return { error: `HTTP ${response.status}: ${response.statusText}` };
		}

		// Content-type detection & size cap
		const rawContentType = response.headers.get("content-type") ?? "text/plain";
		const contentType = rawContentType.split(";")[0].trim().toLowerCase();

		// Check content-length header first; the streaming cap below covers
		// servers that omit or understate it.
		const contentLengthHeader = response.headers.get("content-length");
		if (contentLengthHeader && Number(contentLengthHeader) > maxBytes) {
			await response.body?.cancel().catch(() => undefined);
			return {
				error: `Response too large: ${contentLengthHeader} bytes (max ${maxBytes})`,
			};
		}

		try {
			const read = await readBodyWithCap(response, maxBytes);
			if ("error" in read) return read;
			return { contentType, body: read.text };
		} catch (err) {
			return {
				error: `Failed to read response body: ${err instanceof Error ? err.message : String(err)}`,
			};
		}
	}

	return { error: `Too many redirects (max ${maxRedirects})` };
}

/**
 * Fetch a URL, convert HTML to markdown, chunk the text, and store the chunks
 * plus one ExternalRef node in `graph_nodes`.
 *
 * Safety properties of the network step:
 * - only http: / https: URLs are fetched;
 * - hosts resolving to a private address are refused, on every redirect hop;
 * - one 30 s deadline covers the whole exchange, including the body download;
 * - at most 5 MB of body is accepted.
 *
 * @param db - Database handle used for the `graph_nodes` inserts.
 * @param input - Validated tool input; only `url` and `tags` are read.
 * @param _embedder - Unused by this handler.
 * @param sessionId - Session id embedded in each chunk's name.
 * @returns Counts and ids on success, or `{ error }` describing why nothing was
 *   indexed. Failures of the database inserts are not caught here.
 */
export async function handleSiaFetchAndIndex(
	db: SiaDb,
	input: z.infer<typeof SiaFetchAndIndexInput>,
	_embedder: Embedder,
	sessionId: string,
): Promise<SiaFetchAndIndexResult> {
	const { url, tags } = input;

	const MAX_CONTENT_LENGTH = 5 * 1024 * 1024; // 5 MB cap
	const MAX_REDIRECTS = 5;
	const TIMEOUT_MS = 30_000;

	// 1. Parse URL — error if invalid
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		return { error: `Invalid URL: ${url}` };
	}

	// 2-5. Validate, fetch and read the document under a single deadline. The
	// timer stays armed until the body has been read so a stalled download is
	// aborted as well.
	const controller = new AbortController();
	const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS);
	let fetched: RemoteDocument;
	try {
		fetched = await fetchRemoteDocument(
			parsed,
			controller.signal,
			MAX_CONTENT_LENGTH,
			MAX_REDIRECTS,
		);
	} finally {
		clearTimeout(timeout);
	}

	if ("error" in fetched) {
		return { error: fetched.error };
	}
	const { contentType, body: rawBody } = fetched;

	// 6. HTML → markdown via turndown
	let processedContent: string;
	if (contentType === "text/html") {
		const td = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
		processedContent = td.turndown(rawBody);
	} else {
		processedContent = rawBody;
	}

	// 7. Chunk content with contentTypeChunker and insert with trust_tier 4
	const tagsJson = JSON.stringify(tags ?? []);
	const nowStr = String(Date.now());

	const rawChunks = contentTypeChunker.chunk(processedContent);
	const chunkIds: string[] = [];

	for (let i = 0; i < rawChunks.length; i++) {
		const raw = rawChunks[i];
		const nodeId = randomUUID();
		const chunkName = `fetch-chunk-${sessionId}-${i}`;
		const summary = raw.text.slice(0, 100);

		await db.execute(
			`INSERT INTO graph_nodes (id, type, name, summary, content, trust_tier, confidence, base_confidence, importance, base_importance, access_count, edge_count, tags, file_paths, t_created, t_valid_from, created_by, created_at, last_accessed)
			 VALUES (?, 'ContentChunk', ?, ?, ?, 4, 0.7, 0.7, 0.4, 0.4, 0, 0, ?, '[]', ?, ?, 'sia-fetch-and-index', ?, ?)`,
			[nodeId, chunkName, summary, raw.text, tagsJson, nowStr, nowStr, nowStr, nowStr],
		);

		chunkIds.push(nodeId);
	}

	// 8. Create ExternalRef node in graph_nodes
	const externalRefId = randomUUID();
	await db.execute(
		`INSERT INTO graph_nodes (id, type, name, summary, content, trust_tier, confidence, base_confidence, importance, base_importance, access_count, edge_count, tags, file_paths, t_created, t_valid_from, created_by, created_at, last_accessed)
		 VALUES (?, 'ExternalRef', ?, ?, ?, 4, 0.7, 0.7, 0.4, 0.4, 0, 0, ?, '[]', ?, ?, 'sia-fetch-and-index', ?, ?)`,
		[
			externalRefId,
			url,
			`External reference: ${url}`,
			url,
			tagsJson,
			nowStr,
			nowStr,
			nowStr,
			nowStr,
		],
	);

	// 9. Return result
	return {
		indexed: chunkIds.length,
		contentType,
		sourceUrl: url,
		externalRefId,
	};
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// Module: sia-flag — Flag the current session for human review
|
|
2
|
+
|
|
3
|
+
import { v4 as uuid } from "uuid";
|
|
4
|
+
import type { z } from "zod";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
import type { SiaFlagInput as SiaFlagInputSchema } from "@/mcp/server";
|
|
7
|
+
|
|
8
|
+
/** Input accepted by `handleSiaFlag`, inferred from the zod schema exported by `@/mcp/server`. */
export type SiaFlagInput = z.infer<typeof SiaFlagInputSchema>;
|
|
9
|
+
|
|
10
|
+
/** Per-call configuration for `handleSiaFlag`. */
export interface SiaFlagConfig {
	// When false, `handleSiaFlag` returns an error and writes nothing.
	enableFlagging: boolean;
	// Session id stored in the `session_id` column of the inserted flag row.
	sessionId: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Result of `handleSiaFlag`: `flagged` and `id` are set on success, `error`
 * is set when no flag was written.
 */
export interface SiaFlagResult {
	// True when a row was inserted into `session_flags`.
	flagged?: boolean;
	// Id generated for the inserted flag row.
	id?: string;
	// Why the flag was not recorded (flagging disabled, or reason empty after sanitization).
	error?: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Sanitize a flag reason string.
 *
 * Strips: < > { } [ ] \ " and control characters (0x00-0x1F, 0x7F).
 * Keeps: `: backticks _ / # @ ( ) . , ' -` and all normal printable chars.
 * Truncates to at most 100 UTF-16 code units after sanitization, never cutting
 * a surrogate pair in half (so the result may be 99 units long when the cut
 * would otherwise land inside an astral character such as an emoji).
 *
 * @param raw - Untrusted reason text.
 * @returns The sanitized, length-limited reason; may be the empty string.
 */
export function sanitizeReason(raw: string): string {
	const maxLength = 100;

	// Remove < > { } [ ] \ " and control chars (0x00-0x1F, 0x7F)
	// biome-ignore lint/suspicious/noControlCharactersInRegex: intentionally stripping control characters for sanitization
	const cleaned = raw.replace(/[<>{}[\]\\"\x00-\x1f\x7f]/g, "");
	if (cleaned.length <= maxLength) {
		return cleaned;
	}

	// If the last kept code unit is a high surrogate, its low half lies beyond
	// the cut; drop it rather than emit a lone surrogate (invalid Unicode text).
	const lastKept = cleaned.charCodeAt(maxLength - 1);
	const splitsSurrogatePair = lastKept >= 0xd800 && lastKept <= 0xdbff;
	return cleaned.slice(0, splitsSurrogatePair ? maxLength - 1 : maxLength);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Record a flag on the current session so a human can review it later.
 *
 * Returns an error result (and writes nothing) when flagging is turned off or
 * when the reason is empty once sanitized; otherwise inserts one unconsumed
 * row into `session_flags` and returns its generated id.
 */
export async function handleSiaFlag(
	db: SiaDb,
	input: SiaFlagInput,
	config: SiaFlagConfig,
): Promise<SiaFlagResult> {
	const { enableFlagging, sessionId } = config;

	if (!enableFlagging) {
		const disabledMessage = "Flagging is disabled. Run 'npx sia enable-flagging' to enable.";
		return { error: disabledMessage };
	}

	const cleanedReason = sanitizeReason(input.reason);
	if (cleanedReason === "") {
		return { error: "Flag reason is empty after sanitization" };
	}

	const flagId = uuid();
	const timestampMs = Date.now();
	const rowValues = [flagId, sessionId, cleanedReason, timestampMs];

	await db.execute(
		`INSERT INTO session_flags (id, session_id, reason, created_at, consumed)
		 VALUES (?, ?, ?, ?, 0)`,
		rowValues,
	);

	return { flagged: true, id: flagId };
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// Module: sia-index — Index markdown/text content by chunking and scanning for entity references
|
|
2
|
+
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import type { Embedder } from "@/capture/embedder";
|
|
6
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
7
|
+
import { insertEdge } from "@/graph/edges";
|
|
8
|
+
import { headingChunker } from "@/sandbox/context-mode";
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Input / Output types
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/** Zod schema describing the input accepted by `handleSiaIndex`. */
export const SiaIndexInput = z.object({
	// Text to index. Empty or whitespace-only content short-circuits to a zero result;
	// otherwise it is split with `headingChunker`.
	content: z.string(),
	// Optional source label; when present it replaces the session id in chunk names.
	source: z.string().optional(),
	// Optional tags; serialized as a JSON array onto every inserted chunk node.
	tags: z.array(z.string()).optional(),
});
|
|
19
|
+
|
|
20
|
+
/**
 * Result of `handleSiaIndex`. All fields are zero/empty when the input content
 * is empty.
 */
export interface SiaIndexResult {
	// Presumably the number of ContentChunk nodes inserted — TODO confirm against the handler's return.
	indexed: number;
	// Presumably the number of "references" edges created from chunks to known entities — TODO confirm.
	references: number;
	// Presumably the ids of the inserted ContentChunk nodes, in chunk order — TODO confirm.
	chunkIds: string[];
}
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// handleSiaIndex
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
/**
 * Index free-form content into the graph.
 *
 * The content is split with `headingChunker`, every chunk is inserted into
 * `graph_nodes` as a `ContentChunk` row, and each chunk is then scanned for
 * literal (substring) occurrences of the names of `CodeSymbol` / `FileNode`
 * rows whose `t_expired` and `t_valid_until` columns are unset. Every
 * occurrence produces a `references` edge from the chunk to the matched node.
 *
 * A failing edge insert is logged to stderr and skipped; it does not abort the
 * run and is not counted in `references`.
 *
 * @param db - Database that receives the chunk rows and edges.
 * @param input - Content to index plus optional `source` label and `tags`.
 * @param _embedder - Accepted for signature compatibility; not used here, so
 *   no embeddings are computed by this function.
 * @param sessionId - Used in chunk names when `input.source` is not provided.
 * @returns Number of chunks stored, number of edges created, and the chunk IDs.
 */
export async function handleSiaIndex(
	db: SiaDb,
	input: z.infer<typeof SiaIndexInput>,
	_embedder: Embedder,
	sessionId: string,
): Promise<SiaIndexResult> {
	const { content, source, tags } = input;

	// 1. Empty content fast-path: nothing is chunked and nothing is written.
	if (!content || content.trim().length === 0) {
		return { indexed: 0, references: 0, chunkIds: [] };
	}

	// Every timestamp column of a chunk row receives the same stringified
	// epoch-millisecond value.
	const nowStr = String(Date.now());
	const tagsJson = JSON.stringify(tags ?? []);
	const namePrefix = source ? `chunk-${source}` : `chunk-${sessionId}`;

	// 2. Chunk content via headingChunker
	const rawChunks = headingChunker.chunk(content);

	// 3. Store each chunk as a ContentChunk node in graph_nodes.
	//    The id and text are kept together so the scan below never has to
	//    re-join two parallel arrays by index.
	const stored: Array<{ id: string; text: string }> = [];

	for (const [i, raw] of rawChunks.entries()) {
		const nodeId = randomUUID();
		const chunkName = `${namePrefix}-${i}`;
		const summary = raw.text.slice(0, 100);

		await db.execute(
			`INSERT INTO graph_nodes (id, type, name, summary, content, trust_tier, confidence, base_confidence, importance, base_importance, access_count, edge_count, tags, file_paths, t_created, t_valid_from, created_by, created_at, last_accessed)
			 VALUES (?, 'ContentChunk', ?, ?, ?, 3, 0.8, 0.8, 0.5, 0.5, 0, 0, ?, '[]', ?, ?, 'sia-index', ?, ?)`,
			[nodeId, chunkName, summary, raw.text, tagsJson, nowStr, nowStr, nowStr, nowStr],
		);

		stored.push({ id: nodeId, text: raw.text });
	}

	const chunkIds = stored.map((chunk) => chunk.id);

	// Without stored chunks there is nothing to scan, so skip the entity query.
	if (stored.length === 0) {
		return { indexed: 0, references: 0, chunkIds };
	}

	// 4. Scan each chunk for mentions of known entity names
	//    Query graph_nodes for CodeSymbol and FileNode types
	const { rows: knownEntities } = await db.execute(
		`SELECT id, name FROM graph_nodes WHERE type IN ('CodeSymbol', 'FileNode') AND (t_expired IS NULL OR t_expired = '') AND (t_valid_until IS NULL OR t_valid_until = '')`,
		[],
	);

	let referenceCount = 0;

	for (const chunk of stored) {
		for (const row of knownEntities) {
			const entityId = row.id as string;
			const entityName = row.name;

			// Rows without a usable name cannot be matched against text.
			if (typeof entityName !== "string" || entityName.length === 0) {
				continue;
			}
			if (!chunk.text.includes(entityName)) {
				continue;
			}

			try {
				await insertEdge(db, {
					from_id: chunk.id,
					to_id: entityId,
					type: "references",
					weight: 1.0,
					confidence: 0.7,
					trust_tier: 3,
				});
				referenceCount++;
			} catch (edgeErr) {
				// A thrown value is not guaranteed to be an Error; reading `.message`
				// from null/undefined would throw from inside this handler and abort
				// the whole run, so narrow before formatting.
				const reason = edgeErr instanceof Error ? edgeErr.message : String(edgeErr);
				console.error(`[sia-index] edge insert failed for ${chunk.id}->${entityId}: ${reason}`);
			}
		}
	}

	return {
		indexed: chunkIds.length,
		references: referenceCount,
		chunkIds,
	};
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// Module: sia-note — Developer-authored knowledge entry via MCP
|
|
2
|
+
|
|
3
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
4
|
+
import type { Entity } from "@/graph/entities";
|
|
5
|
+
import { OntologyError } from "@/ontology/errors";
|
|
6
|
+
import {
|
|
7
|
+
createBug,
|
|
8
|
+
createConcept,
|
|
9
|
+
createConvention,
|
|
10
|
+
createDecision,
|
|
11
|
+
createSolution,
|
|
12
|
+
} from "@/ontology/middleware";
|
|
13
|
+
|
|
14
|
+
/** Input accepted by `handleSiaNote`. */
export interface SiaNoteInput {
	/** Which kind of knowledge entry to create. */
	kind: "Decision" | "Convention" | "Bug" | "Solution" | "Concept";
	/** Name of the entry. */
	name: string;
	/** Body text of the entry. */
	content: string;
	/** Optional tags attached to the entry. */
	tags?: string[];
	/** Entity IDs the entry relates to; how they are used depends on `kind`. */
	relates_to?: string[];
	/** Entity ID this entry replaces. */
	supersedes?: string;
}
|
|
22
|
+
|
|
23
|
+
/** Result returned by `handleSiaNote`. */
export interface SiaNoteResult {
	/** ID of the entity that was created. */
	node_id: string;
	/** The `kind` that was requested. */
	kind: string;
	/** Number of edges the handler reports for the new entity. */
	edges_created: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Create a developer-authored knowledge entry in the graph.
 * Routes to the appropriate ontology middleware function based on `kind`.
 *
 * How `relates_to` is interpreted per kind:
 * - Bug: the first entry becomes the `causedBy` target (required); any further
 *   entries are not forwarded.
 * - Convention: all entries become `pertainsTo` targets (at least one required).
 * - Decision: entries become `pertainsTo`; `supersedes` is forwarded if provided.
 * - Solution: the first entry becomes the `solves` target (required), the rest
 *   become `pertainsTo`.
 * - Concept: entries become `pertainsTo`.
 *
 * `edges_created` is derived from the input (one per forwarded target, plus one
 * for `supersedes` on a Decision); it is not read back from the database.
 *
 * @param db - Database handed to the ontology middleware.
 * @param input - Description of the entry to create.
 * @returns ID of the created entity, the requested kind, and the edge count.
 * @throws Error with a `sia_note failed: ` prefix when an `OntologyError` is
 *   raised, including for a missing required `relates_to` entry or an
 *   unsupported `kind`. Any other error is rethrown unchanged.
 */
export async function handleSiaNote(db: SiaDb, input: SiaNoteInput): Promise<SiaNoteResult> {
	const relatesTo = input.relates_to ?? [];

	try {
		let entity: Entity;
		let edgesCreated: number;

		switch (input.kind) {
			case "Bug": {
				const [causedBy] = relatesTo;
				if (causedBy === undefined) {
					throw new OntologyError(
						"Bug requires at least one relates_to entry as the causedBy target",
					);
				}
				entity = await createBug(db, {
					name: input.name,
					content: input.content,
					causedBy,
					tags: input.tags,
				});
				// Bug creates exactly 1 caused_by edge
				edgesCreated = 1;
				break;
			}

			case "Convention": {
				if (relatesTo.length === 0) {
					throw new OntologyError(
						"Convention requires at least one relates_to entry as a pertainsTo target",
					);
				}
				entity = await createConvention(db, {
					name: input.name,
					content: input.content,
					pertainsTo: relatesTo,
					tags: input.tags,
				});
				edgesCreated = relatesTo.length;
				break;
			}

			case "Decision": {
				entity = await createDecision(db, {
					name: input.name,
					content: input.content,
					pertainsTo: relatesTo.length > 0 ? relatesTo : undefined,
					supersedes: input.supersedes,
					tags: input.tags,
				});
				edgesCreated = relatesTo.length + (input.supersedes ? 1 : 0);
				break;
			}

			case "Solution": {
				const [solves, ...others] = relatesTo;
				if (solves === undefined) {
					throw new OntologyError(
						"Solution requires at least one relates_to entry as the solves target",
					);
				}
				const pertainsTo = others.length > 0 ? others : undefined;
				entity = await createSolution(db, {
					name: input.name,
					content: input.content,
					solves,
					pertainsTo,
					tags: input.tags,
				});
				// 1 solves edge + any remaining pertains_to edges
				edgesCreated = 1 + others.length;
				break;
			}

			case "Concept": {
				entity = await createConcept(db, {
					name: input.name,
					content: input.content,
					pertainsTo: relatesTo.length > 0 ? relatesTo : undefined,
					tags: input.tags,
				});
				edgesCreated = relatesTo.length;
				break;
			}

			default: {
				// SiaNoteInput is only a compile-time contract. A value that bypassed
				// type checking would otherwise leave `entity` unassigned and surface
				// as an opaque TypeError on `entity.id` below.
				const unsupported: never = input.kind;
				throw new OntologyError(`Unsupported note kind: ${String(unsupported)}`);
			}
		}

		return {
			node_id: entity.id,
			kind: input.kind,
			edges_created: edgesCreated,
		};
	} catch (err) {
		// Ontology violations are re-raised as a plain Error with a tool prefix.
		if (err instanceof OntologyError) {
			throw new Error(`sia_note failed: ${err.message}`);
		}
		throw err;
	}
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// Module: sia-search — Three-stage hybrid retrieval via BM25 + graph + vector
|
|
2
|
+
//
|
|
3
|
+
// Workspace routing is preserved from Phase 5.
|
|
4
|
+
// Local search delegates to the three-stage pipeline in @/retrieval/search.
|
|
5
|
+
|
|
6
|
+
import type { z } from "zod";
|
|
7
|
+
import type { Embedder } from "@/capture/embedder";
|
|
8
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
9
|
+
import { annotateFreshness } from "@/mcp/freshness-annotator";
|
|
10
|
+
import type { SiaSearchInput } from "@/mcp/server";
|
|
11
|
+
import { hybridSearch } from "@/retrieval/search";
|
|
12
|
+
import { workspaceSearch } from "@/retrieval/workspace-search";
|
|
13
|
+
|
|
14
|
+
/**
 * One entity hit as returned by `handleSiaSearch`.
 *
 * NOTE(review): `tags` and `file_paths` are typed as plain strings; they look
 * like serialized lists — confirm the encoding against the storage layer.
 */
export interface SiaSearchResult {
	/** Entity ID. */
	id: string;
	/** Entity type name. */
	type: string;
	/** Entity name. */
	name: string;
	/** Short summary text. */
	summary: string;
	/** Full content text. */
	content: string;
	/** Numeric trust tier. */
	trust_tier: number;
	/** Confidence score. */
	confidence: number;
	/** Importance score. */
	importance: number;
	/** Tags, as a single string. */
	tags: string;
	/** File paths, as a single string. */
	file_paths: string;
	/** Conflict group the entity belongs to, or null. */
	conflict_group_id: string | null;
	/** Start of the entity's validity, or null. */
	t_valid_from: number | null;
	/** Name of the repository the hit came from, or null. */
	source_repo_name: string | null;
	/** How the entity was extracted, when known. */
	extraction_method?: string | null;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Extra dependencies `handleSiaSearch` needs for a workspace-scoped search.
 * Every field is forwarded to `workspaceSearch` unchanged.
 */
export interface WorkspaceDeps {
	/** Meta database handle. */
	metaDb: SiaDb;
	/** Bridge database handle. */
	bridgeDb: SiaDb;
	/** ID of the workspace to search. */
	workspaceId: string;
	/** ID of the primary repository. */
	primaryRepoId: string;
	/** Optional Sia home directory. */
	siaHome?: string;
}
|
|
40
|
+
|
|
41
|
+
/** Maximum number of results sia_search will return regardless of input. */
const MAX_LIMIT = 15;

/** Default number of results when `limit` is not specified. */
const DEFAULT_LIMIT = 5;

/**
 * Handle a sia_search request.
 *
 * - When `input.workspace` is truthy and `workspaceDeps` is provided, the query
 *   is delegated to `workspaceSearch`.
 * - Otherwise the query is delegated to `hybridSearch` on `db`.
 *
 * In both cases the requested limit is clamped to the range 1..`MAX_LIMIT`
 * (default `DEFAULT_LIMIT`), and the resulting rows are passed through
 * `annotateFreshness` before being returned. The `node_types`, `package_path`
 * and `paranoid` filters are forwarded as-is; their semantics are implemented
 * by the underlying search functions.
 *
 * @param db - Primary graph database.
 * @param input - Validated sia_search input.
 * @param _embedder - Optional embedder handed to `hybridSearch` (null if absent).
 * @param workspaceDeps - Required for workspace-scoped search; without it a
 *   request with `workspace: true` falls back to the local search.
 * @returns The annotated search hits.
 */
export async function handleSiaSearch(
	db: SiaDb,
	input: z.infer<typeof SiaSearchInput>,
	_embedder?: Embedder,
	workspaceDeps?: WorkspaceDeps,
): Promise<SiaSearchResult[]> {
	// Compute the effective limit once so that both search paths honour the cap.
	const rawLimit = input.limit ?? DEFAULT_LIMIT;
	const effectiveLimit = Math.min(Math.max(1, rawLimit), MAX_LIMIT);

	// Both paths finish identically: annotate the rows and view them as results.
	const annotate = async (rows: unknown): Promise<SiaSearchResult[]> =>
		(await annotateFreshness(rows as Record<string, unknown>[], db)) as unknown as SiaSearchResult[];

	// Workspace-scoped search
	if (input.workspace && workspaceDeps) {
		const result = await workspaceSearch({
			primaryDb: db,
			metaDb: workspaceDeps.metaDb,
			bridgeDb: workspaceDeps.bridgeDb,
			workspaceId: workspaceDeps.workspaceId,
			primaryRepoId: workspaceDeps.primaryRepoId,
			query: input.query,
			siaHome: workspaceDeps.siaHome,
			limit: effectiveLimit,
			paranoid: input.paranoid,
			node_types: input.node_types,
			package_path: input.package_path,
		});
		return annotate(result.entities);
	}

	// Local search via three-stage pipeline
	const searchResult = await hybridSearch(db, _embedder ?? null, {
		query: input.query,
		taskType: input.task_type,
		nodeTypes: input.node_types,
		packagePath: input.package_path,
		paranoid: input.paranoid,
		limit: effectiveLimit,
		includeProvenance: input.include_provenance,
	});

	return annotate(searchResult.results);
}
|