@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
// Module: tokenizer — Basic WordPiece tokenizer for HuggingFace tokenizer.json files
|
|
2
|
+
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
|
|
5
|
+
/** Special token IDs for BERT-style tokenizers. */
const CLS_ID = 101;
const SEP_ID = 102;
const UNK_ID = 100;
const PAD_ID = 0;

/** Default maximum sequence length. */
const DEFAULT_MAX_LENGTH = 128;

/**
 * Words longer than this many UTF-16 code units are emitted as a single [UNK]
 * instead of being split. This mirrors the `max_input_chars_per_word` default
 * of the reference WordPiece model and bounds the otherwise quadratic search
 * in {@link wordPieceSplit}.
 */
const MAX_INPUT_CHARS_PER_WORD = 100;

/**
 * A loaded tokenizer: just the vocabulary mapping word → token id.
 */
export interface Tokenizer {
	vocab: Map<string, number>;
}

/**
 * Tokenized output ready for ONNX inference.
 *
 * Both arrays have exactly `maxLength` elements; `attentionMask` is 1 for
 * real tokens and 0 for padding.
 */
export interface TokenizedOutput {
	inputIds: BigInt64Array;
	attentionMask: BigInt64Array;
}

/** Narrow an unknown JSON value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
	return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Load a HuggingFace tokenizer.json and extract its vocabulary.
 *
 * The file format has `model.vocab` as an object mapping token strings to ids.
 * When `model.vocab` is missing or is not such an object (for example the
 * array-of-pairs layout used by other model types), an empty vocabulary is
 * returned. Entries whose id is not an integer are skipped, because they could
 * not be converted to a BigInt token id later on.
 *
 * @param tokenizerJsonPath Path of the tokenizer.json file to read.
 * @returns The tokenizer wrapping the extracted vocabulary.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
export function loadTokenizer(tokenizerJsonPath: string): Tokenizer {
	const raw = readFileSync(tokenizerJsonPath, "utf-8");
	const json: unknown = JSON.parse(raw);

	const vocab = new Map<string, number>();
	const model = isRecord(json) ? json.model : undefined;
	const vocabObj = isRecord(model) ? model.vocab : undefined;

	if (isRecord(vocabObj)) {
		for (const [token, id] of Object.entries(vocabObj)) {
			if (typeof id === "number" && Number.isInteger(id)) {
				vocab.set(token, id);
			}
		}
	}

	return { vocab };
}

/**
 * Perform basic WordPiece-style tokenization:
 * 1. Lowercase the text
 * 2. Strip punctuation into separate tokens
 * 3. Split on whitespace
 * 4. For each word, try to find it in the vocabulary; if not found, split
 *    into sub-word pieces using the ## prefix convention
 * 5. Prepend [CLS], append [SEP], pad to maxLength
 *
 * Every character that is not an ASCII letter, digit or whitespace becomes
 * its own token. That includes the underscore, which BERT classifies as
 * punctuation, so `snake_case` is tokenized as `snake`, `_`, `case`.
 *
 * @param tokenizer Tokenizer whose vocabulary is used for lookups.
 * @param text Text to tokenize.
 * @param maxLength Length of the returned arrays. Values below 2 leave no
 *   room for both [CLS] and [SEP]; the sequence is then simply cut off.
 * @returns Token ids and attention mask, each `maxLength` long.
 */
export function tokenize(
	tokenizer: Tokenizer,
	text: string,
	maxLength: number = DEFAULT_MAX_LENGTH,
): TokenizedOutput {
	const { vocab } = tokenizer;

	// Lowercase and insert spaces around punctuation so they become separate tokens.
	// The `u` flag keeps astral characters (e.g. emoji) as one unit instead of
	// two lone surrogates; `_` is listed explicitly because `\w` includes it.
	const normalized = text
		.toLowerCase()
		.replace(/([^\w\s]|_)/gu, " $1 ")
		.replace(/\s+/g, " ")
		.trim();

	const words = normalized.length > 0 ? normalized.split(" ") : [];

	// One slot is always reserved for the trailing [SEP].
	const budget = maxLength - 1;
	const tokenIds: number[] = [CLS_ID];

	for (const word of words) {
		const remaining = budget - tokenIds.length;
		if (remaining <= 0) break;

		// Fast path: the whole word is a vocabulary entry.
		const wholeWordId = vocab.get(word);
		if (wholeWordId !== undefined) {
			tokenIds.push(wholeWordId);
			continue;
		}

		// Otherwise split into word pieces, never producing more than fit.
		for (const pieceId of wordPieceSplit(vocab, word, remaining)) {
			tokenIds.push(pieceId);
		}
	}

	tokenIds.push(SEP_ID);

	// Build padded arrays: start fully padded, then overwrite the used prefix.
	const inputIds = new BigInt64Array(maxLength).fill(BigInt(PAD_ID));
	const attentionMask = new BigInt64Array(maxLength);

	tokenIds.slice(0, maxLength).forEach((id, i) => {
		inputIds[i] = BigInt(id);
		attentionMask[i] = 1n;
	});

	return { inputIds, attentionMask };
}

/**
 * Greedy left-to-right WordPiece splitting for a single word.
 *
 * At each position the longest vocabulary entry is taken; pieces after the
 * first position are looked up with the `##` continuation prefix. A character
 * that starts no known piece is emitted as [UNK] and skipped.
 *
 * @param vocab Vocabulary to match pieces against.
 * @param word Word to split (already lowercased, without whitespace).
 * @param limit Maximum number of ids to produce; splitting stops once reached.
 * @returns An array of token ids, at most `limit` long.
 */
function wordPieceSplit(
	vocab: Map<string, number>,
	word: string,
	limit: number = Number.POSITIVE_INFINITY,
): number[] {
	if (limit <= 0) return [];

	// Over-long words are not worth the quadratic search: emit one [UNK].
	if (word.length > MAX_INPUT_CHARS_PER_WORD) return [UNK_ID];

	const ids: number[] = [];
	let start = 0;

	while (start < word.length && ids.length < limit) {
		let matchedEnd = -1;

		// Try the longest candidate first and shrink from the right.
		for (let end = word.length; end > start; end--) {
			const piece = start === 0 ? word.slice(0, end) : `##${word.slice(start, end)}`;
			const id = vocab.get(piece);
			if (id !== undefined) {
				ids.push(id);
				matchedEnd = end;
				break;
			}
		}

		if (matchedEnd !== -1) {
			start = matchedEnd;
			continue;
		}

		// No sub-word match found — emit [UNK] for this character and advance.
		// Advance by a whole code point so a surrogate pair yields one [UNK].
		ids.push(UNK_ID);
		const codePoint = word.codePointAt(start);
		start += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
	}

	return ids;
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
// Module: track-a-ast — structural extraction of code entities via regex patterns
|
|
2
|
+
|
|
3
|
+
import type { CandidateFact } from "@/capture/types";
|
|
4
|
+
|
|
5
|
+
/**
 * Export patterns shared by the JavaScript/TypeScript family.
 *
 * The `const` pattern skips an optional `enum` keyword so that
 * `export const enum Color` is reported as `Color` rather than `enum`.
 */
const JS_EXPORT_PATTERNS: RegExp[] = [
	/export\s+(?:async\s+)?function\s+(\w+)/gm,
	/export\s+class\s+(\w+)/gm,
	/export\s+const\s+(?:enum\s+)?(\w+)/gm,
];

/** Map of file extension to an array of regex patterns that capture entity names. */
const PATTERNS: Record<string, RegExp[]> = {
	".ts": JS_EXPORT_PATTERNS,
	".tsx": JS_EXPORT_PATTERNS,
	".js": JS_EXPORT_PATTERNS,
	".jsx": JS_EXPORT_PATTERNS,
	".py": [/^def\s+(\w+)/gm, /^class\s+(\w+)/gm],
	".go": [/^func\s+(\w+)/gm, /^type\s+(\w+)\s+struct/gm],
	".rs": [/^pub\s+fn\s+(\w+)/gm, /^pub\s+struct\s+(\w+)/gm],
};

/** Number of lines (the matched line included) returned as context. */
const CONTEXT_LINES = 3;

/**
 * Return a small context window for a match: the line containing
 * `matchIndex` plus the following lines, up to {@link CONTEXT_LINES} in
 * total, with surrounding whitespace trimmed.
 *
 * @param content Full file text.
 * @param matchIndex Offset of the match inside `content`.
 */
function surroundingLines(content: string, matchIndex: number): string {
	// lastIndexOf yields -1 on the first line, which conveniently becomes 0.
	const lineStart = content.lastIndexOf("\n", matchIndex) + 1;

	let end = matchIndex;
	for (let i = 0; i < CONTEXT_LINES; i++) {
		const next = content.indexOf("\n", end + 1);
		if (next === -1) {
			// Fewer lines remain than requested: take everything to the end.
			return content.slice(lineStart).trim();
		}
		end = next;
	}

	return content.slice(lineStart, end).trim();
}

/**
 * Extract structural code entities from file content using regex-based pattern matching.
 *
 * The language is chosen from the file extension, compared case-insensitively.
 * Results are grouped by pattern (in the order listed in {@link PATTERNS}) and,
 * within a pattern, ordered by position in the file.
 *
 * @param content The file text to scan.
 * @param filePath Optional path used to determine language by extension.
 * @returns An array of CandidateFact objects for every matched entity; empty
 *   when `filePath` is missing, has no extension, or the extension is unknown.
 */
export function extractTrackA(content: string, filePath?: string): CandidateFact[] {
	if (!filePath) return [];

	const dotIdx = filePath.lastIndexOf(".");
	if (dotIdx === -1) return [];

	const patterns = PATTERNS[filePath.slice(dotIdx).toLowerCase()];
	if (!patterns) return [];

	const facts: CandidateFact[] = [];

	for (const pattern of patterns) {
		// matchAll iterates on an internal copy of the regex, so the shared
		// module-level pattern objects are never left with a stale lastIndex.
		for (const match of content.matchAll(pattern)) {
			const name = match[1];
			facts.push({
				type: "CodeEntity",
				name,
				content: surroundingLines(content, match.index ?? 0),
				summary: `CodeEntity: ${name}`,
				tags: [],
				file_paths: [filePath],
				trust_tier: 2,
				confidence: 0.92,
				extraction_method: "regex-ast",
			});
		}
	}

	return facts;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// Module: track-b-llm — semantic extraction via LLM or pattern matching (Track B)
|
|
2
|
+
|
|
3
|
+
import { extractPrompt } from "@/capture/prompts/extract";
|
|
4
|
+
import type { CandidateFact, EntityType, ProposedRelationship } from "@/capture/types";
|
|
5
|
+
|
|
6
|
+
/** One chat message sent to the LLM. */
interface LlmMessageParam {
	/** Speaker of the message; `extractTrackB` sends `"user"`. */
	role: string;
	/** Message text. */
	content: string;
}

/** Request body handed to {@link LlmClient.messages.create}. */
interface LlmCreateParams {
	/** Model identifier. */
	model: string;
	/** Upper bound on generated tokens. */
	max_tokens: number;
	/** System prompt. */
	system: string;
	/** Conversation messages. */
	messages: LlmMessageParam[];
}

/** The subset of an LLM response this module reads: a list of text blocks. */
interface LlmResponse {
	content: { text: string }[];
}

/**
 * Minimal structural type for the LLM client.
 * NOTE(review): the shape looks like the Anthropic SDK's `messages.create` — confirm.
 */
interface LlmClient {
	messages: {
		create: (params: LlmCreateParams) => Promise<LlmResponse>;
	};
}

/** Settings controlling Track B extraction. */
interface TrackBConfig {
	/** Model name passed to the LLM client. */
	captureModel: string;
	/** Candidates with a lower confidence are dropped. */
	minExtractConfidence: number;
	/** When true, Track B returns no candidates at all. */
	airGapped: boolean;
	/** Optional client; without it only pattern matching is used. */
	llmClient?: LlmClient;
}

/** A sentence-level regex together with the entity type it signals. */
interface PatternEntry {
	pattern: RegExp;
	type: EntityType;
}

/**
 * A fact exactly as parsed from the LLM's JSON reply. Every field is optional
 * because the reply is untrusted until it has been normalised.
 */
interface RawFact {
	type?: string;
	name?: string;
	content?: string;
	summary?: string;
	tags?: string[];
	file_paths?: string[];
	confidence?: number;
	proposed_relationships?: ProposedRelationship[];
}
|
|
50
|
+
|
|
51
|
+
/** Keyword heuristics used when no LLM result is available; first match wins. */
const PATTERNS: PatternEntry[] = [
	{
		pattern: /decided to|chose|we will use|going with|selected|opted for/i,
		type: "Decision",
	},
	{
		pattern: /bug|error|crash|broken|failing|exception|regression/i,
		type: "Bug",
	},
	{
		pattern: /always|never|must|convention|rule:|standard:/i,
		type: "Convention",
	},
	{
		pattern: /fix|solved|workaround|resolution|patch/i,
		type: "Solution",
	},
];

/** Confidence assigned to every pattern-matched candidate. */
const PATTERN_CONFIDENCE = 0.7;

/** Confidence assumed for an LLM fact that does not report one. */
const DEFAULT_LLM_CONFIDENCE = 0.7;

/** Entity types accepted from the LLM; anything else is stored as "Concept". */
const ENTITY_TYPES: ReadonlySet<string> = new Set([
	"CodeEntity",
	"Concept",
	"Decision",
	"Bug",
	"Solution",
	"Convention",
	"Dependency",
]);

/** Split text into trimmed, non-empty sentences at ". " and at line breaks. */
function splitSentences(content: string): string[] {
	return content
		.split(/\. |\n/)
		.map((s) => s.trim())
		.filter((s) => s.length > 0);
}

/**
 * Keyword-based extraction: each sentence is classified by the first entry of
 * {@link PATTERNS} that matches it; sentences matching nothing are ignored.
 *
 * @param content Text to scan.
 * @param minConfidence Threshold; since every candidate has the same fixed
 *   confidence, a higher threshold yields an empty result.
 */
function patternMatch(content: string, minConfidence: number): CandidateFact[] {
	// Written as a negation so that a NaN threshold also yields no candidates.
	if (!(PATTERN_CONFIDENCE >= minConfidence)) return [];

	const candidates: CandidateFact[] = [];

	for (const sentence of splitSentences(content)) {
		const hit = PATTERNS.find(({ pattern }) => pattern.test(sentence));
		if (!hit) continue;

		candidates.push({
			type: hit.type,
			name: sentence.slice(0, 50),
			content: sentence,
			summary: sentence.slice(0, 80),
			tags: [],
			file_paths: [],
			trust_tier: 3,
			confidence: PATTERN_CONFIDENCE,
			extraction_method: "pattern-match",
		});
	}

	return candidates;
}

/** Narrow an unknown value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
	return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Strings pass through, numbers/booleans are stringified, anything else becomes "". */
function asText(value: unknown): string {
	if (typeof value === "string") return value;
	if (typeof value === "number" || typeof value === "boolean") return String(value);
	return "";
}

/** Keep only the string elements of an array; non-arrays become []. */
function stringsOnly(value: unknown): string[] {
	return Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : [];
}

/** Keep only well-formed relationship objects; non-arrays become undefined. */
function relationshipsOnly(value: unknown): ProposedRelationship[] | undefined {
	if (!Array.isArray(value)) return undefined;
	return value.filter(
		(r): r is ProposedRelationship =>
			isRecord(r) &&
			typeof r.target_name === "string" &&
			typeof r.type === "string" &&
			typeof r.weight === "number",
	);
}

/**
 * Parse the JSON object contained in an LLM reply.
 *
 * The reply is parsed as-is first. If that fails, the text between the first
 * `{` and the last `}` is tried, which covers replies wrapped in a Markdown
 * code fence or surrounded by prose.
 *
 * @throws If no JSON object can be parsed from the reply.
 */
function parseJsonObject(raw: string): Record<string, unknown> {
	const trimmed = raw.trim();
	let parsed: unknown;

	try {
		parsed = JSON.parse(trimmed);
	} catch {
		const start = trimmed.indexOf("{");
		const end = trimmed.lastIndexOf("}");
		if (start === -1 || end <= start) {
			throw new Error("LLM reply does not contain a JSON object");
		}
		parsed = JSON.parse(trimmed.slice(start, end + 1));
	}

	if (!isRecord(parsed)) {
		throw new Error("LLM reply is not a JSON object");
	}
	return parsed;
}

/**
 * Turn the raw LLM reply into validated candidates.
 *
 * Entries of `facts` that are not objects are skipped instead of aborting the
 * whole reply, unknown entity types fall back to "Concept", and list fields
 * keep only elements of the expected shape.
 *
 * @param rawText Text of the LLM reply.
 * @param minConfidence Candidates below this confidence are dropped.
 * @throws If the reply contains no parseable JSON object.
 */
function parseLlmFacts(rawText: string, minConfidence: number): CandidateFact[] {
	const parsed = parseJsonObject(rawText);
	const entries: unknown[] = Array.isArray(parsed.facts) ? parsed.facts : [];

	const candidates: CandidateFact[] = [];

	for (const entry of entries) {
		if (!isRecord(entry)) continue;

		const confidence =
			typeof entry.confidence === "number" ? entry.confidence : DEFAULT_LLM_CONFIDENCE;
		if (!(confidence >= minConfidence)) continue;

		const type: EntityType =
			typeof entry.type === "string" && ENTITY_TYPES.has(entry.type)
				? (entry.type as EntityType)
				: "Concept";

		candidates.push({
			type,
			name: asText(entry.name).slice(0, 200),
			content: asText(entry.content),
			summary: asText(entry.summary),
			tags: stringsOnly(entry.tags),
			file_paths: stringsOnly(entry.file_paths),
			trust_tier: 3,
			confidence,
			extraction_method: "llm-haiku",
			proposed_relationships: relationshipsOnly(entry.proposed_relationships),
		});
	}

	return candidates;
}

/**
 * Track B: extract semantic facts (decisions, bugs, conventions, ...) from text.
 *
 * - In air-gapped mode nothing is extracted.
 * - With an `llmClient`, the LLM is asked first; any failure (request error,
 *   empty or unparseable reply) falls back to keyword pattern matching.
 * - Without a client, keyword pattern matching is used directly.
 *
 * @param content Text to analyse.
 * @param config Extraction settings.
 * @returns Candidates whose confidence is at least `config.minExtractConfidence`.
 */
export async function extractTrackB(
	content: string,
	config: TrackBConfig,
): Promise<CandidateFact[]> {
	if (config.airGapped) {
		return [];
	}

	// LLM path: attempt extraction via Anthropic API when client is provided
	if (config.llmClient) {
		try {
			const { system, user } = extractPrompt(content);
			const response = await config.llmClient.messages.create({
				model: config.captureModel,
				max_tokens: 1024,
				system,
				messages: [{ role: "user", content: user }],
			});

			const rawText: unknown = response.content[0]?.text;
			if (typeof rawText !== "string") {
				throw new Error("LLM response contained no text block");
			}

			return parseLlmFacts(rawText, config.minExtractConfidence);
		} catch {
			// Deliberate best-effort: fall through to pattern matching
		}
	}

	// Pattern matching fallback
	return patternMatch(content, config.minExtractConfidence);
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Module: types — shared types for the capture pipeline
|
|
2
|
+
|
|
3
|
+
/** The kind of knowledge entity extracted from a conversation. */
|
|
4
|
+
export type EntityType =
|
|
5
|
+
| "CodeEntity"
|
|
6
|
+
| "Concept"
|
|
7
|
+
| "Decision"
|
|
8
|
+
| "Bug"
|
|
9
|
+
| "Solution"
|
|
10
|
+
| "Convention"
|
|
11
|
+
| "Dependency";
|
|
12
|
+
|
|
13
|
+
/** A proposed edge to another entity, resolved by name during consolidation. */
|
|
14
|
+
export interface ProposedRelationship {
|
|
15
|
+
target_name: string;
|
|
16
|
+
type: string;
|
|
17
|
+
weight: number;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/** A candidate fact produced by extraction tracks before consolidation. */
|
|
21
|
+
export interface CandidateFact {
|
|
22
|
+
type: EntityType;
|
|
23
|
+
name: string;
|
|
24
|
+
content: string;
|
|
25
|
+
summary: string;
|
|
26
|
+
tags: string[];
|
|
27
|
+
file_paths: string[];
|
|
28
|
+
trust_tier: 1 | 2 | 3 | 4;
|
|
29
|
+
confidence: number;
|
|
30
|
+
extraction_method?: string;
|
|
31
|
+
proposed_relationships?: ProposedRelationship[];
|
|
32
|
+
t_valid_from?: number;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
 * Input handed to the capture pipeline by a Claude-Code hook.
 */
export interface HookPayload {
	cwd: string;
	/** Which hook fired. */
	type: "PostToolUse" | "Stop";
	sessionId: string;
	content: string;
	/** Optional tool name accompanying the payload. */
	toolName?: string;
	/** Optional file path accompanying the payload. */
	filePath?: string;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Action the consolidator selects for a single candidate.
 */
export type ConsolidationOp =
	| "ADD"
	| "UPDATE"
	| "INVALIDATE"
	| "NOOP";
|
|
47
|
+
|
|
48
|
+
/**
 * Tallies reported once a consolidation pass has finished.
 */
export interface ConsolidationResult {
	/** Number of candidates counted as added. */
	added: number;
	/** Number of candidates counted as updated. */
	updated: number;
	/** Number of candidates counted as invalidated. */
	invalidated: number;
	/** Number of candidates counted as no-ops. */
	noops: number;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Summary of one complete pipeline invocation.
 */
export interface PipelineResult {
	/** Count of candidates. */
	candidates: number;
	/** Counts from the consolidation pass. */
	consolidation: ConsolidationResult;
	edgesCreated: number;
	flagsProcessed: number;
	/** Duration of the run in milliseconds. */
	durationMs: number;
	/** Whether the circuit breaker was active. */
	circuitBreakerActive: boolean;
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// Module: community — CLI for printing community structure
|
|
2
|
+
|
|
3
|
+
import { resolveRepoHash } from "@/capture/hook";
|
|
4
|
+
import { detectCommunities } from "@/community/leiden";
|
|
5
|
+
import { buildSummaryTree } from "@/community/raptor";
|
|
6
|
+
import { summarizeCommunities } from "@/community/summarize";
|
|
7
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
8
|
+
import { openGraphDb } from "@/graph/semantic-db";
|
|
9
|
+
import { getConfig } from "@/shared/config";
|
|
10
|
+
|
|
11
|
+
/**
 * Row shape for one entry of the `communities` table as selected by
 * `loadCommunities` (columns aliased to camelCase).
 */
interface CliCommunity {
	id: string;
	/** Summary text, or null when the row has none. */
	summary: string | null;
	/** Value of the `member_count` column. */
	memberCount: number;
	/** Value of the `parent_id` column; null when there is no parent. */
	parentId: string | null;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Row shape for a community member as selected by `topEntities`
 * (`name` and `importance` from `graph_nodes`).
 */
interface CliEntity {
	name: string;
	importance: number;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Options accepted by `formatCommunityTree`.
 */
export interface CommunityCliOptions {
	/** When set, only communities whose `package_path` equals this value are loaded. */
	packagePath?: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Counts the rows currently stored in the `communities` table.
 *
 * @param db - Database handle used to run the query.
 * @returns The count coerced to a number; 0 when the `count` field is null or undefined.
 */
async function countCommunities(db: SiaDb): Promise<number> {
	const { rows } = await db.execute(`SELECT COUNT(*) as count FROM communities`);
	const firstRow = rows[0] as { count: number };
	const rawCount = firstRow.count ?? 0;
	return Number(rawCount);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Loads the communities of one hierarchy level, largest first.
 *
 * @param db - Database handle used to run the query.
 * @param level - Value matched against the `level` column.
 * @param packagePath - Optional filter; when truthy, only rows whose
 *   `package_path` equals it are returned.
 * @returns Rows ordered by member count, descending.
 */
async function loadCommunities(
	db: SiaDb,
	level: number,
	packagePath?: string,
): Promise<CliCommunity[]> {
	// Bindings are positional: level first, then the optional package filter.
	const bindings: unknown[] = packagePath ? [level, packagePath] : [level];
	const packageClause = packagePath ? "AND package_path = ?" : "";

	const { rows } = await db.execute(
		`SELECT id, summary, member_count as memberCount, parent_id as parentId
		 FROM communities
		 WHERE level = ?
		 ${packageClause}
		 ORDER BY memberCount DESC`,
		bindings,
	);
	return rows as unknown as CliCommunity[];
}
|
|
53
|
+
|
|
54
|
+
/**
 * Fetches up to five members of a community, highest importance first.
 *
 * @param db - Database handle used to run the query.
 * @param communityId - Community whose members are looked up.
 * @returns Name and importance of each selected member.
 */
async function topEntities(db: SiaDb, communityId: string): Promise<CliEntity[]> {
	const bindings = [communityId];
	const { rows } = await db.execute(
		`SELECT e.name, e.importance
		 FROM community_members cm
		 JOIN graph_nodes e ON cm.entity_id = e.id
		 WHERE cm.community_id = ?
		 ORDER BY e.importance DESC
		 LIMIT 5`,
		bindings,
	);
	return rows as unknown as CliEntity[];
}
|
|
66
|
+
|
|
67
|
+
/**
 * Renders the community hierarchy as plain text, one entry per line.
 *
 * Level-2 communities are printed as headers, each followed by the level-1
 * communities whose `parentId` points at it. Every level-1 community lists
 * its top entities and, when present, the number of level-0 communities
 * parented by it.
 *
 * Level-1 communities whose parent is missing from the loaded level-2 set
 * are printed under a separate trailing header, so they are not silently
 * dropped. Without this, a result set that has level-1 rows but no level-2
 * rows would render as an empty string.
 *
 * @param db - Database handle used for all lookups.
 * @param opts - Optional package filter applied to every level.
 * @returns The rendered text, or a hint message when neither level 2 nor
 *   level 1 has any community.
 */
export async function formatCommunityTree(
	db: SiaDb,
	opts: CommunityCliOptions = {},
): Promise<string> {
	const level2 = await loadCommunities(db, 2, opts.packagePath);
	const level1 = await loadCommunities(db, 1, opts.packagePath);
	const level0 = await loadCommunities(db, 0, opts.packagePath);

	if (level2.length === 0 && level1.length === 0) {
		return "No communities yet. Run detection first.";
	}

	const lines: string[] = [];

	// Appends the lines for one level-1 community: title, top entities, level-0 count.
	const renderLevel1 = async (child: CliCommunity): Promise<void> => {
		lines.push(` - ${child.summary ?? child.id.slice(0, 8)} (${child.memberCount} members)`);

		const entities = await topEntities(db, child.id);
		if (entities.length === 0) {
			lines.push(" (no entities)");
		} else {
			for (const entity of entities) {
				// Rows come straight from the database, so guard against a non-numeric importance.
				const importance = typeof entity.importance === "number" ? entity.importance : 0;
				lines.push(` - ${entity.name} [importance ${importance.toFixed(2)}]`);
			}
		}

		// Level 0 — briefly noted
		const clusterCount = level0.filter((c) => c.parentId === child.id).length;
		if (clusterCount > 0) {
			lines.push(` (${clusterCount} fine-grained clusters)`);
		}
	};

	for (const community of level2) {
		lines.push(`Community ${community.id.slice(0, 8)} — members: ${community.memberCount}`);
		if (community.summary) {
			lines.push(` ${community.summary}`);
		}
		for (const child of level1.filter((c) => c.parentId === community.id)) {
			await renderLevel1(child);
		}
	}

	// Level-1 communities that no loaded level-2 community claims.
	const level2Ids = new Set(level2.map((c) => c.id));
	const orphans = level1.filter((c) => c.parentId == null || !level2Ids.has(c.parentId));
	if (orphans.length > 0) {
		lines.push("Communities without a parent");
		for (const child of orphans) {
			await renderLevel1(child);
		}
	}

	return lines.join("\n");
}
|
|
111
|
+
|
|
112
|
+
/**
 * CLI entry point: prints the community tree for the repository in the
 * current working directory.
 *
 * When the `communities` table is empty, detection, summarization and
 * summary-tree construction are run first. The database handle is always
 * closed before returning.
 *
 * @param args - Command-line arguments; `--package <path>` or `-p <path>`
 *   restricts the output to one package.
 */
export async function runCommunityCommand(args: string[]): Promise<void> {
	let packagePath: string | undefined;
	for (let i = 0; i < args.length; i++) {
		if (args[i] === "--package" || args[i] === "-p") {
			packagePath = args[i + 1];
			i++; // skip the flag's value
		}
	}

	const repoHash = resolveRepoHash(process.cwd());
	// Read configuration before opening the database, so that a configuration
	// failure cannot leave an open handle behind.
	const config = getConfig();
	const db = openGraphDb(repoHash);

	try {
		const existing = await countCommunities(db);
		if (existing === 0) {
			await detectCommunities(db);
			await summarizeCommunities(db, { airGapped: config.airGapped });
			await buildSummaryTree(db);
		}
		const output = await formatCommunityTree(db, { packagePath });
		console.log(output);
	} finally {
		await db.close();
	}
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
// Module: compare — Compare knowledge graph state between two time points
|
|
2
|
+
//
|
|
3
|
+
// Usage:
|
|
4
|
+
// sia compare --since 2026-03-01 --until 2026-03-15
|
|
5
|
+
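// sia compare --since 2026-03-08 (--until defaults to now; values are parsed with Date, so relative phrases such as "last week" are not understood)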
|
|
6
|
+
|
|
7
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
8
|
+
|
|
9
|
+
/**
 * Outcome of comparing graph state over a time window: the matching
 * `graph_nodes` rows per category, plus a count for each category.
 */
export interface CompareResult {
	/** Nodes whose `created_at` falls inside the window. */
	added: Array<{ id: string; type: string; name: string; created_at: number }>;
	/** Nodes whose `t_valid_until` falls inside the window. */
	invalidated: Array<{ id: string; type: string; name: string; t_valid_until: number }>;
	/** Nodes whose `archived_at` falls inside the window. */
	archived: Array<{ id: string; type: string; name: string; archived_at: number }>;
	/** Length of each of the three lists above. */
	summary: { added: number; invalidated: number; archived: number };
}
|
|
15
|
+
|
|
16
|
+
/**
 * Collects the graph nodes that were created, invalidated or archived inside
 * an inclusive time window.
 *
 * Three queries run one after another against `graph_nodes`, each filtering
 * on a different timestamp column and ordering newest first.
 *
 * @param db - Database handle used to run the queries.
 * @param since - Lower bound of the window (inclusive).
 * @param until - Upper bound of the window (inclusive).
 * @returns The matching rows per category together with their counts.
 */
export async function compareGraphState(
	db: SiaDb,
	since: number,
	until: number,
): Promise<CompareResult> {
	// Nodes created inside the window.
	const createdQuery = await db.execute(
		`SELECT id, type, name, created_at FROM graph_nodes
		 WHERE created_at >= ? AND created_at <= ?
		 ORDER BY created_at DESC`,
		[since, until],
	);
	const added = createdQuery.rows as CompareResult["added"];

	// Nodes whose validity ended inside the window.
	const invalidatedQuery = await db.execute(
		`SELECT id, type, name, t_valid_until FROM graph_nodes
		 WHERE t_valid_until >= ? AND t_valid_until <= ?
		 ORDER BY t_valid_until DESC`,
		[since, until],
	);
	const invalidated = invalidatedQuery.rows as CompareResult["invalidated"];

	// Nodes archived inside the window.
	const archivedQuery = await db.execute(
		`SELECT id, type, name, archived_at FROM graph_nodes
		 WHERE archived_at >= ? AND archived_at <= ?
		 ORDER BY archived_at DESC`,
		[since, until],
	);
	const archived = archivedQuery.rows as CompareResult["archived"];

	const summary = {
		added: added.length,
		invalidated: invalidated.length,
		archived: archived.length,
	};
	return { added, invalidated, archived, summary };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Formats a comparison result as a plain-text report.
 *
 * The report has a header with the window's dates (UTC, `YYYY-MM-DD`), the
 * three counts, and one section per non-empty category. Sections are
 * truncated (20 entries for added, 10 for invalidated and archived); every
 * truncated section ends with a "... and N more" line so the reader can tell
 * that entries were omitted.
 *
 * @param result - Comparison data to render.
 * @param since - Window start as epoch milliseconds.
 * @param until - Window end as epoch milliseconds.
 * @returns The report as a single newline-joined string. A timestamp that
 *   does not map to a valid date is shown as "invalid date" instead of
 *   throwing.
 */
export function formatComparison(result: CompareResult, since: number, until: number): string {
	// toISOString() throws a RangeError on an invalid Date, so check first.
	const toDay = (timestamp: number): string => {
		const date = new Date(timestamp);
		return Number.isNaN(date.getTime()) ? "invalid date" : date.toISOString().split("T")[0];
	};

	const lines: string[] = [
		`=== SIA Graph Comparison (${toDay(since)} to ${toDay(until)}) ===\n`,
		`Added: ${result.summary.added}`,
		`Invalidated: ${result.summary.invalidated}`,
		`Archived: ${result.summary.archived}`,
	];

	// Appends one section: title, up to `limit` entries, then a truncation notice if needed.
	const pushSection = (
		title: string,
		marker: string,
		entries: ReadonlyArray<{ type: string; name: string }>,
		limit: number,
	): void => {
		if (entries.length === 0) return;
		lines.push(title);
		for (const entry of entries.slice(0, limit)) {
			lines.push(` ${marker} [${entry.type}] ${entry.name}`);
		}
		if (entries.length > limit) {
			lines.push(` ... and ${entries.length - limit} more`);
		}
	};

	pushSection("\n--- Added ---", "+", result.added, 20);
	pushSection("\n--- Invalidated (superseded) ---", "~", result.invalidated, 10);
	pushSection("\n--- Archived (decayed) ---", "-", result.archived, 10);

	return lines.join("\n");
}
|
|
95
|
+
|
|
96
|
+
/**
 * CLI entry point: prints what changed in the knowledge graph of the
 * repository in the current working directory during a time window.
 *
 * The window defaults to the last seven days. `--since <date>` and
 * `--until <date>` override its bounds; each value is parsed with `Date`.
 * A value that does not parse is rejected up front, before the database is
 * opened, so a NaN timestamp never reaches the queries or the formatter.
 *
 * @param args - Command-line arguments.
 * @throws Error when a `--since` or `--until` value is not a parseable date.
 */
export async function runCompare(args: string[]): Promise<void> {
	// Converts a flag value to epoch milliseconds, failing loudly on Invalid Date.
	const parseTimestamp = (flag: string, raw: string): number => {
		const millis = new Date(raw).getTime();
		if (Number.isNaN(millis)) {
			throw new Error(
				`Invalid value for ${flag}: "${raw}" is not a parseable date (use e.g. 2026-03-01)`,
			);
		}
		return millis;
	};

	let since = Date.now() - 7 * 86400000; // default: last 7 days
	let until = Date.now();

	for (let i = 0; i < args.length; i++) {
		const value = args[i + 1];
		if (args[i] === "--since" && value) {
			since = parseTimestamp("--since", value);
			i++; // skip the flag's value
		} else if (args[i] === "--until" && value) {
			until = parseTimestamp("--until", value);
			i++; // skip the flag's value
		}
	}

	const { resolveRepoHash } = await import("@/capture/hook");
	const { openGraphDb } = await import("@/graph/semantic-db");
	const { resolveSiaHome } = await import("@/shared/config");

	const cwd = process.cwd();
	const repoHash = resolveRepoHash(cwd);
	const siaHome = resolveSiaHome();
	const db = openGraphDb(repoHash, siaHome);

	try {
		const result = await compareGraphState(db, since, until);
		console.log(formatComparison(result, since, until));
	} finally {
		await db.close();
	}
}
|