@rkarim08/sia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -0
- package/.claude-plugin/plugin.json +27 -0
- package/.mcp.json +13 -0
- package/CLAUDE.md +226 -0
- package/LICENSE +202 -0
- package/PLUGIN_README.md +253 -0
- package/README.md +1013 -0
- package/agents/sia-changelog-writer.md +89 -0
- package/agents/sia-code-reviewer.md +86 -0
- package/agents/sia-conflict-resolver.md +100 -0
- package/agents/sia-convention-enforcer.md +69 -0
- package/agents/sia-debug.md +106 -0
- package/agents/sia-decision-reviewer.md +101 -0
- package/agents/sia-dependency-tracker.md +80 -0
- package/agents/sia-explain.md +126 -0
- package/agents/sia-feature.md +116 -0
- package/agents/sia-knowledge-capture.md +117 -0
- package/agents/sia-lead-architecture-advisor.md +93 -0
- package/agents/sia-lead-team-health.md +107 -0
- package/agents/sia-migration.md +100 -0
- package/agents/sia-onboarding.md +115 -0
- package/agents/sia-orientation.md +99 -0
- package/agents/sia-pm-briefing.md +106 -0
- package/agents/sia-pm-risk-advisor.md +82 -0
- package/agents/sia-qa-analyst.md +116 -0
- package/agents/sia-qa-regression-map.md +94 -0
- package/agents/sia-refactor.md +115 -0
- package/agents/sia-regression.md +112 -0
- package/agents/sia-security-audit.md +125 -0
- package/agents/sia-test-advisor.md +91 -0
- package/hooks/hooks.json +98 -0
- package/migrations/bridge/001_initial.sql +34 -0
- package/migrations/episodic/001_initial.sql +35 -0
- package/migrations/meta/001_initial.sql +68 -0
- package/migrations/semantic/001_initial.sql +292 -0
- package/migrations/semantic/002_ontology.sql +89 -0
- package/migrations/semantic/003_freshness.sql +63 -0
- package/migrations/semantic/004_v5_unified_schema.sql +194 -0
- package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
- package/migrations/semantic/006_tree_sitter.sql +6 -0
- package/migrations/semantic/007_branch_snapshots.sql +22 -0
- package/package.json +110 -0
- package/scripts/branch-switch.sh +13 -0
- package/scripts/build-wasm-grammars.sh +81 -0
- package/scripts/post-compact.sh +8 -0
- package/scripts/post-tool-use.sh +10 -0
- package/scripts/pre-compact.sh +8 -0
- package/scripts/session-end.sh +8 -0
- package/scripts/session-start.sh +8 -0
- package/scripts/start-mcp.ts +45 -0
- package/scripts/stop-hook.sh +8 -0
- package/scripts/user-prompt-submit.sh +8 -0
- package/scripts/viz-server.ts +152 -0
- package/skills/sia-brainstorm/SKILL.md +156 -0
- package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
- package/skills/sia-brainstorm/scripts/helper.js +95 -0
- package/skills/sia-brainstorm/scripts/server.cjs +338 -0
- package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
- package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
- package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
- package/skills/sia-brainstorm/visual-companion.md +286 -0
- package/skills/sia-capture/SKILL.md +64 -0
- package/skills/sia-compare/SKILL.md +33 -0
- package/skills/sia-conflicts/SKILL.md +38 -0
- package/skills/sia-debug-workflow/SKILL.md +120 -0
- package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
- package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
- package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
- package/skills/sia-digest/SKILL.md +23 -0
- package/skills/sia-dispatch/SKILL.md +69 -0
- package/skills/sia-dispatch/agent-task-template.md +99 -0
- package/skills/sia-doctor/SKILL.md +39 -0
- package/skills/sia-execute/SKILL.md +70 -0
- package/skills/sia-execute-plan/SKILL.md +85 -0
- package/skills/sia-export-import/SKILL.md +49 -0
- package/skills/sia-export-knowledge/SKILL.md +46 -0
- package/skills/sia-finish/SKILL.md +100 -0
- package/skills/sia-finish/pr-summary-template.md +54 -0
- package/skills/sia-freshness/SKILL.md +38 -0
- package/skills/sia-history/SKILL.md +42 -0
- package/skills/sia-impact/SKILL.md +70 -0
- package/skills/sia-index/SKILL.md +54 -0
- package/skills/sia-install/SKILL.md +39 -0
- package/skills/sia-lead-compliance/SKILL.md +16 -0
- package/skills/sia-lead-drift-report/SKILL.md +16 -0
- package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
- package/skills/sia-learn/SKILL.md +58 -0
- package/skills/sia-plan/SKILL.md +68 -0
- package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
- package/skills/sia-playbooks/SKILL.md +29 -0
- package/skills/sia-playbooks/reference-feature.md +100 -0
- package/skills/sia-playbooks/reference-flagging.md +50 -0
- package/skills/sia-playbooks/reference-orientation.md +92 -0
- package/skills/sia-playbooks/reference-regression.md +115 -0
- package/skills/sia-playbooks/reference-review.md +64 -0
- package/skills/sia-playbooks/reference-tools.md +239 -0
- package/skills/sia-pm-decision-log/SKILL.md +28 -0
- package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
- package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
- package/skills/sia-prune/SKILL.md +45 -0
- package/skills/sia-qa-coverage/SKILL.md +28 -0
- package/skills/sia-qa-flaky/SKILL.md +20 -0
- package/skills/sia-qa-report/SKILL.md +26 -0
- package/skills/sia-reindex/SKILL.md +30 -0
- package/skills/sia-review-respond/SKILL.md +88 -0
- package/skills/sia-review-respond/pushback-patterns.md +90 -0
- package/skills/sia-search/SKILL.md +47 -0
- package/skills/sia-setup/SKILL.md +82 -0
- package/skills/sia-setup/setup-checklist.md +97 -0
- package/skills/sia-stats/SKILL.md +36 -0
- package/skills/sia-status/SKILL.md +44 -0
- package/skills/sia-sync/SKILL.md +46 -0
- package/skills/sia-team/SKILL.md +64 -0
- package/skills/sia-test/SKILL.md +92 -0
- package/skills/sia-test/testing-anti-patterns.md +104 -0
- package/skills/sia-tour/SKILL.md +29 -0
- package/skills/sia-upgrade/SKILL.md +43 -0
- package/skills/sia-verify/SKILL.md +81 -0
- package/skills/sia-visualize/SKILL.md +28 -0
- package/skills/sia-visualize-live/SKILL.md +55 -0
- package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
- package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
- package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
- package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
- package/skills/sia-workspace/SKILL.md +57 -0
- package/src/agent/claude-md-template-flagging.md +219 -0
- package/src/agent/claude-md-template.md +213 -0
- package/src/agent/modules/sia-feature.md +100 -0
- package/src/agent/modules/sia-flagging.md +50 -0
- package/src/agent/modules/sia-orientation.md +92 -0
- package/src/agent/modules/sia-regression.md +115 -0
- package/src/agent/modules/sia-review.md +64 -0
- package/src/agent/modules/sia-tools.md +239 -0
- package/src/ast/extractors/c-include.ts +189 -0
- package/src/ast/extractors/csharp-project.ts +260 -0
- package/src/ast/extractors/prisma-schema.ts +44 -0
- package/src/ast/extractors/project-manifest.ts +111 -0
- package/src/ast/extractors/sql-schema.ts +67 -0
- package/src/ast/extractors/tier-a.ts +423 -0
- package/src/ast/extractors/tier-b.ts +289 -0
- package/src/ast/extractors/tier-dispatch.ts +247 -0
- package/src/ast/index-worker.ts +108 -0
- package/src/ast/indexer.ts +484 -0
- package/src/ast/languages.ts +408 -0
- package/src/ast/pagerank-builder.ts +125 -0
- package/src/ast/path-utils.ts +137 -0
- package/src/ast/tree-sitter/backends/native.ts +57 -0
- package/src/ast/tree-sitter/backends/wasm.ts +39 -0
- package/src/ast/tree-sitter/call-walker.ts +44 -0
- package/src/ast/tree-sitter/edit-computer.ts +55 -0
- package/src/ast/tree-sitter/query-runner.ts +46 -0
- package/src/ast/tree-sitter/service.ts +174 -0
- package/src/ast/tree-sitter/tree-cache.ts +39 -0
- package/src/ast/tree-sitter/types.ts +79 -0
- package/src/ast/watcher.ts +322 -0
- package/src/capture/chunker.ts +169 -0
- package/src/capture/consolidate.ts +127 -0
- package/src/capture/edge-inferrer.ts +161 -0
- package/src/capture/embedder.ts +166 -0
- package/src/capture/embedding-cache.ts +73 -0
- package/src/capture/flag-processor.ts +64 -0
- package/src/capture/hook.ts +67 -0
- package/src/capture/pipeline.ts +450 -0
- package/src/capture/prompts/consolidate.ts +25 -0
- package/src/capture/prompts/edge-infer.ts +29 -0
- package/src/capture/prompts/extract-flagged.ts +36 -0
- package/src/capture/prompts/extract.ts +42 -0
- package/src/capture/tokenizer.ts +147 -0
- package/src/capture/track-a-ast.ts +93 -0
- package/src/capture/track-b-llm.ts +149 -0
- package/src/capture/types.ts +64 -0
- package/src/cli/commands/community.ts +137 -0
- package/src/cli/commands/compare.ts +123 -0
- package/src/cli/commands/conflicts.ts +41 -0
- package/src/cli/commands/digest.ts +197 -0
- package/src/cli/commands/disable-flagging.ts +34 -0
- package/src/cli/commands/doctor.ts +240 -0
- package/src/cli/commands/download-model.ts +161 -0
- package/src/cli/commands/enable-flagging.ts +34 -0
- package/src/cli/commands/export-knowledge.ts +208 -0
- package/src/cli/commands/export.ts +85 -0
- package/src/cli/commands/freshness.ts +164 -0
- package/src/cli/commands/graph.ts +51 -0
- package/src/cli/commands/history.ts +139 -0
- package/src/cli/commands/import.ts +335 -0
- package/src/cli/commands/install.ts +156 -0
- package/src/cli/commands/lead-report.ts +241 -0
- package/src/cli/commands/learn.ts +321 -0
- package/src/cli/commands/pm-report.ts +413 -0
- package/src/cli/commands/prune.ts +75 -0
- package/src/cli/commands/qa-report.ts +278 -0
- package/src/cli/commands/reindex.ts +104 -0
- package/src/cli/commands/rollback.ts +70 -0
- package/src/cli/commands/search.ts +103 -0
- package/src/cli/commands/server.ts +91 -0
- package/src/cli/commands/share.ts +33 -0
- package/src/cli/commands/stats.ts +79 -0
- package/src/cli/commands/status.ts +176 -0
- package/src/cli/commands/sync.ts +96 -0
- package/src/cli/commands/team.ts +118 -0
- package/src/cli/commands/tour.ts +157 -0
- package/src/cli/commands/visualize-live.ts +162 -0
- package/src/cli/commands/workspace.ts +117 -0
- package/src/cli/index.ts +424 -0
- package/src/cli/learn-progress.ts +87 -0
- package/src/community/detection-bridge.ts +344 -0
- package/src/community/leiden.ts +462 -0
- package/src/community/raptor.ts +210 -0
- package/src/community/scheduler.ts +74 -0
- package/src/community/summarize.ts +115 -0
- package/src/decay/archiver.ts +73 -0
- package/src/decay/bridge-orphan-cleanup.ts +212 -0
- package/src/decay/consolidation-sweep.ts +112 -0
- package/src/decay/decay.ts +116 -0
- package/src/decay/deep-validator.ts +62 -0
- package/src/decay/episodic-promoter.ts +132 -0
- package/src/decay/maintenance-scheduler.ts +326 -0
- package/src/decay/scheduler.ts +6 -0
- package/src/decay/session-sweeper.ts +79 -0
- package/src/decay/types.ts +17 -0
- package/src/freshness/confidence-decay.ts +122 -0
- package/src/freshness/cuckoo-filter.ts +176 -0
- package/src/freshness/deep-validation.ts +345 -0
- package/src/freshness/dirty-tracker.ts +237 -0
- package/src/freshness/file-watcher-layer.ts +119 -0
- package/src/freshness/firewall.ts +64 -0
- package/src/freshness/git-reconcile-layer.ts +161 -0
- package/src/freshness/inverted-index.ts +158 -0
- package/src/freshness/stale-read-layer.ts +222 -0
- package/src/graph/audit.ts +69 -0
- package/src/graph/bridge-db.ts +141 -0
- package/src/graph/communities.ts +195 -0
- package/src/graph/db-interface.ts +259 -0
- package/src/graph/edges.ts +163 -0
- package/src/graph/entities.ts +327 -0
- package/src/graph/episodic-db.ts +113 -0
- package/src/graph/flags.ts +31 -0
- package/src/graph/meta-db.ts +200 -0
- package/src/graph/semantic-db.ts +101 -0
- package/src/graph/session-resume.ts +56 -0
- package/src/graph/snapshots.ts +342 -0
- package/src/graph/staging.ts +151 -0
- package/src/graph/types.ts +128 -0
- package/src/hooks/adapters/claude-code.ts +21 -0
- package/src/hooks/adapters/cline.ts +43 -0
- package/src/hooks/adapters/cursor.ts +65 -0
- package/src/hooks/adapters/generic.ts +12 -0
- package/src/hooks/agent-detect.ts +34 -0
- package/src/hooks/claude-md-directives.ts +32 -0
- package/src/hooks/event-router.ts +182 -0
- package/src/hooks/extractors/pattern-detector.ts +111 -0
- package/src/hooks/handlers/post-compact.ts +30 -0
- package/src/hooks/handlers/post-tool-use.ts +403 -0
- package/src/hooks/handlers/pre-compact.ts +100 -0
- package/src/hooks/handlers/session-end.ts +47 -0
- package/src/hooks/handlers/session-start.ts +154 -0
- package/src/hooks/handlers/stop.ts +128 -0
- package/src/hooks/handlers/user-prompt-submit.ts +68 -0
- package/src/hooks/plugin-branch-switch.ts +68 -0
- package/src/hooks/plugin-common.ts +47 -0
- package/src/hooks/plugin-post-compact.ts +28 -0
- package/src/hooks/plugin-post-tool-use.ts +38 -0
- package/src/hooks/plugin-pre-compact.ts +37 -0
- package/src/hooks/plugin-session-end.ts +37 -0
- package/src/hooks/plugin-session-start.ts +75 -0
- package/src/hooks/plugin-stop.ts +61 -0
- package/src/hooks/plugin-user-prompt-submit.ts +47 -0
- package/src/hooks/types.ts +43 -0
- package/src/knowledge/discovery.ts +238 -0
- package/src/knowledge/external-refs.ts +98 -0
- package/src/knowledge/freshness.ts +221 -0
- package/src/knowledge/ingest.ts +330 -0
- package/src/knowledge/markdown-export.ts +229 -0
- package/src/knowledge/markdown-import.ts +359 -0
- package/src/knowledge/patterns.ts +74 -0
- package/src/knowledge/templates.ts +307 -0
- package/src/llm/ai-sdk-adapter.ts +46 -0
- package/src/llm/config.ts +88 -0
- package/src/llm/cost-tracker.ts +110 -0
- package/src/llm/prompts/extraction.ts +55 -0
- package/src/llm/prompts/summarization.ts +36 -0
- package/src/llm/prompts/validation.ts +37 -0
- package/src/llm/provider-registry.ts +68 -0
- package/src/llm/reliability.ts +179 -0
- package/src/llm/schemas.ts +52 -0
- package/src/mcp/freshness-annotator.ts +69 -0
- package/src/mcp/server.ts +949 -0
- package/src/mcp/tools/sia-ast-query.ts +225 -0
- package/src/mcp/tools/sia-at-time.ts +151 -0
- package/src/mcp/tools/sia-backlinks.ts +87 -0
- package/src/mcp/tools/sia-batch-execute.ts +169 -0
- package/src/mcp/tools/sia-by-file.ts +89 -0
- package/src/mcp/tools/sia-community.ts +113 -0
- package/src/mcp/tools/sia-doctor.ts +73 -0
- package/src/mcp/tools/sia-execute-file.ts +122 -0
- package/src/mcp/tools/sia-execute.ts +104 -0
- package/src/mcp/tools/sia-expand.ts +158 -0
- package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
- package/src/mcp/tools/sia-flag.ts +65 -0
- package/src/mcp/tools/sia-index.ts +111 -0
- package/src/mcp/tools/sia-note.ts +134 -0
- package/src/mcp/tools/sia-search.ts +105 -0
- package/src/mcp/tools/sia-stats.ts +63 -0
- package/src/mcp/tools/sia-sync-status.ts +44 -0
- package/src/mcp/tools/sia-upgrade.ts +247 -0
- package/src/mcp/truncate.ts +231 -0
- package/src/native/bridge.ts +167 -0
- package/src/native/fallback-ast-diff.ts +144 -0
- package/src/native/fallback-graph.ts +325 -0
- package/src/ontology/constraints.ts +56 -0
- package/src/ontology/errors.ts +8 -0
- package/src/ontology/middleware.ts +266 -0
- package/src/retrieval/bm25-search.ts +151 -0
- package/src/retrieval/context-assembly.ts +76 -0
- package/src/retrieval/graph-traversal.ts +168 -0
- package/src/retrieval/pagerank.ts +40 -0
- package/src/retrieval/query-classifier.ts +106 -0
- package/src/retrieval/reranker.ts +156 -0
- package/src/retrieval/search.ts +236 -0
- package/src/retrieval/throttle.ts +102 -0
- package/src/retrieval/vector-search.ts +203 -0
- package/src/retrieval/workspace-search.ts +130 -0
- package/src/sandbox/context-mode.ts +285 -0
- package/src/sandbox/credential-pass.ts +55 -0
- package/src/sandbox/executor.ts +235 -0
- package/src/security/pattern-detector.ts +127 -0
- package/src/security/rule-of-two.ts +50 -0
- package/src/security/sanitize.ts +46 -0
- package/src/security/semantic-consistency.ts +93 -0
- package/src/security/staging-promoter.ts +154 -0
- package/src/shared/config.ts +302 -0
- package/src/shared/diagnostics.ts +210 -0
- package/src/shared/errors.ts +48 -0
- package/src/shared/git-utils.ts +143 -0
- package/src/shared/llm-client.ts +120 -0
- package/src/shared/logger.ts +99 -0
- package/src/shared/types.ts +79 -0
- package/src/sync/client.ts +43 -0
- package/src/sync/conflict.ts +106 -0
- package/src/sync/dedup.ts +183 -0
- package/src/sync/hlc.ts +117 -0
- package/src/sync/keychain.ts +144 -0
- package/src/sync/pull.ts +232 -0
- package/src/sync/push.ts +131 -0
- package/src/types/chokidar.d.ts +23 -0
- package/src/visualization/graph-renderer.ts +312 -0
- package/src/visualization/subgraph-extract.ts +208 -0
- package/src/visualization/views/community-clusters.ts +246 -0
- package/src/visualization/views/dependency-map.ts +189 -0
- package/src/visualization/views/graph-explorer.ts +364 -0
- package/src/visualization/views/timeline.ts +247 -0
- package/src/workspace/api-contracts.ts +226 -0
- package/src/workspace/cross-repo.ts +61 -0
- package/src/workspace/detector.ts +190 -0
- package/src/workspace/manifest.ts +141 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
// Module: ingest — Heading-based markdown chunking and graph ingestion
|
|
2
|
+
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { basename } from "node:path";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
import { insertEdge } from "@/graph/edges";
|
|
7
|
+
import { insertEntity } from "@/graph/entities";
|
|
8
|
+
|
|
9
|
+
/**
 * One heading-scoped section of a markdown document, as produced by
 * `parseMarkdown`.
 */
export interface DocChunk {
	/** Heading text without the leading `#` markers; `""` for content that precedes the first heading. */
	heading: string;
	/** Number of `#` markers (1-6); `0` for content that precedes the first heading. */
	headingLevel: number;
	/** Headings from the top level down to this one; skipped levels are filled with `""`. */
	headingPath: string[];
	/** Text that follows the heading line (the heading itself excluded), with trailing whitespace removed. */
	content: string;
	/** Fenced code blocks found inside this chunk's content. */
	codeBlocks: CodeBlock[];
	/** Markdown links in this chunk's content that are not absolute URLs. */
	internalLinks: InternalLink[];
}
|
|
18
|
+
|
|
19
|
+
/** A fenced code block lifted out of markdown content. */
export interface CodeBlock {
	/** Text that follows the opening fence markers; `""` when the fence carries none. */
	language: string;
	/** Lines between the opening and closing fences, joined with `"\n"`. */
	code: string;
}
|
|
23
|
+
|
|
24
|
+
/** A markdown link (`[text](target)`) whose target is not an absolute URL. */
export interface InternalLink {
	/** The bracketed link text. */
	text: string;
	/** The parenthesised link destination, exactly as written. */
	target: string;
	/** True when `target` starts with `#`. */
	isAnchor: boolean;
}
|
|
29
|
+
|
|
30
|
+
/** Summary returned by `ingestDocument`. */
export interface IngestResult {
	/** Id of the FileNode entity that was found or created for the document. */
	fileNodeId: string;
	/** Number of ContentChunk entities inserted by this call. */
	chunksCreated: number;
	/** Number of edges inserted by this call. */
	edgesCreated: number;
}
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------
|
|
37
|
+
// Markdown parsing
|
|
38
|
+
// ---------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Parse markdown content into heading-based chunks.
 *
 * A new chunk starts at every ATX heading (`#` .. `######`). Lines inside a
 * ``` fence are never treated as headings, so code blocks stay within the
 * chunk of the heading they appear under. Content that precedes the first
 * heading is emitted as a chunk with an empty heading and level 0, provided
 * it is not blank.
 *
 * Both LF and CRLF line endings are accepted; chunk content is always joined
 * with "\n".
 *
 * @param content - Markdown text (frontmatter already removed by the caller).
 * @returns Chunks in document order.
 */
export function parseMarkdown(content: string): DocChunk[] {
	// Split on LF *or* CRLF. Splitting on "\n" alone leaves a trailing "\r" on
	// every line of a CRLF file, and the heading regex below can never match
	// such a line ("." excludes "\r" and "$" is not multiline), which would
	// collapse the whole document into a single heading-less chunk.
	const lines = content.split(/\r?\n/);
	const chunks: DocChunk[] = [];

	// State for the chunk currently being accumulated
	let currentHeading = "";
	let currentLevel = 0;
	let headingPath: string[] = [];
	let contentLines: string[] = [];
	let inCodeFence = false;

	/** Emit the accumulated chunk (if it has a heading or non-blank content) and reset the buffer. */
	function flushChunk(): void {
		const body = contentLines.join("\n");
		if (currentHeading !== "" || body.trim().length > 0) {
			chunks.push({
				heading: currentHeading,
				headingLevel: currentLevel,
				headingPath: [...headingPath],
				content: body.trimEnd(),
				codeBlocks: extractCodeBlocks(body),
				internalLinks: extractInternalLinks(body),
			});
		}
		contentLines = [];
	}

	for (const line of lines) {
		// A fence line toggles fence state and is itself part of the content
		if (line.trimStart().startsWith("```")) {
			inCodeFence = !inCodeFence;
			contentLines.push(line);
			continue;
		}

		// Inside a fence nothing is interpreted, so "# comment" stays code
		if (inCodeFence) {
			contentLines.push(line);
			continue;
		}

		// ATX-style headings: "# H1" .. "###### H6"
		const headingMatch = /^(#{1,6})\s+(.+)$/.exec(line);
		if (headingMatch) {
			// Close the previous chunk before starting a new one
			flushChunk();

			const level = headingMatch[1].length;
			const heading = headingMatch[2].trim();

			// Keep ancestors above this level, pad skipped levels with "" so
			// that index (level - 1) always holds the current heading.
			headingPath = headingPath.slice(0, level - 1);
			while (headingPath.length < level - 1) {
				headingPath.push("");
			}
			headingPath.push(heading);

			currentHeading = heading;
			currentLevel = level;
			continue;
		}

		contentLines.push(line);
	}

	// Emit whatever follows the last heading
	flushChunk();

	return chunks;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Extract fenced code blocks from markdown content.
 *
 * Recognises blocks opened and closed by a line whose first non-blank
 * characters are three or more backticks. The block's `language` is the
 * first word of the info string after the fence (e.g. "ts" for
 * ```` ```ts title="a.ts" ````), or "" when there is none. A fence that is
 * still open at the end of the content is discarded.
 *
 * Both LF and CRLF line endings are accepted; code lines are joined with "\n".
 *
 * @param content - Markdown text to scan.
 * @returns Code blocks in document order.
 */
function extractCodeBlocks(content: string): CodeBlock[] {
	const blocks: CodeBlock[] = [];
	let language = "";
	// null while outside a fence; the collected lines while inside one
	let codeLines: string[] | null = null;

	for (const line of content.split(/\r?\n/)) {
		const trimmed = line.trimStart();
		const isFence = trimmed.startsWith("```");

		if (codeLines === null) {
			if (isFence) {
				// Drop the whole backtick run (fences may be longer than three),
				// then keep only the first word: anything after it is metadata.
				const info = trimmed.replace(/^`+/, "").trim();
				language = info.split(/\s+/)[0] ?? "";
				codeLines = [];
			}
			continue;
		}

		if (isFence) {
			blocks.push({ language, code: codeLines.join("\n") });
			codeLines = null;
			language = "";
			continue;
		}

		codeLines.push(line);
	}

	return blocks;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Extract internal links (markdown `[text](target)` syntax) from content.
 *
 * A link is internal when its target is a relative path or an anchor (`#`).
 * Targets that carry a URI scheme (`http:`, `https:`, `mailto:`, `ftp:`, ...,
 * matched case-insensitively) or that are protocol-relative (`//host/...`)
 * point outside the document set and are excluded.
 *
 * @param content - Markdown text to scan.
 * @returns Internal links in document order.
 */
function extractInternalLinks(content: string): InternalLink[] {
	const links: InternalLink[] = [];

	for (const match of content.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
		const text = match[1];
		const target = match[2];

		// A scheme is a letter followed by at least one more scheme character
		// and a colon; requiring two characters keeps "c:" style drive prefixes
		// from being mistaken for a scheme.
		const hasScheme = /^[a-z][a-z0-9+.-]+:/i.test(target);
		if (hasScheme || target.startsWith("//")) continue;

		links.push({ text, target, isAnchor: target.startsWith("#") });
	}

	return links;
}
|
|
178
|
+
|
|
179
|
+
// ---------------------------------------------------------------
|
|
180
|
+
// Frontmatter parsing
|
|
181
|
+
// ---------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
/**
 * Parse YAML frontmatter from markdown content.
 *
 * Frontmatter is recognised only when the content starts with a `---` line
 * and a later line consists solely of `---` (trailing spaces/tabs allowed).
 * Only flat `key: value` pairs are understood: each line is split at its
 * first colon, blank lines and `#` comment lines are skipped, and values are
 * kept as written (no quote stripping, no nested YAML).
 *
 * @param content - Raw file content.
 * @returns The parsed key-value pairs and the content after the closing
 *   delimiter. When there is no complete frontmatter block, `frontmatter`
 *   is empty and `body` is the unchanged input.
 */
export function parseFrontmatter(content: string): {
	frontmatter: Record<string, string>;
	body: string;
} {
	const frontmatter: Record<string, string> = {};

	if (!content.startsWith("---\n") && !content.startsWith("---\r\n")) {
		return { frontmatter, body: content };
	}

	// First character after the opening delimiter line
	const yamlStart = content.indexOf("\n") + 1;

	// The closing delimiter must be a whole line. Searching from yamlStart
	// (rather than past it) also finds the delimiter of an empty block
	// ("---\n---\n"), and the anchors reject lines such as "---foo" or "----".
	const closingRe = /^---[ \t]*$/gm;
	closingRe.lastIndex = yamlStart;
	const closing = closingRe.exec(content);
	if (closing === null) {
		return { frontmatter, body: content };
	}

	const yamlBlock = content.slice(yamlStart, closing.index);

	// Body starts after the newline that ends the closing delimiter line
	const delimiterEnd = content.indexOf("\n", closing.index);
	const body = delimiterEnd === -1 ? "" : content.slice(delimiterEnd + 1);

	// Simple key: value parser (no nested YAML)
	for (const line of yamlBlock.split("\n")) {
		const trimmed = line.trim();
		if (trimmed === "" || trimmed.startsWith("#")) continue;

		const colonIdx = trimmed.indexOf(":");
		if (colonIdx === -1) continue;

		const key = trimmed.slice(0, colonIdx).trim();
		const value = trimmed.slice(colonIdx + 1).trim();
		if (key) {
			frontmatter[key] = value;
		}
	}

	return { frontmatter, body };
}
|
|
225
|
+
|
|
226
|
+
// ---------------------------------------------------------------
|
|
227
|
+
// Graph ingestion
|
|
228
|
+
// ---------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
/**
 * Ingest a documentation file into the knowledge graph.
 *
 * 1. Read the file and split it into frontmatter and heading-based chunks
 * 2. Reuse the active FileNode whose `file_paths` contains `relativePath`,
 *    or create one
 * 3. Insert a ContentChunk entity for every chunk
 * 4. Insert a `child_of` edge from each chunk to the FileNode
 *
 * Internal links are parsed into each chunk but are not turned into edges
 * by this function.
 *
 * NOTE(review): chunks are inserted unconditionally, so ingesting the same
 * file twice appears to create a second set of ContentChunk entities —
 * confirm whether callers invalidate old chunks first.
 *
 * @param db - Graph database handle.
 * @param filePath - Path used to read the file from disk.
 * @param relativePath - Path recorded in the graph (`file_paths`) and used to
 *   look up an existing FileNode.
 * @param opts - Optional overrides: `tag` (default "project-docs"),
 *   `trustTier` (default 1) and `packagePath` (default null).
 * @returns The FileNode id plus the number of chunks and edges inserted.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export async function ingestDocument(
	db: SiaDb,
	filePath: string,
	relativePath: string,
	opts?: {
		tag?: string;
		trustTier?: 1 | 2;
		packagePath?: string | null;
	},
): Promise<IngestResult> {
	const raw = readFileSync(filePath, "utf-8");
	const { frontmatter, body } = parseFrontmatter(raw);
	const chunks = parseMarkdown(body);

	const tag = opts?.tag ?? "project-docs";
	const trustTier = opts?.trustTier ?? 1;
	const packagePath = opts?.packagePath ?? null;
	const fileName = basename(relativePath);

	let edgesCreated = 0;

	// ---- Step 1: Find or create FileNode ----
	let fileNodeId: string;

	// file_paths holds a JSON array, so search for the JSON-encoded path
	// (quotes included). LIKE metacharacters are escaped so that "_" or "%"
	// in a path (e.g. "my_notes.md") cannot match a different file.
	const likeNeedle = JSON.stringify(relativePath).replace(/[\\%_]/g, "\\$&");

	const existing = await db.execute(
		"SELECT id FROM graph_nodes WHERE type = 'FileNode' AND file_paths LIKE ? ESCAPE '\\' AND t_valid_until IS NULL AND archived_at IS NULL",
		[`%${likeNeedle}%`],
	);

	if (existing.rows.length > 0) {
		fileNodeId = existing.rows[0].id as string;
	} else {
		// Prefer an authored description, then the title, then the file's opening text
		const summary = frontmatter.description ?? frontmatter.title ?? raw.slice(0, 200).trim();

		const fileNode = await insertEntity(db, {
			type: "FileNode",
			name: fileName,
			content: summary,
			summary: `Documentation file: ${relativePath}`,
			package_path: packagePath,
			tags: JSON.stringify([tag]),
			file_paths: JSON.stringify([relativePath]),
			trust_tier: trustTier,
			confidence: 1.0,
			extraction_method: "document-ingest",
		});
		fileNodeId = fileNode.id;
	}

	// ---- Steps 2-4: Create ContentChunk entities and child_of edges ----
	let chunksCreated = 0;

	// Inserted one at a time so that each edge is written right after its chunk
	for (const chunk of chunks) {
		// Content before the first heading has no heading of its own
		const chunkName = chunk.heading !== "" ? chunk.heading : `${fileName} - Introduction`;

		const contentPreview = chunk.content.slice(0, 150).trim();

		// Tag with the heading level (h1..h6) when the chunk has a heading
		const chunkTags: string[] = [tag];
		if (chunk.headingLevel > 0) {
			chunkTags.push(`h${chunk.headingLevel}`);
		}

		const chunkEntity = await insertEntity(db, {
			type: "ContentChunk",
			name: chunkName,
			content: chunk.content,
			summary: contentPreview,
			package_path: packagePath,
			tags: JSON.stringify(chunkTags),
			file_paths: JSON.stringify([relativePath]),
			trust_tier: trustTier,
			confidence: 1.0,
			extraction_method: "document-ingest",
		});

		// child_of edge: chunk -> FileNode
		await insertEdge(db, {
			from_id: chunkEntity.id,
			to_id: fileNodeId,
			type: "child_of",
			extraction_method: "document-ingest",
		});
		edgesCreated++;
		chunksCreated++;
	}

	return {
		fileNodeId,
		chunksCreated,
		edgesCreated,
	};
}
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
// Module: markdown-export — Export knowledge graph as markdown vault
|
|
2
|
+
|
|
3
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import type { SiaDb } from "@/graph/db-interface";
|
|
6
|
+
|
|
7
|
+
/** Options accepted by `exportAsMarkdown`. */
export interface MarkdownExportOpts {
	/** Directory the export is written to; created (recursively) if missing. */
	outputDir: string;
	/** Entity types to export; when omitted, the module's default type list is used. */
	types?: string[];
	/** When true, "CodeEntity" is added to the exported types. Defaults to false. */
	includeCode?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/** Summary returned by `exportAsMarkdown`. */
export interface MarkdownExportResult {
	/** Count of files written — NOTE(review): confirm whether index files are included. */
	filesWritten: number;
	/** Count of entities exported. */
	entitiesExported: number;
	/** Output directory — presumably echoes `MarkdownExportOpts.outputDir`; confirm. */
	outputDir: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Entity types exported when the caller does not pass `types`.
 * Covers the semantic kinds only; CodeEntity and FileNode are left out.
 */
const DEFAULT_TYPES = [
	"Decision",
	"Convention",
	"Bug",
	"Solution",
	"Concept",
];
|
|
21
|
+
|
|
22
|
+
/** A neighbouring entity reached through an edge, with the fields needed to link to it. */
interface RelatedEntity {
	/** Type of the connecting edge — presumably the edge's `type` value; confirm against the query. */
	edgeType: string;
	/** Id of the related entity. */
	id: string;
	/** Type of the related entity (e.g. "Decision"). */
	entityType: string;
	/** Name of the related entity. */
	name: string;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Map an entity type to the name of its export directory.
 *
 * "CodeEntity" and "FileNode" have fixed names ("code", "files"); every
 * other type is lowercased and given an "s" suffix, e.g.
 * "Decision" -> "decisions".
 */
function typeToDir(type: string): string {
	switch (type) {
		case "CodeEntity":
			return "code";
		case "FileNode":
			return "files";
		default:
			return `${type.toLowerCase()}s`;
	}
}
|
|
39
|
+
|
|
40
|
+
/**
 * Turn a display name into a filename-safe slug.
 *
 * Steps, in order: lowercase; turn each run of whitespace/underscores into a
 * hyphen; drop every character that is not a-z, 0-9 or a hyphen; collapse
 * repeated hyphens; strip hyphens from both ends. A name with no ASCII
 * letters or digits therefore yields "".
 */
export function slugify(name: string): string {
	const hyphenated = name.toLowerCase().replace(/[\s_]+/g, "-");
	const asciiOnly = hyphenated.replace(/[^a-z0-9-]/g, "");
	const collapsed = asciiOnly.replace(/-{2,}/g, "-");
	return collapsed.replace(/^-+|-+$/g, "");
}
|
|
54
|
+
|
|
55
|
+
/**
 * Format a millisecond epoch timestamp as an ISO 8601 string.
 *
 * @param ts - Milliseconds since the Unix epoch.
 * @returns The ISO 8601 representation, or "unknown" when the timestamp is
 *   null, undefined, 0, or does not denote a valid date (NaN, infinite, or
 *   outside the range `Date` can represent).
 */
function toIsoDate(ts: number | null | undefined): string {
	if (ts == null || ts === 0) return "unknown";

	// toISOString() throws RangeError on an invalid Date, so check first
	const date = new Date(ts);
	return Number.isNaN(date.getTime()) ? "unknown" : date.toISOString();
}
|
|
63
|
+
|
|
64
|
+
/**
 * Decode a JSON-encoded tag list.
 *
 * @param tags - Expected to be a JSON array serialised as a string.
 * @returns Every array element converted with `String()`. Returns an empty
 *   array when `tags` is not a string, is not valid JSON, does not decode to
 *   an array, or an element cannot be converted.
 */
function parseTags(tags: unknown): string[] {
	if (typeof tags !== "string") {
		return [];
	}

	try {
		const decoded: unknown = JSON.parse(tags);
		if (!Array.isArray(decoded)) {
			return [];
		}
		// Conversion stays inside the try: String() can throw for odd objects
		return decoded.map((item) => String(item));
	} catch {
		return [];
	}
}
|
|
77
|
+
|
|
78
|
+
/** YAML indicator and quote characters that force a value to be double-quoted. */
const YAML_INDICATOR_RE = /[:#{}[\],&*?|>!%@`"']/;

/** ASCII control characters (including tab, CR and LF) and DEL. */
const YAML_CONTROL_RE = /[\x00-\x1f\x7f]/;

/** Plain scalars that common YAML parsers resolve to booleans or null instead of strings. */
const YAML_RESERVED_RE = /^(?:true|false|yes|no|on|off|null|~)$/i;

/** Plain scalars that start like a `YYYY-MM-DD` date and may be resolved as timestamps. */
const YAML_DATE_RE = /^\d{4}-\d{2}-\d{2}/;

/**
 * Render a string as a YAML scalar that reads back as the same string.
 *
 * The value is returned unchanged when it is safe as a plain scalar. It is
 * wrapped in double quotes when it:
 * - is empty or has leading/trailing whitespace,
 * - contains a YAML indicator character or a quote character,
 * - contains a control character such as a newline or tab,
 * - starts with `-` (sequence entry / document marker look-alike),
 * - would otherwise be resolved as a boolean, null, number or date.
 *
 * Inside the quoted form, backslashes, double quotes and control characters
 * are escaped so the scalar always stays on a single line.
 *
 * @param value - Raw string to place in YAML frontmatter.
 * @returns The plain or double-quoted YAML representation.
 */
function yamlString(value: string): string {
  const looksNumeric =
    !Number.isNaN(Number(value)) || /^[+-]?\.(?:inf|nan)$/i.test(value);

  const needsQuoting =
    value === "" ||
    value !== value.trim() ||
    value.startsWith("-") ||
    YAML_INDICATOR_RE.test(value) ||
    YAML_CONTROL_RE.test(value) ||
    YAML_RESERVED_RE.test(value) ||
    YAML_DATE_RE.test(value) ||
    looksNumeric;

  if (!needsQuoting) return value;

  // Single pass so an inserted backslash is never escaped a second time.
  const escaped = value.replace(/[\\"\x00-\x1f\x7f]/g, (ch) => {
    switch (ch) {
      case "\\":
        return "\\\\";
      case '"':
        return '\\"';
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      case "\t":
        return "\\t";
      default:
        return `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`;
    }
  });
  return `"${escaped}"`;
}
|
|
88
|
+
|
|
89
|
+
/** Location of one exported entity's file, relative to the output directory. */
interface ExportTarget {
  /** Sub-directory derived from the entity type. */
  dir: string;
  /** File name without the `.md` extension; unique within `dir`. */
  slug: string;
}

/**
 * Choose a unique `<dir>/<slug>` for every entity row before anything is written.
 *
 * Names that slugify to the same value would otherwise overwrite each other,
 * and names with no ASCII alphanumerics would produce a bare `.md` file. The
 * first row keeps the plain slug; later rows get `-2`, `-3`, ... appended.
 * An empty slug falls back to "untitled".
 */
function assignExportTargets(
  rows: readonly Record<string, unknown>[],
): Map<string, ExportTarget> {
  const targets = new Map<string, ExportTarget>();
  const taken = new Set<string>();
  for (const row of rows) {
    const dir = typeToDir(row.type as string);
    const base = slugify(row.name as string) || "untitled";
    let slug = base;
    for (let n = 2; taken.has(`${dir}/${slug}`); n++) {
      slug = `${base}-${n}`;
    }
    taken.add(`${dir}/${slug}`);
    targets.set(row.id as string, { dir, slug });
  }
  return targets;
}

/**
 * Export knowledge graph entities as markdown files organized by type.
 *
 * Reads active nodes (no `t_valid_until`, not archived) of the requested types
 * from `graph_nodes`, writes one markdown file per node under
 * `<outputDir>/<type directory>/`, and finally writes `<outputDir>/index.md`
 * with a per-type count table. Each entity file has YAML frontmatter
 * (id, kind, trust_tier, created_at, tags, importance), a heading with the
 * entity name, its content, and a "Related" list of wikilinks built from the
 * entity's active outgoing edges (Obsidian-style `[[dir/slug]]`).
 *
 * File names are unique per directory: entities whose names slugify to the
 * same value get a numeric suffix, and links to exported entities point at the
 * file that was actually written for them. Links to entities that are not part
 * of this export fall back to the name-derived path.
 *
 * @param db - Database handle used to run the node and edge queries.
 * @param opts - Export options: `outputDir`, optional `types` (defaults to
 *   `DEFAULT_TYPES`), and optional `includeCode` (adds "CodeEntity").
 * @returns Number of files written (entity files plus index.md), number of
 *   entities exported, and the output directory.
 * @throws Whatever `db.execute`, `mkdirSync` or `writeFileSync` throw; no
 *   errors are caught here.
 */
export async function exportAsMarkdown(
  db: SiaDb,
  opts: MarkdownExportOpts,
): Promise<MarkdownExportResult> {
  const types = opts.types ?? DEFAULT_TYPES;
  const includeCode = opts.includeCode ?? false;

  // Build the effective type list; a Set drops duplicates so the index table
  // and the SQL placeholder list never repeat a type.
  const effectiveTypes = [...new Set(types)];
  if (includeCode && !effectiveTypes.includes("CodeEntity")) {
    effectiveTypes.push("CodeEntity");
  }

  // Query active entities filtered by type
  const placeholders = effectiveTypes.map(() => "?").join(", ");
  const { rows: entityRows } = await db.execute(
    `SELECT id, type, name, content, summary, importance, trust_tier, tags, created_at, t_valid_from
     FROM graph_nodes
     WHERE t_valid_until IS NULL AND archived_at IS NULL
       AND type IN (${placeholders})
     ORDER BY type, importance DESC`,
    effectiveTypes,
  );
  const entities = entityRows as Record<string, unknown>[];

  // Create output directory structure (one directory per requested type,
  // even when that type has no entities).
  mkdirSync(opts.outputDir, { recursive: true });
  for (const t of effectiveTypes) {
    mkdirSync(join(opts.outputDir, typeToDir(t)), { recursive: true });
  }

  // Decide every file name up front so links can point at the final paths.
  const targets = assignExportTargets(entities);

  let filesWritten = 0;
  const typeCounts = new Map<string, number>();

  for (const entity of entities) {
    const entityId = entity.id as string;
    const entityType = entity.type as string;
    const entityName = entity.name as string;
    const entityContent = entity.content as string | null;
    const trustTier = entity.trust_tier as number;
    const createdAt = entity.created_at as number | null;
    const tags = parseTags(entity.tags);
    const importance = entity.importance as number;

    // Track type counts for index
    typeCounts.set(entityType, (typeCounts.get(entityType) ?? 0) + 1);

    // Resolve related entities via outgoing edges
    const { rows: relatedRows } = await db.execute(
      `SELECT e.type AS edge_type, ent.id, ent.type AS entity_type, ent.name
       FROM graph_edges e
       JOIN graph_nodes ent ON ent.id = e.to_id
       WHERE e.from_id = ? AND e.t_valid_until IS NULL
         AND ent.t_valid_until IS NULL AND ent.archived_at IS NULL`,
      [entityId],
    );

    const related: RelatedEntity[] = (relatedRows as Record<string, unknown>[]).map((r) => ({
      edgeType: r.edge_type as string,
      id: r.id as string,
      entityType: r.entity_type as string,
      name: r.name as string,
    }));

    // Build YAML frontmatter
    const tagsYaml = tags.length > 0 ? `[${tags.map((t) => yamlString(t)).join(", ")}]` : "[]";

    const lines: string[] = [
      "---",
      `id: ${yamlString(entityId)}`,
      `kind: ${yamlString(entityType)}`,
      `trust_tier: ${trustTier}`,
      `created_at: ${yamlString(toIsoDate(createdAt))}`,
      `tags: ${tagsYaml}`,
      `importance: ${importance}`,
      "---",
      "",
      `# ${entityName}`,
      "",
      // A NULL content column renders as an empty body.
      entityContent ?? "",
    ];

    // Add related section if there are linked entities
    if (related.length > 0) {
      lines.push("", "## Related", "");
      for (const rel of related) {
        // Prefer the path assigned to the exported file; entities outside this
        // export have no file, so fall back to the name-derived path.
        const linked = targets.get(rel.id);
        const targetDir = linked?.dir ?? typeToDir(rel.entityType);
        const targetSlug = linked?.slug ?? slugify(rel.name);
        lines.push(`- ${rel.edgeType}: [[${targetDir}/${targetSlug}]]`);
      }
    }

    lines.push(""); // trailing newline

    // Write file
    const own = targets.get(entityId);
    const dir = own?.dir ?? typeToDir(entityType);
    const filename = `${own?.slug ?? slugify(entityName)}.md`;
    const filePath = join(opts.outputDir, dir, filename);
    writeFileSync(filePath, lines.join("\n"), "utf-8");
    filesWritten++;
  }

  // Generate index.md
  const indexLines: string[] = [
    "# Sia Knowledge Graph Export",
    "",
    `**Exported at:** ${new Date().toISOString()}`,
    `**Entities:** ${entities.length}`,
    "",
    "## Summary",
    "",
    "| Type | Count |",
    "|------|-------|",
  ];

  for (const t of effectiveTypes) {
    const count = typeCounts.get(t) ?? 0;
    if (count > 0) {
      indexLines.push(`| ${t} | ${count} |`);
    }
  }

  indexLines.push(""); // trailing newline
  writeFileSync(join(opts.outputDir, "index.md"), indexLines.join("\n"), "utf-8");
  filesWritten++;

  return {
    filesWritten,
    entitiesExported: entities.length,
    outputDir: opts.outputDir,
  };
}
|