@rkarim08/sia 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/.claude-plugin/marketplace.json +35 -0
  2. package/.claude-plugin/plugin.json +27 -0
  3. package/.mcp.json +13 -0
  4. package/CLAUDE.md +226 -0
  5. package/LICENSE +202 -0
  6. package/PLUGIN_README.md +253 -0
  7. package/README.md +1013 -0
  8. package/agents/sia-changelog-writer.md +89 -0
  9. package/agents/sia-code-reviewer.md +86 -0
  10. package/agents/sia-conflict-resolver.md +100 -0
  11. package/agents/sia-convention-enforcer.md +69 -0
  12. package/agents/sia-debug.md +106 -0
  13. package/agents/sia-decision-reviewer.md +101 -0
  14. package/agents/sia-dependency-tracker.md +80 -0
  15. package/agents/sia-explain.md +126 -0
  16. package/agents/sia-feature.md +116 -0
  17. package/agents/sia-knowledge-capture.md +117 -0
  18. package/agents/sia-lead-architecture-advisor.md +93 -0
  19. package/agents/sia-lead-team-health.md +107 -0
  20. package/agents/sia-migration.md +100 -0
  21. package/agents/sia-onboarding.md +115 -0
  22. package/agents/sia-orientation.md +99 -0
  23. package/agents/sia-pm-briefing.md +106 -0
  24. package/agents/sia-pm-risk-advisor.md +82 -0
  25. package/agents/sia-qa-analyst.md +116 -0
  26. package/agents/sia-qa-regression-map.md +94 -0
  27. package/agents/sia-refactor.md +115 -0
  28. package/agents/sia-regression.md +112 -0
  29. package/agents/sia-security-audit.md +125 -0
  30. package/agents/sia-test-advisor.md +91 -0
  31. package/hooks/hooks.json +98 -0
  32. package/migrations/bridge/001_initial.sql +34 -0
  33. package/migrations/episodic/001_initial.sql +35 -0
  34. package/migrations/meta/001_initial.sql +68 -0
  35. package/migrations/semantic/001_initial.sql +292 -0
  36. package/migrations/semantic/002_ontology.sql +89 -0
  37. package/migrations/semantic/003_freshness.sql +63 -0
  38. package/migrations/semantic/004_v5_unified_schema.sql +194 -0
  39. package/migrations/semantic/005_backfill_event_kinds.sql +8 -0
  40. package/migrations/semantic/006_tree_sitter.sql +6 -0
  41. package/migrations/semantic/007_branch_snapshots.sql +22 -0
  42. package/package.json +110 -0
  43. package/scripts/branch-switch.sh +13 -0
  44. package/scripts/build-wasm-grammars.sh +81 -0
  45. package/scripts/post-compact.sh +8 -0
  46. package/scripts/post-tool-use.sh +10 -0
  47. package/scripts/pre-compact.sh +8 -0
  48. package/scripts/session-end.sh +8 -0
  49. package/scripts/session-start.sh +8 -0
  50. package/scripts/start-mcp.ts +45 -0
  51. package/scripts/stop-hook.sh +8 -0
  52. package/scripts/user-prompt-submit.sh +8 -0
  53. package/scripts/viz-server.ts +152 -0
  54. package/skills/sia-brainstorm/SKILL.md +156 -0
  55. package/skills/sia-brainstorm/scripts/frame-template.html +214 -0
  56. package/skills/sia-brainstorm/scripts/helper.js +95 -0
  57. package/skills/sia-brainstorm/scripts/server.cjs +338 -0
  58. package/skills/sia-brainstorm/scripts/start-server.sh +153 -0
  59. package/skills/sia-brainstorm/scripts/stop-server.sh +55 -0
  60. package/skills/sia-brainstorm/spec-document-reviewer-prompt.md +49 -0
  61. package/skills/sia-brainstorm/visual-companion.md +286 -0
  62. package/skills/sia-capture/SKILL.md +64 -0
  63. package/skills/sia-compare/SKILL.md +33 -0
  64. package/skills/sia-conflicts/SKILL.md +38 -0
  65. package/skills/sia-debug-workflow/SKILL.md +120 -0
  66. package/skills/sia-debug-workflow/root-cause-tracing.md +70 -0
  67. package/skills/sia-debug-workflow/scripts/find-polluter.sh +64 -0
  68. package/skills/sia-debug-workflow/temporal-investigation.md +72 -0
  69. package/skills/sia-digest/SKILL.md +23 -0
  70. package/skills/sia-dispatch/SKILL.md +69 -0
  71. package/skills/sia-dispatch/agent-task-template.md +99 -0
  72. package/skills/sia-doctor/SKILL.md +39 -0
  73. package/skills/sia-execute/SKILL.md +70 -0
  74. package/skills/sia-execute-plan/SKILL.md +85 -0
  75. package/skills/sia-export-import/SKILL.md +49 -0
  76. package/skills/sia-export-knowledge/SKILL.md +46 -0
  77. package/skills/sia-finish/SKILL.md +100 -0
  78. package/skills/sia-finish/pr-summary-template.md +54 -0
  79. package/skills/sia-freshness/SKILL.md +38 -0
  80. package/skills/sia-history/SKILL.md +42 -0
  81. package/skills/sia-impact/SKILL.md +70 -0
  82. package/skills/sia-index/SKILL.md +54 -0
  83. package/skills/sia-install/SKILL.md +39 -0
  84. package/skills/sia-lead-compliance/SKILL.md +16 -0
  85. package/skills/sia-lead-drift-report/SKILL.md +16 -0
  86. package/skills/sia-lead-knowledge-map/SKILL.md +16 -0
  87. package/skills/sia-learn/SKILL.md +58 -0
  88. package/skills/sia-plan/SKILL.md +68 -0
  89. package/skills/sia-plan/plan-reviewer-prompt.md +63 -0
  90. package/skills/sia-playbooks/SKILL.md +29 -0
  91. package/skills/sia-playbooks/reference-feature.md +100 -0
  92. package/skills/sia-playbooks/reference-flagging.md +50 -0
  93. package/skills/sia-playbooks/reference-orientation.md +92 -0
  94. package/skills/sia-playbooks/reference-regression.md +115 -0
  95. package/skills/sia-playbooks/reference-review.md +64 -0
  96. package/skills/sia-playbooks/reference-tools.md +239 -0
  97. package/skills/sia-pm-decision-log/SKILL.md +28 -0
  98. package/skills/sia-pm-risk-dashboard/SKILL.md +24 -0
  99. package/skills/sia-pm-sprint-summary/SKILL.md +27 -0
  100. package/skills/sia-prune/SKILL.md +45 -0
  101. package/skills/sia-qa-coverage/SKILL.md +28 -0
  102. package/skills/sia-qa-flaky/SKILL.md +20 -0
  103. package/skills/sia-qa-report/SKILL.md +26 -0
  104. package/skills/sia-reindex/SKILL.md +30 -0
  105. package/skills/sia-review-respond/SKILL.md +88 -0
  106. package/skills/sia-review-respond/pushback-patterns.md +90 -0
  107. package/skills/sia-search/SKILL.md +47 -0
  108. package/skills/sia-setup/SKILL.md +82 -0
  109. package/skills/sia-setup/setup-checklist.md +97 -0
  110. package/skills/sia-stats/SKILL.md +36 -0
  111. package/skills/sia-status/SKILL.md +44 -0
  112. package/skills/sia-sync/SKILL.md +46 -0
  113. package/skills/sia-team/SKILL.md +64 -0
  114. package/skills/sia-test/SKILL.md +92 -0
  115. package/skills/sia-test/testing-anti-patterns.md +104 -0
  116. package/skills/sia-tour/SKILL.md +29 -0
  117. package/skills/sia-upgrade/SKILL.md +43 -0
  118. package/skills/sia-verify/SKILL.md +81 -0
  119. package/skills/sia-visualize/SKILL.md +28 -0
  120. package/skills/sia-visualize-live/SKILL.md +55 -0
  121. package/skills/sia-visualize-live/scripts/graph-template.html +389 -0
  122. package/skills/sia-visualize-live/scripts/start-visualizer.sh +161 -0
  123. package/skills/sia-visualize-live/scripts/stop-visualizer.sh +55 -0
  124. package/skills/sia-visualize-live/scripts/visualizer-server.cjs +264 -0
  125. package/skills/sia-workspace/SKILL.md +57 -0
  126. package/src/agent/claude-md-template-flagging.md +219 -0
  127. package/src/agent/claude-md-template.md +213 -0
  128. package/src/agent/modules/sia-feature.md +100 -0
  129. package/src/agent/modules/sia-flagging.md +50 -0
  130. package/src/agent/modules/sia-orientation.md +92 -0
  131. package/src/agent/modules/sia-regression.md +115 -0
  132. package/src/agent/modules/sia-review.md +64 -0
  133. package/src/agent/modules/sia-tools.md +239 -0
  134. package/src/ast/extractors/c-include.ts +189 -0
  135. package/src/ast/extractors/csharp-project.ts +260 -0
  136. package/src/ast/extractors/prisma-schema.ts +44 -0
  137. package/src/ast/extractors/project-manifest.ts +111 -0
  138. package/src/ast/extractors/sql-schema.ts +67 -0
  139. package/src/ast/extractors/tier-a.ts +423 -0
  140. package/src/ast/extractors/tier-b.ts +289 -0
  141. package/src/ast/extractors/tier-dispatch.ts +247 -0
  142. package/src/ast/index-worker.ts +108 -0
  143. package/src/ast/indexer.ts +484 -0
  144. package/src/ast/languages.ts +408 -0
  145. package/src/ast/pagerank-builder.ts +125 -0
  146. package/src/ast/path-utils.ts +137 -0
  147. package/src/ast/tree-sitter/backends/native.ts +57 -0
  148. package/src/ast/tree-sitter/backends/wasm.ts +39 -0
  149. package/src/ast/tree-sitter/call-walker.ts +44 -0
  150. package/src/ast/tree-sitter/edit-computer.ts +55 -0
  151. package/src/ast/tree-sitter/query-runner.ts +46 -0
  152. package/src/ast/tree-sitter/service.ts +174 -0
  153. package/src/ast/tree-sitter/tree-cache.ts +39 -0
  154. package/src/ast/tree-sitter/types.ts +79 -0
  155. package/src/ast/watcher.ts +322 -0
  156. package/src/capture/chunker.ts +169 -0
  157. package/src/capture/consolidate.ts +127 -0
  158. package/src/capture/edge-inferrer.ts +161 -0
  159. package/src/capture/embedder.ts +166 -0
  160. package/src/capture/embedding-cache.ts +73 -0
  161. package/src/capture/flag-processor.ts +64 -0
  162. package/src/capture/hook.ts +67 -0
  163. package/src/capture/pipeline.ts +450 -0
  164. package/src/capture/prompts/consolidate.ts +25 -0
  165. package/src/capture/prompts/edge-infer.ts +29 -0
  166. package/src/capture/prompts/extract-flagged.ts +36 -0
  167. package/src/capture/prompts/extract.ts +42 -0
  168. package/src/capture/tokenizer.ts +147 -0
  169. package/src/capture/track-a-ast.ts +93 -0
  170. package/src/capture/track-b-llm.ts +149 -0
  171. package/src/capture/types.ts +64 -0
  172. package/src/cli/commands/community.ts +137 -0
  173. package/src/cli/commands/compare.ts +123 -0
  174. package/src/cli/commands/conflicts.ts +41 -0
  175. package/src/cli/commands/digest.ts +197 -0
  176. package/src/cli/commands/disable-flagging.ts +34 -0
  177. package/src/cli/commands/doctor.ts +240 -0
  178. package/src/cli/commands/download-model.ts +161 -0
  179. package/src/cli/commands/enable-flagging.ts +34 -0
  180. package/src/cli/commands/export-knowledge.ts +208 -0
  181. package/src/cli/commands/export.ts +85 -0
  182. package/src/cli/commands/freshness.ts +164 -0
  183. package/src/cli/commands/graph.ts +51 -0
  184. package/src/cli/commands/history.ts +139 -0
  185. package/src/cli/commands/import.ts +335 -0
  186. package/src/cli/commands/install.ts +156 -0
  187. package/src/cli/commands/lead-report.ts +241 -0
  188. package/src/cli/commands/learn.ts +321 -0
  189. package/src/cli/commands/pm-report.ts +413 -0
  190. package/src/cli/commands/prune.ts +75 -0
  191. package/src/cli/commands/qa-report.ts +278 -0
  192. package/src/cli/commands/reindex.ts +104 -0
  193. package/src/cli/commands/rollback.ts +70 -0
  194. package/src/cli/commands/search.ts +103 -0
  195. package/src/cli/commands/server.ts +91 -0
  196. package/src/cli/commands/share.ts +33 -0
  197. package/src/cli/commands/stats.ts +79 -0
  198. package/src/cli/commands/status.ts +176 -0
  199. package/src/cli/commands/sync.ts +96 -0
  200. package/src/cli/commands/team.ts +118 -0
  201. package/src/cli/commands/tour.ts +157 -0
  202. package/src/cli/commands/visualize-live.ts +162 -0
  203. package/src/cli/commands/workspace.ts +117 -0
  204. package/src/cli/index.ts +424 -0
  205. package/src/cli/learn-progress.ts +87 -0
  206. package/src/community/detection-bridge.ts +344 -0
  207. package/src/community/leiden.ts +462 -0
  208. package/src/community/raptor.ts +210 -0
  209. package/src/community/scheduler.ts +74 -0
  210. package/src/community/summarize.ts +115 -0
  211. package/src/decay/archiver.ts +73 -0
  212. package/src/decay/bridge-orphan-cleanup.ts +212 -0
  213. package/src/decay/consolidation-sweep.ts +112 -0
  214. package/src/decay/decay.ts +116 -0
  215. package/src/decay/deep-validator.ts +62 -0
  216. package/src/decay/episodic-promoter.ts +132 -0
  217. package/src/decay/maintenance-scheduler.ts +326 -0
  218. package/src/decay/scheduler.ts +6 -0
  219. package/src/decay/session-sweeper.ts +79 -0
  220. package/src/decay/types.ts +17 -0
  221. package/src/freshness/confidence-decay.ts +122 -0
  222. package/src/freshness/cuckoo-filter.ts +176 -0
  223. package/src/freshness/deep-validation.ts +345 -0
  224. package/src/freshness/dirty-tracker.ts +237 -0
  225. package/src/freshness/file-watcher-layer.ts +119 -0
  226. package/src/freshness/firewall.ts +64 -0
  227. package/src/freshness/git-reconcile-layer.ts +161 -0
  228. package/src/freshness/inverted-index.ts +158 -0
  229. package/src/freshness/stale-read-layer.ts +222 -0
  230. package/src/graph/audit.ts +69 -0
  231. package/src/graph/bridge-db.ts +141 -0
  232. package/src/graph/communities.ts +195 -0
  233. package/src/graph/db-interface.ts +259 -0
  234. package/src/graph/edges.ts +163 -0
  235. package/src/graph/entities.ts +327 -0
  236. package/src/graph/episodic-db.ts +113 -0
  237. package/src/graph/flags.ts +31 -0
  238. package/src/graph/meta-db.ts +200 -0
  239. package/src/graph/semantic-db.ts +101 -0
  240. package/src/graph/session-resume.ts +56 -0
  241. package/src/graph/snapshots.ts +342 -0
  242. package/src/graph/staging.ts +151 -0
  243. package/src/graph/types.ts +128 -0
  244. package/src/hooks/adapters/claude-code.ts +21 -0
  245. package/src/hooks/adapters/cline.ts +43 -0
  246. package/src/hooks/adapters/cursor.ts +65 -0
  247. package/src/hooks/adapters/generic.ts +12 -0
  248. package/src/hooks/agent-detect.ts +34 -0
  249. package/src/hooks/claude-md-directives.ts +32 -0
  250. package/src/hooks/event-router.ts +182 -0
  251. package/src/hooks/extractors/pattern-detector.ts +111 -0
  252. package/src/hooks/handlers/post-compact.ts +30 -0
  253. package/src/hooks/handlers/post-tool-use.ts +403 -0
  254. package/src/hooks/handlers/pre-compact.ts +100 -0
  255. package/src/hooks/handlers/session-end.ts +47 -0
  256. package/src/hooks/handlers/session-start.ts +154 -0
  257. package/src/hooks/handlers/stop.ts +128 -0
  258. package/src/hooks/handlers/user-prompt-submit.ts +68 -0
  259. package/src/hooks/plugin-branch-switch.ts +68 -0
  260. package/src/hooks/plugin-common.ts +47 -0
  261. package/src/hooks/plugin-post-compact.ts +28 -0
  262. package/src/hooks/plugin-post-tool-use.ts +38 -0
  263. package/src/hooks/plugin-pre-compact.ts +37 -0
  264. package/src/hooks/plugin-session-end.ts +37 -0
  265. package/src/hooks/plugin-session-start.ts +75 -0
  266. package/src/hooks/plugin-stop.ts +61 -0
  267. package/src/hooks/plugin-user-prompt-submit.ts +47 -0
  268. package/src/hooks/types.ts +43 -0
  269. package/src/knowledge/discovery.ts +238 -0
  270. package/src/knowledge/external-refs.ts +98 -0
  271. package/src/knowledge/freshness.ts +221 -0
  272. package/src/knowledge/ingest.ts +330 -0
  273. package/src/knowledge/markdown-export.ts +229 -0
  274. package/src/knowledge/markdown-import.ts +359 -0
  275. package/src/knowledge/patterns.ts +74 -0
  276. package/src/knowledge/templates.ts +307 -0
  277. package/src/llm/ai-sdk-adapter.ts +46 -0
  278. package/src/llm/config.ts +88 -0
  279. package/src/llm/cost-tracker.ts +110 -0
  280. package/src/llm/prompts/extraction.ts +55 -0
  281. package/src/llm/prompts/summarization.ts +36 -0
  282. package/src/llm/prompts/validation.ts +37 -0
  283. package/src/llm/provider-registry.ts +68 -0
  284. package/src/llm/reliability.ts +179 -0
  285. package/src/llm/schemas.ts +52 -0
  286. package/src/mcp/freshness-annotator.ts +69 -0
  287. package/src/mcp/server.ts +949 -0
  288. package/src/mcp/tools/sia-ast-query.ts +225 -0
  289. package/src/mcp/tools/sia-at-time.ts +151 -0
  290. package/src/mcp/tools/sia-backlinks.ts +87 -0
  291. package/src/mcp/tools/sia-batch-execute.ts +169 -0
  292. package/src/mcp/tools/sia-by-file.ts +89 -0
  293. package/src/mcp/tools/sia-community.ts +113 -0
  294. package/src/mcp/tools/sia-doctor.ts +73 -0
  295. package/src/mcp/tools/sia-execute-file.ts +122 -0
  296. package/src/mcp/tools/sia-execute.ts +104 -0
  297. package/src/mcp/tools/sia-expand.ts +158 -0
  298. package/src/mcp/tools/sia-fetch-and-index.ts +241 -0
  299. package/src/mcp/tools/sia-flag.ts +65 -0
  300. package/src/mcp/tools/sia-index.ts +111 -0
  301. package/src/mcp/tools/sia-note.ts +134 -0
  302. package/src/mcp/tools/sia-search.ts +105 -0
  303. package/src/mcp/tools/sia-stats.ts +63 -0
  304. package/src/mcp/tools/sia-sync-status.ts +44 -0
  305. package/src/mcp/tools/sia-upgrade.ts +247 -0
  306. package/src/mcp/truncate.ts +231 -0
  307. package/src/native/bridge.ts +167 -0
  308. package/src/native/fallback-ast-diff.ts +144 -0
  309. package/src/native/fallback-graph.ts +325 -0
  310. package/src/ontology/constraints.ts +56 -0
  311. package/src/ontology/errors.ts +8 -0
  312. package/src/ontology/middleware.ts +266 -0
  313. package/src/retrieval/bm25-search.ts +151 -0
  314. package/src/retrieval/context-assembly.ts +76 -0
  315. package/src/retrieval/graph-traversal.ts +168 -0
  316. package/src/retrieval/pagerank.ts +40 -0
  317. package/src/retrieval/query-classifier.ts +106 -0
  318. package/src/retrieval/reranker.ts +156 -0
  319. package/src/retrieval/search.ts +236 -0
  320. package/src/retrieval/throttle.ts +102 -0
  321. package/src/retrieval/vector-search.ts +203 -0
  322. package/src/retrieval/workspace-search.ts +130 -0
  323. package/src/sandbox/context-mode.ts +285 -0
  324. package/src/sandbox/credential-pass.ts +55 -0
  325. package/src/sandbox/executor.ts +235 -0
  326. package/src/security/pattern-detector.ts +127 -0
  327. package/src/security/rule-of-two.ts +50 -0
  328. package/src/security/sanitize.ts +46 -0
  329. package/src/security/semantic-consistency.ts +93 -0
  330. package/src/security/staging-promoter.ts +154 -0
  331. package/src/shared/config.ts +302 -0
  332. package/src/shared/diagnostics.ts +210 -0
  333. package/src/shared/errors.ts +48 -0
  334. package/src/shared/git-utils.ts +143 -0
  335. package/src/shared/llm-client.ts +120 -0
  336. package/src/shared/logger.ts +99 -0
  337. package/src/shared/types.ts +79 -0
  338. package/src/sync/client.ts +43 -0
  339. package/src/sync/conflict.ts +106 -0
  340. package/src/sync/dedup.ts +183 -0
  341. package/src/sync/hlc.ts +117 -0
  342. package/src/sync/keychain.ts +144 -0
  343. package/src/sync/pull.ts +232 -0
  344. package/src/sync/push.ts +131 -0
  345. package/src/types/chokidar.d.ts +23 -0
  346. package/src/visualization/graph-renderer.ts +312 -0
  347. package/src/visualization/subgraph-extract.ts +208 -0
  348. package/src/visualization/views/community-clusters.ts +246 -0
  349. package/src/visualization/views/dependency-map.ts +189 -0
  350. package/src/visualization/views/graph-explorer.ts +364 -0
  351. package/src/visualization/views/timeline.ts +247 -0
  352. package/src/workspace/api-contracts.ts +226 -0
  353. package/src/workspace/cross-repo.ts +61 -0
  354. package/src/workspace/detector.ts +190 -0
  355. package/src/workspace/manifest.ts +141 -0
@@ -0,0 +1,330 @@
1
+ // Module: ingest — Heading-based markdown chunking and graph ingestion
2
+
3
+ import { readFileSync } from "node:fs";
4
+ import { basename } from "node:path";
5
+ import type { SiaDb } from "@/graph/db-interface";
6
+ import { insertEdge } from "@/graph/edges";
7
+ import { insertEntity } from "@/graph/entities";
8
+
9
/**
 * One heading-scoped section of a markdown document, as emitted by
 * `parseMarkdown`.
 */
export interface DocChunk {
  /** Heading text without its `#` markers; empty for text that precedes the first heading. */
  heading: string;
  /** Count of `#` markers on the heading (1-6); 0 when the chunk has no heading. */
  headingLevel: number;
  /**
   * Headings from the document root down to this chunk, indexed by level - 1.
   * Levels that were skipped in the document are held as empty strings.
   */
  headingPath: string[];
  /** Text collected under the heading, with trailing whitespace removed. */
  content: string;
  /** Fenced code blocks found in the chunk's text. */
  codeBlocks: CodeBlock[];
  /** Markdown links in the chunk's text that were not classified as absolute URLs. */
  internalLinks: InternalLink[];
}
18
+
19
/** A fenced code block lifted out of a markdown chunk. */
export interface CodeBlock {
  /** Text that follows the opening fence marker, trimmed; empty when the fence carries none. */
  language: string;
  /** The lines between the opening and closing fence, joined with "\n". */
  code: string;
}
23
+
24
/** A markdown link that was not classified as an absolute URL. */
export interface InternalLink {
  /** Link label: the text written between `[` and `]`. */
  text: string;
  /** Link destination taken from inside the parentheses. */
  target: string;
  /** True when `target` starts with `#`. */
  isAnchor: boolean;
}
29
+
30
/** Summary returned by `ingestDocument`. */
export interface IngestResult {
  /** Id of the FileNode entity the chunks were attached to (reused or newly created). */
  fileNodeId: string;
  /** Number of ContentChunk entities inserted by the call. */
  chunksCreated: number;
  /** Number of edges inserted by the call. */
  edgesCreated: number;
}
35
+
36
+ // ---------------------------------------------------------------
37
+ // Markdown parsing
38
+ // ---------------------------------------------------------------
39
+
40
/**
 * Split markdown into heading-scoped chunks.
 *
 * A new chunk starts at every ATX heading (`#` through `######`). Text that
 * precedes the first heading becomes a chunk with an empty heading and level 0.
 * Lines inside ``` fences are never interpreted as headings, so code blocks
 * stay intact within the chunk that contains them.
 *
 * Both LF and CRLF line endings are accepted; chunk content is always joined
 * with "\n". An optional closing run of `#` characters ("## Title ##") is not
 * part of the heading text.
 *
 * @param content Markdown source (frontmatter already removed by the caller).
 * @returns Chunks in document order. A chunk is emitted when it has a heading
 *          or non-blank content.
 */
export function parseMarkdown(content: string): DocChunk[] {
  // Splitting on "\n" alone would leave a trailing "\r" on every CRLF line,
  // and the heading pattern below cannot match past a "\r".
  const lines = content.split(/\r?\n/);
  const chunks: DocChunk[] = [];

  // State of the chunk currently being accumulated.
  let currentHeading = "";
  let currentLevel = 0;
  let headingPath: string[] = [];
  let contentLines: string[] = [];
  let inCodeFence = false;

  /** Emit the accumulated chunk (if it has a heading or any text) and reset the buffer. */
  function flushChunk(): void {
    const body = contentLines.join("\n");
    if (currentHeading !== "" || body.trim().length > 0) {
      chunks.push({
        heading: currentHeading,
        headingLevel: currentLevel,
        headingPath: [...headingPath],
        content: body.trimEnd(),
        codeBlocks: extractCodeBlocks(body),
        internalLinks: extractInternalLinks(body),
      });
    }
    contentLines = [];
  }

  for (const line of lines) {
    // A fence line toggles code mode and is itself kept as content.
    if (line.trimStart().startsWith("```")) {
      inCodeFence = !inCodeFence;
      contentLines.push(line);
      continue;
    }

    if (inCodeFence) {
      contentLines.push(line);
      continue;
    }

    const headingMatch = /^(#{1,6})\s+(.+)$/.exec(line);
    if (headingMatch === null) {
      contentLines.push(line);
      continue;
    }

    // A heading closes the previous chunk before opening a new one.
    flushChunk();

    const level = headingMatch[1].length;
    // Drop an optional closing sequence: whitespace followed by a run of #.
    const heading = headingMatch[2].replace(/\s+#+\s*$/, "").trim();

    // Keep the ancestors above this level, pad skipped levels with "",
    // then append the heading itself so the path has exactly `level` entries.
    headingPath = headingPath.slice(0, level - 1);
    while (headingPath.length < level - 1) {
      headingPath.push("");
    }
    headingPath.push(heading);

    currentHeading = heading;
    currentLevel = level;
  }

  // Emit whatever was accumulated after the last heading.
  flushChunk();

  return chunks;
}
116
+
117
/**
 * Collect the fenced code blocks (``` ... ```) contained in markdown text.
 *
 * The text after the opening fence marker is recorded, trimmed, as the
 * block's language. Fence lines may be indented. Both LF and CRLF input are
 * accepted, and the returned code is always joined with "\n".
 *
 * A fence that is never closed runs to the end of the text and is still
 * reported, rather than being dropped silently.
 *
 * @param content Markdown text to scan.
 * @returns Code blocks in the order they appear.
 */
function extractCodeBlocks(content: string): CodeBlock[] {
  const blocks: CodeBlock[] = [];
  // Non-null while the scan is inside a fence.
  let open: { language: string; lines: string[] } | null = null;

  for (const line of content.split(/\r?\n/)) {
    const stripped = line.trimStart();
    const isFence = stripped.startsWith("```");

    if (open === null) {
      if (isFence) {
        open = { language: stripped.slice(3).trim(), lines: [] };
      }
      continue;
    }

    if (isFence) {
      blocks.push({ language: open.language, code: open.lines.join("\n") });
      open = null;
      continue;
    }

    open.lines.push(line);
  }

  // Unterminated fence: everything up to the end of the text belongs to it.
  if (open !== null) {
    blocks.push({ language: open.language, code: open.lines.join("\n") });
  }

  return blocks;
}
149
+
150
/**
 * Collect the internal links written in markdown link syntax (`[text](target)`).
 *
 * A link counts as internal when its target is a relative path or an anchor
 * (`#...`). Targets that carry a URI scheme (`https:`, `HTTP:`, `mailto:`,
 * `ftp:` ...) or are protocol-relative (`//host/...`) are skipped. An optional
 * quoted link title (`[t](path "Title")`) is not part of the target.
 *
 * @param content Markdown text to scan.
 * @returns Internal links in the order they appear.
 */
function extractInternalLinks(content: string): InternalLink[] {
  const links: InternalLink[] = [];

  for (const match of content.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
    const text = match[1];
    const destination = match[2].trim();

    // Separate `path "Title"` / `path 'Title'` into its path part.
    const titled = /^(\S+)\s+(?:"[^"]*"|'[^']*')$/.exec(destination);
    const target = titled !== null ? titled[1] : destination;
    if (target === "") continue;

    // Scheme of two or more characters (so a drive letter is not mistaken
    // for one), or a protocol-relative URL: not an internal link.
    if (/^(?:[a-z][a-z0-9+.-]+:|\/\/)/i.test(target)) continue;

    links.push({ text, target, isAnchor: target.startsWith("#") });
  }

  return links;
}
178
+
179
+ // ---------------------------------------------------------------
180
+ // Frontmatter parsing
181
+ // ---------------------------------------------------------------
182
+
183
/**
 * Split leading YAML frontmatter off a markdown document.
 *
 * Frontmatter is recognised only when the very first line is `---` and a
 * later line consisting solely of `---` closes it. Inside the block a flat
 * `key: value` subset of YAML is understood: blank lines and `#` comments are
 * skipped, the first colon separates key from value, and one pair of matching
 * surrounding quotes is removed from the value. Nested structures are not
 * interpreted.
 *
 * @param content Raw file content (LF or CRLF line endings).
 * @returns The parsed pairs and the text after the closing delimiter. When no
 *          complete frontmatter block is present, `frontmatter` is empty and
 *          `body` is the unchanged input.
 */
export function parseFrontmatter(content: string): {
  frontmatter: Record<string, string>;
  body: string;
} {
  const frontmatter: Record<string, string> = {};

  const opening = /^---[ \t]*\r?\n/.exec(content);
  if (opening === null) {
    return { frontmatter, body: content };
  }

  // The closing delimiter must be a whole line of its own. Scanning starts
  // right after the opening line so that an empty block ("---\n---\n") is
  // found, and a line such as "----" or "--- text" is not taken for it.
  const closingPattern = /^---[ \t]*\r?(?:\n|$)/gm;
  closingPattern.lastIndex = opening[0].length;
  const closing = closingPattern.exec(content);
  if (closing === null) {
    return { frontmatter, body: content };
  }

  const yamlBlock = content.slice(opening[0].length, closing.index);
  const body = content.slice(closing.index + closing[0].length);

  for (const line of yamlBlock.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) continue;

    const colonIdx = trimmed.indexOf(":");
    if (colonIdx === -1) continue;

    const key = trimmed.slice(0, colonIdx).trim();
    if (key === "") continue;

    let value = trimmed.slice(colonIdx + 1).trim();
    // `title: "Hello"` means Hello, not a string that contains quote marks.
    const first = value.charAt(0);
    if (value.length >= 2 && (first === '"' || first === "'") && value.endsWith(first)) {
      value = value.slice(1, -1);
    }
    frontmatter[key] = value;
  }

  return { frontmatter, body };
}
225
+
226
+ // ---------------------------------------------------------------
227
+ // Graph ingestion
228
+ // ---------------------------------------------------------------
229
+
230
/**
 * Ingest a documentation file into the knowledge graph.
 *
 * 1. Read the file and split off its frontmatter.
 * 2. Reuse the active FileNode whose `file_paths` contains `relativePath`,
 *    or insert a new one.
 * 3. Insert one ContentChunk entity per heading-scoped chunk.
 * 4. Insert a `child_of` edge from every chunk to the FileNode.
 *
 * Internal links are parsed into each chunk by `parseMarkdown`, but this
 * function does not turn them into edges.
 *
 * NOTE(review): chunks are inserted on every call, including when an existing
 * FileNode is reused, so re-ingesting the same file presumably duplicates its
 * ContentChunks unless the caller retires the old ones first. Confirm.
 *
 * @param db Graph database handle.
 * @param filePath Path the file is read from.
 * @param relativePath Path recorded in `file_paths` and used to look up an
 *        existing FileNode.
 * @param opts Optional settings: `tag` (default "project-docs") is added to
 *        every entity's tags, `trustTier` (default 1) and `packagePath`
 *        (default null) are stored on every entity.
 * @returns The FileNode id and the number of chunks and edges inserted.
 * @throws Rejects with the error raised by `readFileSync` when the file
 *         cannot be read.
 */
export async function ingestDocument(
  db: SiaDb,
  filePath: string,
  relativePath: string,
  opts?: {
    tag?: string;
    trustTier?: 1 | 2;
    packagePath?: string | null;
  },
): Promise<IngestResult> {
  const raw = readFileSync(filePath, "utf-8");
  const { frontmatter, body } = parseFrontmatter(raw);
  const chunks = parseMarkdown(body);

  const tag = opts?.tag ?? "project-docs";
  const trustTier = opts?.trustTier ?? 1;
  const packagePath = opts?.packagePath ?? null;
  const fileName = basename(relativePath);

  // ---- Step 1: Find or create FileNode ----
  // file_paths holds a JSON array, so search for the JSON-encoded path
  // (quotes and escapes included). `%` and `_` are LIKE wildcards and `_` is
  // common in file names, so escape them to stop "a_b.md" matching "a-b.md".
  const likeNeedle = JSON.stringify(relativePath).replace(/[!%_]/g, "!$&");
  const existing = await db.execute(
    "SELECT id FROM graph_nodes WHERE type = 'FileNode' AND file_paths LIKE ? ESCAPE '!' AND t_valid_until IS NULL AND archived_at IS NULL",
    [`%${likeNeedle}%`],
  );

  let fileNodeId: string;
  if (existing.rows.length > 0) {
    fileNodeId = existing.rows[0].id as string;
  } else {
    // `||` rather than `??`: an empty `description:` line parses to "" and
    // must fall through. The last fallback reads from `body` so the preview
    // never consists of the YAML frontmatter itself.
    const summary = frontmatter.description || frontmatter.title || body.slice(0, 200).trim();

    const fileNode = await insertEntity(db, {
      type: "FileNode",
      name: fileName,
      content: summary,
      summary: `Documentation file: ${relativePath}`,
      package_path: packagePath,
      tags: JSON.stringify([tag]),
      file_paths: JSON.stringify([relativePath]),
      trust_tier: trustTier,
      confidence: 1.0,
      extraction_method: "document-ingest",
    });
    fileNodeId = fileNode.id;
  }

  // ---- Steps 2-4: Create ContentChunk entities and child_of edges ----
  let chunksCreated = 0;
  let edgesCreated = 0;

  for (const chunk of chunks) {
    // Text that precedes the first heading has no heading of its own.
    const chunkName = chunk.heading !== "" ? chunk.heading : `${fileName} - Introduction`;
    const contentPreview = chunk.content.slice(0, 150).trim();

    const chunkTags: string[] = [tag];
    if (chunk.headingLevel > 0) {
      chunkTags.push(`h${chunk.headingLevel}`);
    }

    const chunkEntity = await insertEntity(db, {
      type: "ContentChunk",
      name: chunkName,
      content: chunk.content,
      summary: contentPreview,
      package_path: packagePath,
      tags: JSON.stringify(chunkTags),
      file_paths: JSON.stringify([relativePath]),
      trust_tier: trustTier,
      confidence: 1.0,
      extraction_method: "document-ingest",
    });
    chunksCreated++;

    // child_of edge: chunk -> FileNode
    await insertEdge(db, {
      from_id: chunkEntity.id,
      to_id: fileNodeId,
      type: "child_of",
      extraction_method: "document-ingest",
    });
    edgesCreated++;
  }

  return {
    fileNodeId,
    chunksCreated,
    edgesCreated,
  };
}
@@ -0,0 +1,229 @@
1
+ // Module: markdown-export — Export knowledge graph as markdown vault
2
+
3
+ import { mkdirSync, writeFileSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import type { SiaDb } from "@/graph/db-interface";
6
+
7
/** Options accepted by the markdown vault export. */
export interface MarkdownExportOpts {
  /** Directory the export is written into; it is created when missing. */
  outputDir: string;
  /** Entity types to export. The module's default type list is used when omitted. */
  types?: Array<string>;
  /** When true, "CodeEntity" is added to the exported types. Treated as false when omitted. */
  includeCode?: boolean;
}
12
+
13
/** Summary returned once an export run has finished. */
export interface MarkdownExportResult {
  /** Number of files written, counting the generated index.md. */
  filesWritten: number;
  /** Number of entity rows that were selected for export. */
  entitiesExported: number;
  /** The output directory, echoed back from the options. */
  outputDir: string;
}
18
+
19
/**
 * Entity types exported when the caller does not pass `types`.
 * CodeEntity and FileNode are deliberately left out of the defaults.
 */
const DEFAULT_TYPES = [
  "Decision",
  "Convention",
  "Bug",
  "Solution",
  "Concept",
];
21
+
22
/** An entity reached through an edge, together with the type of that edge. */
interface RelatedEntity {
  /** Type of the edge that points at the entity. */
  edgeType: string;
  /** Identifier of the linked entity. */
  id: string;
  /** Type of the linked entity. */
  entityType: string;
  /** Name of the linked entity. */
  name: string;
}
29
+
30
/**
 * Map an entity type onto the sub-directory its files are exported to.
 * "CodeEntity" and "FileNode" have fixed names ("code" and "files"); every
 * other type is lowercased and given a trailing "s" ("Decision" -> "decisions").
 */
function typeToDir(type: string): string {
  switch (type) {
    case "CodeEntity":
      return "code";
    case "FileNode":
      return "files";
    default:
      return `${type.toLowerCase()}s`;
  }
}
39
+
40
/**
 * Turn a display name into a filename-safe slug.
 *
 * Steps, in order: lowercase the name, turn each run of whitespace or
 * underscores into a hyphen, drop every character that is not a-z, 0-9 or a
 * hyphen, collapse repeated hyphens, and trim hyphens from both ends.
 * A name with no a-z/0-9 characters yields an empty string.
 */
export function slugify(name: string): string {
  const lowered = name.toLowerCase();
  const hyphenated = lowered.replace(/[\s_]+/g, "-");
  const alnumOnly = hyphenated.replace(/[^a-z0-9-]/g, "");
  const collapsed = alnumOnly.replace(/-{2,}/g, "-");
  return collapsed.replace(/^-+|-+$/g, "");
}
54
+
55
/**
 * Format a millisecond epoch timestamp as an ISO 8601 string.
 *
 * Returns "unknown" when the timestamp is null, undefined or 0. It also
 * returns "unknown" when the value cannot be represented as a Date (NaN,
 * infinite, or outside the range Date supports), because
 * Date.prototype.toISOString throws a RangeError for an invalid date.
 *
 * @param ts - Milliseconds since the Unix epoch, or null/undefined when absent.
 * @returns The ISO 8601 string, or "unknown".
 */
function toIsoDate(ts: number | null | undefined): string {
  if (ts == null || ts === 0) return "unknown";
  const date = new Date(ts);
  // An invalid Date reports NaN as its time value.
  if (Number.isNaN(date.getTime())) return "unknown";
  return date.toISOString();
}
63
+
64
/**
 * Decode a JSON-encoded tag list.
 *
 * Anything that is not a string, does not parse as JSON, or does not parse to
 * an array yields an empty list. Array elements are converted with String().
 */
function parseTags(tags: unknown): string[] {
  if (typeof tags !== "string") {
    return [];
  }
  try {
    const decoded: unknown = JSON.parse(tags);
    if (!Array.isArray(decoded)) {
      return [];
    }
    // Kept inside the try block so a failing conversion also yields [].
    return decoded.map((item) => String(item));
  } catch {
    return [];
  }
}
77
+
78
/**
 * Render a string as a YAML scalar that reads back as the same string.
 *
 * The value is returned unchanged when it is safe as a plain scalar. It is
 * wrapped in double quotes when any of the following holds:
 *  - it contains a YAML indicator character or a quote character;
 *  - it is empty, or has leading/trailing whitespace;
 *  - it contains a control character (newline, tab, ...), which would
 *    otherwise break the surrounding single-line `key: value` layout;
 *  - it starts with a block-sequence marker ("-" followed by whitespace);
 *  - it would be read as a boolean, null or number instead of a string.
 *
 * Inside the quotes, backslashes, double quotes and control characters are
 * escaped using YAML double-quoted escape sequences.
 *
 * @param value - The raw string to emit.
 * @returns The plain or double-quoted YAML form.
 */
function yamlString(value: string): string {
  const needsQuotes =
    value === "" ||
    value !== value.trim() ||
    /[:#{}[\],&*?|>!%@`"']/.test(value) ||
    /[\x00-\x1f\x7f]/.test(value) ||
    /^-(\s|$)/.test(value) ||
    /^(~|null|true|false|yes|no|on|off)$/i.test(value) ||
    /^[-+]?(\.\d+|\d+(\.\d*)?)(e[-+]?\d+)?$/i.test(value) ||
    /^0[xo][0-9a-f]+$/i.test(value) ||
    /^[-+]?\.(inf|nan)$/i.test(value);

  if (!needsQuotes) {
    return value;
  }

  const escaped = value
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/[\x00-\x1f\x7f]/g, (ch) => {
      switch (ch) {
        case "\n":
          return "\\n";
        case "\r":
          return "\\r";
        case "\t":
          return "\\t";
        default:
          // Remaining control characters use the two-digit hex escape.
          return `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`;
      }
    });
  return `"${escaped}"`;
}
88
+
89
/**
 * Choose a unique "<dir>/<slug>" target for every exported row, keyed by entity id.
 *
 * A row keeps the plain slug of its name when no other row of the same
 * directory wants it. Rows whose slug is contested or empty get a slug that
 * includes a fragment of the entity id, so no two rows share a file.
 */
function assignExportPaths(
  rows: readonly Record<string, unknown>[],
): Map<string, { dir: string; slug: string }> {
  const candidates = rows.map((row) => ({
    id: row.id as string,
    dir: typeToDir(row.type as string),
    base: typeof row.name === "string" ? slugify(row.name) : "",
  }));

  // Count how many rows want each natural "<dir>/<slug>" target.
  const demand = new Map<string, number>();
  for (const c of candidates) {
    const key = `${c.dir}/${c.base}`;
    demand.set(key, (demand.get(key) ?? 0) + 1);
  }

  // Reserve uncontested names first so a disambiguated name can never take one of them.
  const taken = new Set<string>();
  for (const c of candidates) {
    const key = `${c.dir}/${c.base}`;
    if (c.base !== "" && demand.get(key) === 1) {
      taken.add(key);
    }
  }

  const assigned = new Map<string, { dir: string; slug: string }>();
  for (const c of candidates) {
    let slug = c.base;
    if (c.base === "" || demand.get(`${c.dir}/${c.base}`) !== 1) {
      const idPart = slugify(c.id).slice(0, 8).replace(/-+$/, "");
      const stem = [c.base, idPart].filter((part) => part !== "").join("-") || "entity";
      slug = stem;
      // Last resort: a numeric suffix when even the id-based name is taken.
      for (let n = 2; taken.has(`${c.dir}/${slug}`); n++) {
        slug = `${stem}-${n}`;
      }
      taken.add(`${c.dir}/${slug}`);
    }
    assigned.set(c.id, { dir: c.dir, slug });
  }
  return assigned;
}

/**
 * Export knowledge graph entities as markdown files organized by type.
 *
 * Each active entity (not invalidated, not archived) of the requested types
 * becomes one markdown file with YAML frontmatter, a heading, its content and
 * a "Related" section of wikilinks built from its outgoing edges. Files are
 * placed in one sub-directory per type, and an index.md with per-type counts
 * is written to the output directory.
 *
 * Entities of the same type whose names produce the same (or an empty) slug
 * are written to distinct files; wikilinks to exported entities point at the
 * file actually written. Links to entities outside the export fall back to
 * "<type dir>/<slug of name>".
 *
 * @param db - Database handle used to read graph_nodes and graph_edges.
 * @param opts - Output directory and type selection.
 * @returns Counts of files written and entities exported, plus the output directory.
 */
export async function exportAsMarkdown(
  db: SiaDb,
  opts: MarkdownExportOpts,
): Promise<MarkdownExportResult> {
  const types = opts.types ?? DEFAULT_TYPES;
  const includeCode = opts.includeCode ?? false;

  // Build the effective type list
  const effectiveTypes = [...types];
  if (includeCode && !effectiveTypes.includes("CodeEntity")) {
    effectiveTypes.push("CodeEntity");
  }

  // Query active entities filtered by type. With no types there is nothing to
  // select; skipping the query also avoids an empty `IN ()` list, which is not
  // portable SQL.
  let entityRows: Record<string, unknown>[] = [];
  if (effectiveTypes.length > 0) {
    const placeholders = effectiveTypes.map(() => "?").join(", ");
    const result = await db.execute(
      `SELECT id, type, name, content, summary, importance, trust_tier, tags, created_at, t_valid_from
       FROM graph_nodes
       WHERE t_valid_until IS NULL AND archived_at IS NULL
         AND type IN (${placeholders})
       ORDER BY type, importance DESC`,
      effectiveTypes,
    );
    entityRows = result.rows as Record<string, unknown>[];
  }

  // Create output directory structure
  mkdirSync(opts.outputDir, { recursive: true });
  for (const t of effectiveTypes) {
    mkdirSync(join(opts.outputDir, typeToDir(t)), { recursive: true });
  }

  // Decide every file name up front so links and files agree and nothing is overwritten.
  const exportPaths = assignExportPaths(entityRows);

  // For each entity, resolve related entities and write markdown file
  let filesWritten = 0;
  const typeCounts = new Map<string, number>();

  for (const entity of entityRows) {
    const entityId = entity.id as string;
    const entityType = entity.type as string;
    const entityName = typeof entity.name === "string" ? entity.name : "";
    const entityContent = typeof entity.content === "string" ? entity.content : "";
    const trustTier = entity.trust_tier as number;
    const createdAt = entity.created_at as number | null;
    const tags = parseTags(entity.tags);
    const importance = entity.importance as number;

    // Track type counts for index
    typeCounts.set(entityType, (typeCounts.get(entityType) ?? 0) + 1);

    // Resolve related entities via outgoing edges
    const { rows: relatedRows } = await db.execute(
      `SELECT e.type AS edge_type, ent.id, ent.type AS entity_type, ent.name
       FROM graph_edges e
       JOIN graph_nodes ent ON ent.id = e.to_id
       WHERE e.from_id = ? AND e.t_valid_until IS NULL
         AND ent.t_valid_until IS NULL AND ent.archived_at IS NULL`,
      [entityId],
    );

    const related: RelatedEntity[] = (relatedRows as Record<string, unknown>[]).map((r) => ({
      edgeType: r.edge_type as string,
      id: r.id as string,
      entityType: r.entity_type as string,
      name: r.name as string,
    }));

    // Build YAML frontmatter
    const tagsYaml = tags.length > 0 ? `[${tags.map((t) => yamlString(t)).join(", ")}]` : "[]";

    const lines: string[] = [
      "---",
      `id: ${yamlString(entityId)}`,
      `kind: ${yamlString(entityType)}`,
      `trust_tier: ${trustTier}`,
      `created_at: ${yamlString(toIsoDate(createdAt))}`,
      `tags: ${tagsYaml}`,
      `importance: ${importance}`,
      "---",
      "",
      `# ${entityName}`,
      "",
      entityContent,
    ];

    // Add related section if there are linked entities
    if (related.length > 0) {
      lines.push("", "## Related", "");
      for (const rel of related) {
        const exported = exportPaths.get(rel.id);
        const target =
          exported !== undefined
            ? `${exported.dir}/${exported.slug}`
            : `${typeToDir(rel.entityType)}/${slugify(rel.name)}`;
        lines.push(`- ${rel.edgeType}: [[${target}]]`);
      }
    }

    lines.push(""); // trailing newline

    // Write file
    const placement = exportPaths.get(entityId) ?? {
      dir: typeToDir(entityType),
      slug: slugify(entityName),
    };
    const filePath = join(opts.outputDir, placement.dir, `${placement.slug}.md`);
    writeFileSync(filePath, lines.join("\n"), "utf-8");
    filesWritten++;
  }

  // Generate index.md
  const indexLines: string[] = [
    "# Sia Knowledge Graph Export",
    "",
    `**Exported at:** ${new Date().toISOString()}`,
    `**Entities:** ${entityRows.length}`,
    "",
    "## Summary",
    "",
    "| Type | Count |",
    "|------|-------|",
  ];

  for (const t of effectiveTypes) {
    const count = typeCounts.get(t) ?? 0;
    if (count > 0) {
      indexLines.push(`| ${t} | ${count} |`);
    }
  }

  indexLines.push(""); // trailing newline
  writeFileSync(join(opts.outputDir, "index.md"), indexLines.join("\n"), "utf-8");
  filesWritten++;

  return {
    filesWritten,
    entitiesExported: entityRows.length,
    outputDir: opts.outputDir,
  };
}