sanook-cli 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/.env.example +19 -0
  2. package/CHANGELOG.md +173 -0
  3. package/README.md +153 -20
  4. package/README.th.md +136 -0
  5. package/dist/agentContext.js +4 -0
  6. package/dist/approval.js +6 -0
  7. package/dist/bin.js +405 -57
  8. package/dist/brain.js +92 -59
  9. package/dist/brand.js +47 -0
  10. package/dist/checkpoint.js +37 -0
  11. package/dist/commands.js +86 -6
  12. package/dist/compaction.js +76 -5
  13. package/dist/config.js +100 -12
  14. package/dist/cost.js +60 -3
  15. package/dist/doctor.js +92 -0
  16. package/dist/gateway/auth.js +2 -2
  17. package/dist/gateway/ledger.js +2 -2
  18. package/dist/gateway/scheduler.js +1 -0
  19. package/dist/gateway/serve.js +6 -4
  20. package/dist/gateway/server.js +10 -2
  21. package/dist/git.js +11 -2
  22. package/dist/hooks.js +43 -17
  23. package/dist/knowledge.js +48 -49
  24. package/dist/loop.js +182 -66
  25. package/dist/lsp/client.js +173 -0
  26. package/dist/lsp/framing.js +56 -0
  27. package/dist/lsp/index.js +138 -0
  28. package/dist/lsp/servers.js +82 -0
  29. package/dist/mcp-server.js +244 -0
  30. package/dist/mcp.js +184 -29
  31. package/dist/memory-store.js +559 -0
  32. package/dist/memory.js +143 -29
  33. package/dist/orchestrate.js +150 -0
  34. package/dist/providers/codex.js +21 -7
  35. package/dist/providers/keys.js +3 -2
  36. package/dist/providers/models.js +22 -6
  37. package/dist/providers/registry.js +155 -1
  38. package/dist/repomap.js +93 -0
  39. package/dist/search/chunk.js +158 -0
  40. package/dist/search/embed-store.js +187 -0
  41. package/dist/search/engine.js +203 -0
  42. package/dist/search/fuse.js +35 -0
  43. package/dist/search/index-core.js +187 -0
  44. package/dist/search/indexer.js +241 -0
  45. package/dist/search/store.js +77 -0
  46. package/dist/session.js +42 -8
  47. package/dist/skill-install.js +10 -10
  48. package/dist/skills.js +12 -9
  49. package/dist/summarize.js +31 -0
  50. package/dist/tools/bash.js +21 -2
  51. package/dist/tools/diagnostics.js +41 -0
  52. package/dist/tools/edit.js +29 -7
  53. package/dist/tools/index.js +8 -1
  54. package/dist/tools/list.js +7 -2
  55. package/dist/tools/permission.js +90 -9
  56. package/dist/tools/read.js +23 -4
  57. package/dist/tools/remember.js +1 -1
  58. package/dist/tools/sandbox.js +61 -0
  59. package/dist/tools/search.js +105 -4
  60. package/dist/tools/task.js +195 -29
  61. package/dist/tools/timeout.js +35 -0
  62. package/dist/tools/util.js +10 -0
  63. package/dist/tools/write.js +6 -4
  64. package/dist/trust.js +89 -0
  65. package/dist/ui/app.js +228 -31
  66. package/dist/ui/banner.js +4 -9
  67. package/dist/ui/brain-wizard.js +2 -2
  68. package/dist/ui/history.js +30 -0
  69. package/dist/ui/mentions.js +44 -0
  70. package/dist/ui/render.js +55 -15
  71. package/dist/ui/setup.js +97 -12
  72. package/dist/ui/useEditor.js +83 -0
  73. package/dist/update.js +114 -0
  74. package/dist/worktree.js +173 -0
  75. package/package.json +11 -5
  76. package/scripts/postinstall.mjs +33 -0
  77. package/second-brain/.agents/_Index.md +30 -0
  78. package/second-brain/.agents/skills/_Index.md +30 -0
  79. package/second-brain/.agents/workflows/_Index.md +30 -0
  80. package/second-brain/AGENTS.md +4 -4
  81. package/second-brain/Acceptance/_Index.md +30 -0
  82. package/second-brain/Acceptance/golden-case-template.md +39 -0
  83. package/second-brain/Areas/_Index.md +30 -0
  84. package/second-brain/Bugs/System-OS/_Index.md +30 -0
  85. package/second-brain/Bugs/_Index.md +30 -0
  86. package/second-brain/CLAUDE.md +4 -1
  87. package/second-brain/Checklists/_Index.md +30 -0
  88. package/second-brain/Checklists/preflight-postflight-template.md +29 -0
  89. package/second-brain/Distillations/_Index.md +30 -0
  90. package/second-brain/Entities/_Index.md +30 -0
  91. package/second-brain/Entities/entity-template.md +33 -0
  92. package/second-brain/Evals/_Index.md +30 -0
  93. package/second-brain/Evals/correction-pairs.md +24 -0
  94. package/second-brain/Evals/failure-taxonomy.md +24 -0
  95. package/second-brain/Evals/golden-set.md +25 -0
  96. package/second-brain/Evals/quality-ledger.md +23 -0
  97. package/second-brain/Evals/self-eval-rubric.md +23 -0
  98. package/second-brain/GEMINI.md +4 -4
  99. package/second-brain/Goals/_Index.md +30 -0
  100. package/second-brain/Handoffs/_Index.md +30 -0
  101. package/second-brain/Home.md +7 -0
  102. package/second-brain/Intake/Raw Sources/_Index.md +30 -0
  103. package/second-brain/Intake/_Index.md +30 -0
  104. package/second-brain/Intake/_Quarantine/_Index.md +30 -0
  105. package/second-brain/Learning/_Index.md +30 -0
  106. package/second-brain/Playbooks/_Index.md +30 -0
  107. package/second-brain/Playbooks/playbook-template.md +23 -0
  108. package/second-brain/Projects/_Index.md +30 -0
  109. package/second-brain/Prompts/_Index.md +30 -0
  110. package/second-brain/README.md +2 -1
  111. package/second-brain/Research/_Index.md +30 -0
  112. package/second-brain/Retrospectives/_Index.md +30 -0
  113. package/second-brain/Reviews/_Index.md +30 -0
  114. package/second-brain/Runbooks/_Index.md +30 -0
  115. package/second-brain/Runbooks/eval-loop.md +24 -0
  116. package/second-brain/Sessions/_Index.md +30 -0
  117. package/second-brain/Shared/AI-Context-Index.md +20 -0
  118. package/second-brain/Shared/AI-Threads/_Index.md +30 -0
  119. package/second-brain/Shared/Archive/_Index.md +30 -0
  120. package/second-brain/Shared/Assets/_Index.md +30 -0
  121. package/second-brain/Shared/Context-Packs/_Index.md +30 -0
  122. package/second-brain/Shared/Context7-Docs/_Index.md +30 -0
  123. package/second-brain/Shared/Coordination/NOW.md +28 -0
  124. package/second-brain/Shared/Coordination/_Index.md +30 -0
  125. package/second-brain/Shared/Coordination/agent-registry.md +24 -0
  126. package/second-brain/Shared/Coordination/task-board/_Index.md +30 -0
  127. package/second-brain/Shared/Coordination/task-board/task-template.md +43 -0
  128. package/second-brain/Shared/Coordination/task-board.md +32 -0
  129. package/second-brain/Shared/Core-Facts/_Index.md +30 -0
  130. package/second-brain/Shared/Decision-Memory/_Index.md +30 -0
  131. package/second-brain/Shared/Glossary/_Index.md +30 -0
  132. package/second-brain/Shared/Memory-Inbox/_Index.md +30 -0
  133. package/second-brain/Shared/Operating-State/_Index.md +30 -0
  134. package/second-brain/Shared/Prompting/_Index.md +30 -0
  135. package/second-brain/Shared/Provenance/_Index.md +30 -0
  136. package/second-brain/Shared/Rules/_Index.md +30 -0
  137. package/second-brain/Shared/Rules/contextual-note-rule.md +30 -0
  138. package/second-brain/Shared/Rules/frontmatter-standard.md +10 -0
  139. package/second-brain/Shared/Rules/memory-write-protocol.md +28 -0
  140. package/second-brain/Shared/Rules/procedural-runbook-header.md +40 -0
  141. package/second-brain/Shared/Rules/review-and-staleness-policy.md +22 -0
  142. package/second-brain/Shared/Rules/rules-formatting.md +34 -0
  143. package/second-brain/Shared/Scripts/_Index.md +30 -0
  144. package/second-brain/Shared/Scripts-Archive/_Index.md +30 -0
  145. package/second-brain/Shared/Tech-Standards/_Index.md +30 -0
  146. package/second-brain/Shared/Tech-Standards/verification-standard.md +40 -0
  147. package/second-brain/Shared/User-Memory/_Index.md +30 -0
  148. package/second-brain/Shared/User-Persona/_Index.md +30 -0
  149. package/second-brain/Shared/User-Persona/owner-profile.md +25 -0
  150. package/second-brain/Shared/Working-Memory/_Index.md +30 -0
  151. package/second-brain/Shared/_Index.md +30 -0
  152. package/second-brain/Shared/mcp-servers/_Index.md +30 -0
  153. package/second-brain/Skills/_Index.md +30 -0
  154. package/second-brain/Templates/_Index.md +30 -0
  155. package/second-brain/Templates/bug.md +2 -0
  156. package/second-brain/Templates/handoff.md +2 -0
  157. package/second-brain/Templates/session.md +2 -0
  158. package/second-brain/Tools/_Index.md +30 -0
  159. package/second-brain/Traces/_Index.md +30 -0
  160. package/second-brain/Vault Structure Map.md +33 -1
  161. package/second-brain/copilot/_Index.md +30 -0
  162. package/skills/audit-license-compliance/SKILL.md +117 -0
  163. package/skills/author-codemod/SKILL.md +110 -0
  164. package/skills/build-audit-logging/SKILL.md +112 -0
  165. package/skills/build-cdc-streaming-pipeline/SKILL.md +123 -0
  166. package/skills/build-cli-tool/SKILL.md +108 -0
  167. package/skills/build-data-table/SKILL.md +141 -0
  168. package/skills/build-native-mobile-ui/SKILL.md +154 -0
  169. package/skills/build-offline-first-sync/SKILL.md +118 -0
  170. package/skills/build-realtime-channel/SKILL.md +122 -0
  171. package/skills/build-vector-search/SKILL.md +131 -0
  172. package/skills/compose-local-dev-stack/SKILL.md +149 -0
  173. package/skills/configure-bundler-build/SKILL.md +166 -0
  174. package/skills/configure-dns-tls/SKILL.md +142 -0
  175. package/skills/configure-reverse-proxy-lb/SKILL.md +129 -0
  176. package/skills/configure-security-headers-csp/SKILL.md +122 -0
  177. package/skills/contract-testing/SKILL.md +140 -0
  178. package/skills/datetime-timezone-correctness/SKILL.md +125 -0
  179. package/skills/debug-ci-pipeline-failure/SKILL.md +134 -0
  180. package/skills/debug-flaky-tests/SKILL.md +128 -0
  181. package/skills/defend-llm-prompt-injection/SKILL.md +110 -0
  182. package/skills/deliver-webhooks/SKILL.md +116 -0
  183. package/skills/design-api-pagination/SKILL.md +144 -0
  184. package/skills/design-authorization-model/SKILL.md +119 -0
  185. package/skills/design-backup-dr-recovery/SKILL.md +113 -0
  186. package/skills/design-event-sourcing-cqrs/SKILL.md +143 -0
  187. package/skills/design-multi-tenancy/SKILL.md +100 -0
  188. package/skills/design-protobuf-grpc-service/SKILL.md +146 -0
  189. package/skills/design-relational-schema/SKILL.md +129 -0
  190. package/skills/design-search-index-infra/SKILL.md +151 -0
  191. package/skills/design-state-machine/SKILL.md +108 -0
  192. package/skills/design-token-system/SKILL.md +109 -0
  193. package/skills/distributed-locks-leases/SKILL.md +120 -0
  194. package/skills/encrypt-sensitive-data/SKILL.md +148 -0
  195. package/skills/feature-flags-rollout/SKILL.md +130 -0
  196. package/skills/file-upload-object-storage/SKILL.md +107 -0
  197. package/skills/fuzz-dynamic-security-test/SKILL.md +111 -0
  198. package/skills/harden-llm-app-reliability/SKILL.md +126 -0
  199. package/skills/i18n-localization-setup/SKILL.md +113 -0
  200. package/skills/idempotency-keys/SKILL.md +107 -0
  201. package/skills/implement-push-notifications/SKILL.md +142 -0
  202. package/skills/ingest-webhook-secure/SKILL.md +120 -0
  203. package/skills/integrate-oauth-oidc/SKILL.md +126 -0
  204. package/skills/load-stress-test/SKILL.md +129 -0
  205. package/skills/map-privacy-data-gdpr/SKILL.md +146 -0
  206. package/skills/model-nosql-data/SKILL.md +118 -0
  207. package/skills/money-decimal-arithmetic/SKILL.md +123 -0
  208. package/skills/monitor-ml-drift/SKILL.md +109 -0
  209. package/skills/numeric-precision-units/SKILL.md +144 -0
  210. package/skills/optimize-llm-cost-latency/SKILL.md +103 -0
  211. package/skills/optimize-react-rerenders/SKILL.md +124 -0
  212. package/skills/orchestrate-agent-workflow/SKILL.md +100 -0
  213. package/skills/payments-billing-integration/SKILL.md +114 -0
  214. package/skills/pin-toolchain-versions/SKILL.md +116 -0
  215. package/skills/plan-strangler-migration/SKILL.md +95 -0
  216. package/skills/property-based-testing/SKILL.md +108 -0
  217. package/skills/publish-package-registry/SKILL.md +130 -0
  218. package/skills/recover-git-state/SKILL.md +119 -0
  219. package/skills/remediate-web-vulnerabilities/SKILL.md +125 -0
  220. package/skills/resilience-timeouts-retries/SKILL.md +104 -0
  221. package/skills/resolve-merge-rebase-conflict/SKILL.md +97 -0
  222. package/skills/rewrite-git-history/SKILL.md +109 -0
  223. package/skills/scaffold-cross-platform-app/SKILL.md +137 -0
  224. package/skills/schema-evolution-compatibility/SKILL.md +121 -0
  225. package/skills/send-transactional-email/SKILL.md +126 -0
  226. package/skills/serve-deploy-ml-model/SKILL.md +107 -0
  227. package/skills/setup-cdn-edge-waf/SKILL.md +107 -0
  228. package/skills/setup-devcontainer-env/SKILL.md +131 -0
  229. package/skills/setup-lint-format-precommit/SKILL.md +140 -0
  230. package/skills/setup-monorepo-tooling/SKILL.md +125 -0
  231. package/skills/ship-mobile-app-store-release/SKILL.md +137 -0
  232. package/skills/structured-output-llm/SKILL.md +86 -0
  233. package/skills/supply-chain-sbom-provenance/SKILL.md +120 -0
  234. package/skills/test-data-factories/SKILL.md +158 -0
  235. package/skills/threat-model-stride/SKILL.md +123 -0
  236. package/skills/train-evaluate-ml-model/SKILL.md +109 -0
  237. package/skills/unicode-text-correctness/SKILL.md +109 -0
  238. package/skills/visual-regression-testing/SKILL.md +120 -0
@@ -7,6 +7,7 @@ import { createMistral } from '@ai-sdk/mistral';
7
7
  import { createGroq } from '@ai-sdk/groq';
8
8
  import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
9
9
  import { resolveKeyFromEnv, assertDirectApiKey } from './keys.js';
10
+ import { BRAND } from '../brand.js';
10
11
  // ────────────────────────────────────────────────────────────────────────────
11
12
  // PROVIDER TABLE — เพิ่มค่าย = เพิ่ม 1 entry (loop/cost/keys ไม่ต้องแตะ)
12
13
  // auth/format/OAuth-reject verify มิ.ย. 2026 (ดู Research/provider-connect-matrix)
@@ -20,6 +21,7 @@ export const PROVIDERS = {
20
21
  baseURL: 'https://api.anthropic.com/v1',
21
22
  requiresKey: true,
22
23
  keyFormat: /^sk-ant-api\d{2}-/,
24
+ keyExample: 'sk-ant-…', // สั้นพอไม่โดน redactKey (sk- + ≥6 chars ถึงโดนตัด)
23
25
  oauthRejectPrefixes: ['sk-ant-oat'], // Claude.ai subscription OAuth → banned
24
26
  models: {
25
27
  default: 'claude-opus-4-8',
@@ -39,6 +41,7 @@ export const PROVIDERS = {
39
41
  envFallbacks: ['GOOGLE_API_KEY', 'GEMINI_API_KEY'],
40
42
  requiresKey: true,
41
43
  keyFormat: /^AIza[0-9A-Za-z_-]{35}$/,
44
+ keyExample: 'AIza…',
42
45
  oauthRejectPrefixes: ['ya29.', 'AQ.'], // Google OAuth / restricted token → banned
43
46
  models: {
44
47
  default: 'gemini-2.5-pro',
@@ -57,6 +60,7 @@ export const PROVIDERS = {
57
60
  baseURL: 'https://api.openai.com/v1',
58
61
  requiresKey: true,
59
62
  keyFormat: /^sk-/,
63
+ keyExample: 'sk-…',
60
64
  models: {
61
65
  default: 'gpt-5.5',
62
66
  smart: 'gpt-5.5',
@@ -83,6 +87,7 @@ export const PROVIDERS = {
83
87
  envVar: 'XAI_API_KEY',
84
88
  requiresKey: true,
85
89
  keyFormat: /^xai-[A-Za-z0-9]{16,}$/,
90
+ keyExample: 'xai-…',
86
91
  // grok-4 (snapshot grok-4-0709) retired 2026-05-15 → redirect grok-4.3 (doc audit มิ.ย. 2026)
87
92
  models: { default: 'grok-4.3', smart: 'grok-4.3', grok: 'grok-4.3' },
88
93
  create: (key) => createXai({ apiKey: key }),
@@ -102,6 +107,7 @@ export const PROVIDERS = {
102
107
  envVar: 'GROQ_API_KEY',
103
108
  requiresKey: true,
104
109
  keyFormat: /^gsk_[A-Za-z0-9]{20,}$/,
110
+ keyExample: 'gsk_…',
105
111
  models: { default: 'llama-3.3-70b-versatile', fast: 'llama-3.3-70b-versatile' },
106
112
  create: (key) => createGroq({ apiKey: key }),
107
113
  },
@@ -215,6 +221,65 @@ export function specKey(spec) {
215
221
  const { provider, model } = parseSpec(spec);
216
222
  return `${provider}:${model}`;
217
223
  }
/**
 * Console page where an API key can be created, keyed by provider id.
 * Shown in error messages and the setup wizard ("where do I get a key?").
 */
const CONSOLE_URLS = {
    anthropic: 'https://console.anthropic.com/settings/keys',
    google: 'https://aistudio.google.com/apikey',
    openai: 'https://platform.openai.com/api-keys',
    deepseek: 'https://platform.deepseek.com/api_keys',
    xai: 'https://console.x.ai',
    mistral: 'https://console.mistral.ai/api-keys',
    groq: 'https://console.groq.com/keys',
    minimax: 'https://platform.minimax.io',
    glm: 'https://z.ai/manage-apikey/apikey-list',
};
/**
 * Look up the key-creation console URL for a provider.
 *
 * @param {string} provider - provider id (e.g. 'anthropic').
 * @returns {string | undefined} the URL, or undefined when the id has no entry.
 */
export function consoleUrl(provider) {
    // Own-property check: a bare `CONSOLE_URLS[provider]` would resolve inherited
    // names such as 'constructor' or 'toString' to Object.prototype members
    // (truthy functions) instead of undefined.
    return Object.prototype.hasOwnProperty.call(CONSOLE_URLS, provider)
        ? CONSOLE_URLS[provider]
        : undefined;
}
/**
 * Does this provider have a key in the environment that is actually usable?
 * "Usable" means a key is present AND it passes the direct-API-key policy
 * (not an OAuth/subscription token, not a malformed key). Used by both the
 * first-run smart-skip and the -m flag so the wizard is not skipped when the
 * key cannot work (e.g. `export ANTHROPIC_API_KEY=sk-ant-oat…` is banned, so
 * the user must go through the wizard rather than be told "ready").
 *
 * @param {string} provider - provider id looked up in PROVIDERS.
 * @returns {boolean} false for an unknown provider, a missing key, or a key
 *   rejected by assertDirectApiKey; true otherwise (including providers that
 *   do not require a key).
 */
export function hasUsableEnvKey(provider) {
    const entry = PROVIDERS[provider];
    if (!entry) {
        return false;
    }
    if (!entry.requiresKey) {
        // local provider — no key needed
        return true;
    }
    const candidate = resolveKeyFromEnv(entry.envVar, entry.envFallbacks);
    if (!candidate) {
        return false;
    }
    try {
        // throws on an OAuth prefix or a wrong key format
        assertDirectApiKey(entry, candidate);
    }
    catch {
        return false;
    }
    return true;
}
/**
 * Find the first cloud provider (in popularity order) that has a usable key
 * in the environment. Used for the first-run smart skip and for headless hints.
 *
 * @returns {{ provider: string, label: string, envVar: string, model: string } | null}
 *   the detected provider with its default model, or null when none qualifies.
 */
export function detectEnvProvider() {
    const preference = ['anthropic', 'openai', 'google', 'deepseek', 'xai', 'mistral', 'groq', 'glm', 'minimax'];
    const hit = preference.find((name) => PROVIDERS[name]?.requiresKey && hasUsableEnvKey(name));
    if (hit === undefined) {
        return null;
    }
    const { label, envVar, models } = PROVIDERS[hit];
    return { provider: hit, label, envVar, model: models.default };
}
/**
 * Pick the cheaper/faster model from the same provider as `spec`, for
 * mechanical work such as summarize/compaction. It stays on the same provider,
 * so the same key is reused. When the provider has no fast tier (or is
 * unknown) the original spec is returned: it still works, it just saves nothing.
 *
 * @param {string} spec - model spec understood by parseSpec.
 * @returns {string} `provider:model` of the fast sibling, or `spec` unchanged.
 */
export function fastSibling(spec) {
    const { provider } = parseSpec(spec);
    const entry = PROVIDERS[provider];
    if (entry) {
        // tier aliases in priority order: fast → flash → haiku → air
        const { fast, flash, haiku, air } = entry.models;
        const cheaper = fast ?? flash ?? haiku ?? air;
        if (cheaper) {
            return `${provider}:${cheaper}`;
        }
    }
    return spec;
}
218
283
  /** resolve spec → LanguageModel (throw ถ้าไม่มี key / provider ผิด / key เป็น OAuth) */
219
284
  export function resolveModel(spec) {
220
285
  const { provider, model } = parseSpec(spec);
@@ -226,7 +291,11 @@ export function resolveModel(spec) {
226
291
  if (cfg.requiresKey) {
227
292
  const found = resolveKeyFromEnv(cfg.envVar, cfg.envFallbacks);
228
293
  if (!found) {
229
- throw new Error(`ต้องตั้ง ${cfg.envVar} ก่อนใช้ provider "${provider}" (BYOK — API key ตรงจาก console)`);
294
+ const url = consoleUrl(provider);
295
+ throw new Error(`ยังไม่มี API key ของ ${cfg.label} (${cfg.envVar})\n` +
296
+ (url ? ` • เอา key ที่: ${url}\n` : '') +
297
+ ` • ตั้ง: export ${cfg.envVar}="..." ` +
298
+ `หรือรัน \`${BRAND.cliName}\` (ไม่ใส่ task) เพื่อ setup wizard`);
230
299
  }
231
300
  assertDirectApiKey(cfg, found); // reject OAuth/subscription token + format ผิด
232
301
  key = found;
@@ -239,3 +308,88 @@ export function resolveModel(spec) {
239
308
  (cfg.requiresKey ? cfg.baseURL : process.env[cfg.envVar] ?? cfg.baseURL);
240
309
  return cfg.create(key, baseURL)(model);
241
310
  }
/**
 * Embedding-capable providers, keyed by provider id. Each entry names the env
 * var it reads (`envVar`, plus optional `envFallbacks`), whether a key is
 * required, the default embedding model id, and a `create(key, baseURL)`
 * factory returning `(modelId) => embedding model`.
 */
export const EMBEDDING_PROVIDERS = {
    openai: {
        envVar: 'OPENAI_API_KEY',
        requiresKey: true,
        defaultModel: 'text-embedding-3-small',
        create(key, baseURL) {
            return (id) => createOpenAI({ apiKey: key, baseURL }).textEmbeddingModel(id);
        },
    },
    mistral: {
        envVar: 'MISTRAL_API_KEY',
        requiresKey: true,
        defaultModel: 'mistral-embed',
        create(key) {
            return (id) => createMistral({ apiKey: key }).textEmbeddingModel(id);
        },
    },
    google: {
        envVar: 'GOOGLE_GENERATIVE_AI_API_KEY',
        envFallbacks: ['GOOGLE_API_KEY', 'GEMINI_API_KEY'],
        requiresKey: true,
        defaultModel: 'text-embedding-004',
        create(key) {
            return (id) => createGoogleGenerativeAI({ apiKey: key }).textEmbeddingModel(id);
        },
    },
    // local — only picked when explicitly requested (auto-detect never assumes a server is up)
    ollama: {
        envVar: 'OLLAMA_BASE_URL',
        requiresKey: false,
        localPlaceholderKey: 'ollama',
        defaultModel: 'nomic-embed-text',
        create(key, baseURL) {
            return (id) => {
                const endpoint = baseURL ?? 'http://localhost:11434/v1';
                return createOpenAICompatible({ name: 'ollama', apiKey: key, baseURL: endpoint }).textEmbeddingModel(id);
            };
        },
    },
};
/** Cloud, key-gated providers tried (in order) when no explicit embeddingModel is configured. */
const EMBED_AUTODETECT = ['openai', 'mistral', 'google'];
/**
 * Build an embedder descriptor for one provider, or null when it cannot be used.
 *
 * Returns null (never throws for these cases) when: the provider has no entry in
 * EMBEDDING_PROVIDERS, a required key is missing from the environment, the key
 * is rejected by assertDirectApiKey under the matching PROVIDERS policy, or the
 * provider factory itself throws.
 *
 * @param {string} provider - provider id, a key of EMBEDDING_PROVIDERS.
 * @param {string} [modelId] - embedding model id; defaults to the provider's defaultModel.
 * @returns {{ model: unknown, provider: string, modelId: string, tag: string } | null}
 */
function buildEmbedder(provider, modelId) {
    // Own-property check so inherited names ('constructor', …) are treated as unknown providers.
    if (!Object.prototype.hasOwnProperty.call(EMBEDDING_PROVIDERS, provider))
        return null;
    const cfg = EMBEDDING_PROVIDERS[provider];
    if (!cfg)
        return null;
    let key;
    if (cfg.requiresKey) {
        const found = resolveKeyFromEnv(cfg.envVar, cfg.envFallbacks);
        if (!found)
            return null;
        // Apply the same key policy as the chat providers when one exists for this id.
        const policy = PROVIDERS[provider];
        if (policy) {
            try {
                assertDirectApiKey(policy, found);
            }
            catch {
                return null;
            }
        }
        key = found;
    }
    else {
        // NOTE(review): for a key-less provider cfg.envVar is also the variable the
        // base URL is read from below, so a set value doubles as the key here —
        // confirm that is intended.
        key = resolveKeyFromEnv(cfg.envVar) ?? cfg.localPlaceholderKey ?? 'local';
    }
    // A blank variable (e.g. `OPENAI_BASE_URL=` in a .env file) counts as unset.
    // Passing '' through would defeat the `??` default endpoint in the factories.
    const envOrUndefined = (name) => {
        const raw = process.env[name];
        const trimmed = typeof raw === 'string' ? raw.trim() : '';
        return trimmed === '' ? undefined : trimmed;
    };
    const baseURL = envOrUndefined(`${provider.toUpperCase()}_BASE_URL`) ??
        (cfg.requiresKey ? undefined : envOrUndefined(cfg.envVar));
    const id = modelId ?? cfg.defaultModel;
    try {
        return { model: cfg.create(key, baseURL)(id), provider, modelId: id, tag: `${provider}:${id}` };
    }
    catch {
        return null;
    }
}
/**
 * Resolve an embeddings model.
 *
 * @param {string} [spec] - 'provider' | 'provider:modelId' | undefined.
 *   When omitted (or empty), the providers in EMBED_AUTODETECT are tried in
 *   order and the first one that builds is returned.
 * @returns {object | null} the embedder descriptor from buildEmbedder, or null
 *   when nothing resolves, so callers can degrade to BM25-only.
 */
export function resolveEmbedder(spec) {
    if (!spec) {
        // auto-detect: first provider that builds wins
        for (const candidate of EMBED_AUTODETECT) {
            const built = buildEmbedder(candidate);
            if (built) {
                return built;
            }
        }
        return null;
    }
    const colon = spec.indexOf(':');
    const hasModel = colon !== -1;
    const provider = (hasModel ? spec.slice(0, colon) : spec).trim();
    if (!provider) {
        return null;
    }
    // an empty model part ("openai:") falls back to the provider default
    const modelId = hasModel ? (spec.slice(colon + 1).trim() || undefined) : undefined;
    return buildEmbedder(provider, modelId);
}
@@ -0,0 +1,93 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import { join, extname } from 'node:path';
3
+ import { runGit, isGitRepo } from './git.js';
// repo map = a rough symbol map of the repo (zero-dep, one regex set per language),
// injected at session start. It helps the agent pick the right file without
// grepping/reading files one by one — modelled on Aider's repo-map (lightweight version).
const MAX_FILES = 400;
const MAX_FILE_BYTES = 32 * 1024;
const SYMS_PER_FILE = 12;
const SOURCE_EXT = new Set([
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py', '.go', '.rs',
    '.java', '.rb', '.c', '.h', '.cpp', '.hpp', '.cs', '.php', '.swift', '.kt',
]);
const IGNORE_DIR = /(^|\/)(node_modules|dist|build|coverage|\.next|\.cache|\.git|vendor|__pycache__)(\/|$)/;
// Regexes that pull top-level / exported symbols — several languages, results merged and deduped.
const SYMBOL_PATTERNS = [
    /^export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|interface|type|enum)\s+([A-Za-z0-9_$]+)/gm, // TS/JS export
    /^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z0-9_$]+)/gm, // JS function
    /^(?:export\s+)?class\s+([A-Za-z0-9_$]+)/gm, // JS class
    /^(?:def|class)\s+([A-Za-z0-9_]+)/gm, // Python
    /^func\s+(?:\([^)]*\)\s+)?([A-Za-z0-9_]+)/gm, // Go
    /^(?:pub\s+)?(?:fn|struct|enum|trait|impl)\s+([A-Za-z0-9_]+)/gm, // Rust
];
/**
 * Collect up to SYMS_PER_FILE distinct symbol names from a file's text.
 * Patterns are applied in SYMBOL_PATTERNS order, so results are grouped by
 * pattern (not by position in the file) and deduplicated across patterns.
 *
 * @param {string} content - file text.
 * @returns {string[]} at most SYMS_PER_FILE names, in discovery order.
 */
function extractSymbols(content) {
    // Work cap: once this many distinct names are found, stop scanning entirely.
    // Only the first SYMS_PER_FILE are returned, so later matches cannot change the result.
    const cap = SYMS_PER_FILE * 3;
    const found = new Set();
    for (const re of SYMBOL_PATTERNS) {
        if (found.size >= cap)
            break;
        // matchAll works on a clone of the regex, so the shared /g patterns keep no lastIndex state.
        for (const m of content.matchAll(re)) {
            if (m[1])
                found.add(m[1]);
            if (found.size >= cap)
                break;
        }
    }
    return [...found].slice(0, SYMS_PER_FILE);
}
/**
 * True when a repo-relative path has a known source extension (case-insensitive)
 * and does not pass through an ignored directory.
 *
 * @param {string} rel - repo-relative path using '/' separators.
 * @returns {boolean}
 */
function isSource(rel) {
    return SOURCE_EXT.has(extname(rel).toLowerCase()) && !IGNORE_DIR.test(rel);
}
/**
 * List the files tracked by git under `cwd`.
 *
 * @param {string} cwd - directory to inspect.
 * @returns {Promise<string[] | null>} tracked paths; `[]` when `cwd` is not a
 *   git repo (safe to cache); `null` when `git ls-files` itself failed, which
 *   is treated as transient (do not cache, retry next time).
 */
async function listFiles(cwd) {
    const insideRepo = await isGitRepo(cwd);
    if (!insideRepo) {
        return [];
    }
    try {
        const listing = await runGit(['ls-files'], cwd);
        return listing.split('\n').filter((line) => line !== '');
    }
    catch {
        // ls-files failing (e.g. maxBuffer / index lock) is not the same as an empty repo
        return null;
    }
}
// Per-process cache for a single cwd: { cwd, entries } where entries are the
// untruncated "path: sym, sym" lines. Truncation is applied per call, so the
// cache stays valid for any maxChars.
let cached = null;
/** Render cached entries into the <repo_map> block, capped at roughly maxChars; '' when there are none. */
function renderRepoMap(entries, maxChars) {
    if (!entries.length)
        return '';
    let body = '';
    for (const e of entries) {
        if (body.length + e.length + 1 > maxChars) {
            body += '\n…';
            break;
        }
        body += (body ? '\n' : '') + e;
    }
    return `<repo_map note="symbol คร่าวๆ ของ repo (อาจไม่ครบ/ไม่เป๊ะ) — ใช้ glob/grep/read_file ยืนยันก่อนแก้">\n${body}\n</repo_map>`;
}
/**
 * Symbol outline of the repo, capped at `maxChars`. The scan is cached per
 * process for one cwd (structure rarely changes mid-session); the cap is
 * applied on every call, so callers with different budgets each get a
 * correctly sized map.
 *
 * @param {string} [cwd] - repo root; defaults to process.cwd().
 * @param {number} [maxChars] - budget for the entry list (the wrapper tag is extra).
 * @returns {Promise<string>} the <repo_map> block, or '' when `cwd` is not a git
 *   repo, has no source files (e.g. a markdown-only vault), or listing failed.
 */
export async function loadRepoMap(cwd = process.cwd(), maxChars = 4000) {
    if (cached && cached.cwd === cwd)
        return renderRepoMap(cached.entries, maxChars);
    const raw = await listFiles(cwd);
    if (raw === null)
        return ''; // listing failed transiently → return empty but do not cache (retry next call)
    const files = raw.filter(isSource).slice(0, MAX_FILES);
    const entries = await Promise.all(files.map(async (rel) => {
        try {
            // slice() counts UTF-16 code units of the decoded text, not bytes.
            const content = (await readFile(join(cwd, rel), 'utf8')).slice(0, MAX_FILE_BYTES);
            const syms = extractSymbols(content);
            return syms.length ? `${rel}: ${syms.join(', ')}` : rel;
        }
        catch {
            // unreadable file: still list the path, just without symbols
            return rel;
        }
    }));
    cached = { cwd, entries };
    return renderRepoMap(entries, maxChars);
}
/** Clear the cache (for tests / when cwd changes). */
export function clearRepoMapCache() {
    cached = null;
}
@@ -0,0 +1,158 @@
1
+ // ============================================================================
2
+ // src/search/chunk.ts — ONE generic, heading-aware markdown chunker.
3
+ //
4
+ // arra-oracle ships five hardcoded type parsers (resonance/learning/retro/
5
+ // distillation/security-corpus), each splitting on its own header convention.
6
+ // We replace all five with a single type-agnostic chunker: split on ATX
7
+ // headings, fold sub-MIN sections forward so we never emit a tiny chunk, and key
8
+ // each chunk by a stable hash of (path)#ordinal so re-indexing a file replaces
9
+ // exactly its chunks (no posting creep — see index-core.addDoc).
10
+ //
11
+ // Everything is pure (no fs) and DEFENSIVE: malformed frontmatter, nested YAML,
12
+ // or a stray [[ inside a code fence degrade to "no frontmatter / no links"
13
+ // rather than throwing. We must never block indexing a real, messy vault file.
14
+ // ============================================================================
15
+ const MIN_CHARS = 120; // sections shorter than this fold into the next chunk
/**
 * Deterministic short hash of a path: 32-bit FNV-1a over the string's UTF-16
 * code units, rendered in base 36. No crypto dependency; gives stable chunk ids.
 *
 * @param {string} path - path to hash.
 * @returns {string} unsigned 32-bit hash as a base-36 string.
 */
export function pathHash(path) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let acc = FNV_OFFSET_BASIS;
    let pos = 0;
    while (pos < path.length) {
        acc = Math.imul(acc ^ path.charCodeAt(pos), FNV_PRIME);
        pos += 1;
    }
    // >>> 0 reinterprets the signed 32-bit result as unsigned before formatting
    const unsigned = acc >>> 0;
    return unsigned.toString(36);
}
/**
 * Split a leading `---\n…\n---` frontmatter block from the body.
 * Defensive: with no (or an unterminated) block the result is `{ tags: [] }`
 * plus the full markdown as body.
 *
 * Only these keys are read (case-insensitive): `note_type`/`notetype`,
 * `parent`, `up`, and `tags` (inline `[a, b]`, single scalar, or a YAML
 * block list of `- item` lines). Everything else is ignored.
 *
 * @param {string} md - markdown text (LF line endings expected).
 * @returns {{ data: { tags: string[], noteType?: string, parent?: string, up?: string }, body: string }}
 */
export function parseFrontmatter(md) {
    const empty = { tags: [] };
    if (!md.startsWith('---'))
        return { data: empty, body: md };
    const end = md.indexOf('\n---', 3);
    if (end === -1)
        return { data: empty, body: md };
    const block = md.slice(3, end).trim();
    // The body starts after the closing fence line. When the file ends right at
    // the closing `---` (no trailing newline) there is no body. Without this
    // check indexOf's -1 would turn into slice(0) and return the whole document,
    // frontmatter included.
    const afterFence = md.indexOf('\n', end + 1);
    const body = afterFence === -1 ? '' : md.slice(afterFence + 1).replace(/^\n+/, '');
    const data = { tags: [] };
    const lines = block.split('\n');
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const m = /^([A-Za-z0-9_-]+):\s*(.*)$/.exec(line);
        if (!m)
            continue;
        const key = m[1].toLowerCase();
        const val = m[2].trim();
        if (key === 'note_type' || key === 'notetype')
            data.noteType = stripQuotes(val);
        else if (key === 'parent')
            data.parent = unwrapLink(val);
        else if (key === 'up')
            data.up = unwrapLink(val);
        else if (key === 'tags') {
            if (val.startsWith('['))
                data.tags = inlineList(val);
            else if (val)
                data.tags = [stripQuotes(val)];
            else {
                // YAML block list: subsequent "- item" lines
                for (let j = i + 1; j < lines.length && /^\s*-\s+/.test(lines[j]); j++) {
                    data.tags.push(stripQuotes(lines[j].replace(/^\s*-\s+/, '').trim()));
                }
            }
        }
    }
    return { data, body };
}
/** Trim, drop one leading and one trailing quote character (' or "), and trim again. */
function stripQuotes(s) {
    // Trim first: with surrounding whitespace the ^/$ anchors would miss a quote
    // (e.g. the ` "b"` items produced by splitting `["a", "b"]` on commas).
    return s.trim().replace(/^["']|["']$/g, '').trim();
}
/** Reduce a `[[Target|alias]]` wikilink (or a plain/quoted scalar) to its target, alias dropped. */
function unwrapLink(s) {
    const m = /\[\[([^\]]+)\]\]/.exec(s);
    return (m ? m[1] : stripQuotes(s)).split('|')[0].trim();
}
/** Parse an inline list such as `[a, "b", 'c']` into unquoted, non-empty items. */
function inlineList(s) {
    return s
        .replace(/^\[|\]$/g, '')
        .split(',')
        .map((t) => stripQuotes(t))
        .filter(Boolean);
}
/**
 * Collect the targets of `[[wikilink]]` references in markdown.
 * Triple-backtick fenced blocks and inline code spans are blanked out first so
 * links inside code are ignored. For each link the alias after `|` and the
 * anchor after `#` are dropped. Results are deduplicated, in first-seen order.
 *
 * @param {string} md - markdown text.
 * @returns {string[]} unique, non-empty link targets.
 */
export function extractWikilinks(md) {
    const withoutFenced = md.replace(/```[\s\S]*?```/g, ' ');
    const prose = withoutFenced.replace(/`[^`]*`/g, ' ');
    const targets = Array.from(prose.matchAll(/\[\[([^\]]+)\]\]/g), (hit) => {
        const [beforeAlias] = hit[1].split('|');
        const [beforeAnchor] = beforeAlias.split('#');
        return beforeAnchor.trim();
    });
    return [...new Set(targets.filter((t) => t !== ''))];
}
/**
 * Split a markdown body into sections at ATX headings (`#` … `######`).
 * Lines inside fenced code blocks are never treated as headings. Text before
 * the first heading becomes a section with an empty heading.
 *
 * @param {string} md - markdown body (LF line endings).
 * @returns {{ heading: string, body: string }[]} sections in document order;
 *   each body keeps its lines, every one terminated by '\n'.
 */
function splitSections(md) {
    const sections = [];
    let cur = { heading: '', body: '' };
    // Track WHICH fence is open (marker char + length) rather than a bare toggle.
    // A `~~~` line inside a ``` block (or a shorter/other fence) is code, not a
    // closer; toggling on it would expose `# comment` lines in code as headings
    // and then swallow the real headings that follow.
    let openFence = null;
    for (const line of md.split('\n')) {
        const f = /^\s*(`{3,}|~{3,})(.*)$/.exec(line);
        if (f) {
            const marker = f[1];
            if (openFence === null) {
                openFence = { ch: marker[0], len: marker.length };
            }
            else if (marker[0] === openFence.ch && marker.length >= openFence.len && f[2].trim() === '') {
                // a closing fence uses the same character, is at least as long, and carries no info string
                openFence = null;
            }
        }
        const m = openFence !== null ? null : /^(#{1,6})\s+(.*\S)\s*$/.exec(line);
        if (m) {
            if (cur.heading || cur.body.trim())
                sections.push(cur);
            cur = { heading: m[2].trim(), body: '' };
        }
        else {
            cur.body += `${line}\n`;
        }
    }
    if (cur.heading || cur.body.trim())
        sections.push(cur);
    return sections;
}
/**
 * Greedily merge consecutive sections so that no emitted chunk is shorter than
 * MIN_CHARS (except possibly the last one). The heading of the first section in
 * a group labels the whole group; headings of sections folded in afterwards are
 * kept inline in the text on their own line.
 *
 * @param {{ heading: string, body: string }[]} sections - output of splitSections.
 * @returns {{ heading: string, body: string }[]} packed chunks with trimmed bodies.
 */
function packSections(sections) {
    const packed = [];
    // the group currently being accumulated, or null between groups
    let pending = null;
    for (const section of sections) {
        if (pending === null) {
            pending = { heading: section.heading, body: section.body };
        }
        else {
            const inlineHeading = section.heading ? `\n${section.heading}\n` : '';
            pending.body += inlineHeading + section.body;
        }
        const text = pending.body.trim();
        if (text.length >= MIN_CHARS) {
            packed.push({ heading: pending.heading ?? '', body: text });
            pending = null;
        }
    }
    // trailing group that never reached MIN_CHARS: keep it unless it is blank
    if (pending !== null) {
        const rest = pending.body.trim();
        if (rest) {
            packed.push({ heading: pending.heading ?? '', body: rest });
        }
    }
    return packed;
}
/**
 * Parse a markdown file into frontmatter + wikilink edges + heading-aware chunks.
 * Intended to be pure and total — structural weirdness degrades, never throws.
 *
 * @param {string} path - file path; only used to derive the stable chunk-id prefix.
 * @param {string} md - raw file text.
 * @returns {{ frontmatter: object, links: string[], chunks: { id: string, ordinal: number, heading: string, text: string }[] }}
 *   chunk ids are `${pathHash(path)}#${ordinal}`.
 */
export function chunkMarkdown(path, md) {
    // Drop a leading UTF-8 BOM. Reading a file as 'utf8' keeps it as U+FEFF, and
    // with it in front the `startsWith('---')` frontmatter check fails, so the
    // whole frontmatter block would be indexed as body text.
    if (md.charCodeAt(0) === 0xfeff)
        md = md.slice(1);
    // Normalize CRLF→LF — vault files on Windows are often CRLF; otherwise the
    // frontmatter fence search ('\n---') and line splitting break, and chunk
    // text differs across platforms.
    md = md.replace(/\r\n/g, '\n');
    const { data, body } = parseFrontmatter(md);
    const links = extractWikilinks(body);
    const packed = packSections(splitSections(body));
    const hash = pathHash(path);
    const chunks = packed.map((s, ordinal) => ({
        id: `${hash}#${ordinal}`,
        ordinal,
        heading: s.heading,
        text: s.body,
    }));
    // a file whose body packs down to nothing (e.g. only whitespace) yields no chunks; that's fine.
    return { frontmatter: data, links, chunks };
}
@@ -0,0 +1,187 @@
1
+ // ============================================================================
2
+ // src/search/embed-store.ts — OPTIONAL L1 semantic layer (BYOK embeddings).
3
+ //
4
+ // arra-oracle's semantic search needs LanceDB/sqlite-vec/Qdrant native binaries
5
+ // (~100MB, no Windows for LanceDB) plus an Ollama model download and a Python
6
+ // reranker sidecar. We need NONE of that: embeddings go through the user's
7
+ // EXISTING ai-SDK provider key (embedMany), vectors live as a compact Float32
8
+ // blob next to index.json, and cosine runs in-process over a BM25-PREFILTERED
9
+ // candidate set (so we never scan the whole corpus per query). The whole layer is
10
+ // LAZY — absent without a key, the engine degrades to BM25 with zero ceremony.
11
+ //
12
+ // Pure math (normalize, cosineTopK, (de)serialize) is unit-tested with fake
13
+ // vectors; the only networked function is embedTexts(), kept thin.
14
+ // ============================================================================
15
+ import { chmod, mkdir, readFile, rename, rm, stat, writeFile } from 'node:fs/promises';
16
+ import { randomUUID } from 'node:crypto';
17
+ import { join } from 'node:path';
18
+ import { embedMany } from 'ai';
19
+ import { appHomePath, persistenceEnabled } from '../brand.js';
20
+ import { resolveEmbedder } from '../providers/registry.js';
21
/** Location of the persisted vector file: `vectors.json` inside the directory returned by `appHomePath('search')`. */
export const VECTORS_PATH = join(appHomePath('search'), 'vectors.json');
22
/**
 * Create a fresh, empty vector index.
 *
 * @param tag label stored on the index (defaults to the empty string)
 * @returns `{ tag, dim: 0, ids: [], data }` with a zero-length Float32Array;
 *          every call returns new `ids`/`data` instances
 */
export function emptyVectors(tag = '') {
    const index = {
        tag,
        dim: 0,
        ids: [],
        data: new Float32Array(0),
    };
    return index;
}
25
/**
 * Scale `v` to unit L2 length, mutating it in place, and return the same object.
 * With unit-length vectors a cosine similarity is just a dot product.
 * A vector whose norm is 0 (or not a number) is divided by 1, i.e. left as is.
 *
 * @param v indexable numeric vector with a `length` (array or typed array)
 * @returns the same `v`, now normalized
 */
export function normalizeVec(v) {
    let squares = 0;
    for (let idx = 0; idx < v.length; idx++) {
        squares += v[idx] * v[idx];
    }
    const magnitude = Math.sqrt(squares);
    // fall back to 1 so the all-zero vector does not turn into NaNs
    const divisor = magnitude ? magnitude : 1;
    for (let idx = 0; idx < v.length; idx++) {
        v[idx] /= divisor;
    }
    return v;
}
35
/**
 * Pack `{ id, vec }` rows into a single flat, row-major Float32Array index.
 * Each vector is copied into a Float32Array and L2-normalized before storing.
 * The dimension is taken from the first row; no rows, or a first row of
 * length 0, yields an empty index.
 *
 * @param tag  label stored on the resulting index
 * @param rows array of `{ id, vec }`
 * @returns `{ tag, dim, ids, data }` where row i occupies `data[i*dim .. (i+1)*dim)`
 * @throws Error when a row's vector length differs from the first row's
 */
export function buildVectorIndex(tag, rows) {
    const dim = rows.length ? rows[0].vec.length : 0;
    if (!rows.length || dim <= 0) {
        return emptyVectors(tag);
    }
    const ids = [];
    const data = new Float32Array(rows.length * dim);
    for (const [rowNo, row] of rows.entries()) {
        if (row.vec.length !== dim) {
            throw new Error(`vector dimension mismatch for "${row.id}": expected ${dim}, got ${row.vec.length}`);
        }
        // work on a private copy so the caller's vector is never mutated
        const unit = Float32Array.from(row.vec);
        normalizeVec(unit);
        data.set(unit, rowNo * dim);
        ids.push(row.id);
    }
    return { tag, dim, ids, data };
}
55
/**
 * Cosine top-K over a vector index whose rows are expected to be unit-length
 * (as produced by buildVectorIndex), so each score is a plain dot product.
 * The query is copied into a Float32Array and normalized here; the caller's
 * `queryVec` is not mutated.
 *
 * @param vi         index `{ dim, ids, data }` with row-major `data`
 * @param queryVec   query vector; must have exactly `vi.dim` components
 * @param k          maximum number of hits to return (default 50). A value that
 *                   is not greater than 0 (zero, negative, NaN) yields `[]`.
 * @param candidates optional allow-list with a `has(id)` method (e.g. a Set from
 *                   the BM25 prefilter); rows whose id is not in it are skipped
 * @returns up to `k` `{ id, score }` hits, highest score first, ties broken by
 *          ascending id so the output is deterministic. Returns `[]` for an
 *          empty index or a query of the wrong dimension.
 */
export function cosineTopK(vi, queryVec, k = 50, candidates) {
    if (!vi.dim || !vi.ids.length)
        return [];
    // Guard before slicing: slice(0, negative) would otherwise return
    // "everything except the last |k| hits" instead of nothing.
    if (!(k > 0))
        return [];
    const q = normalizeVec(Float32Array.from(queryVec));
    if (q.length !== vi.dim)
        return [];
    const hits = [];
    for (let row = 0; row < vi.ids.length; row++) {
        const id = vi.ids[row];
        if (candidates && !candidates.has(id))
            continue;
        const base = row * vi.dim;
        let dot = 0;
        for (let d = 0; d < vi.dim; d++)
            dot += q[d] * vi.data[base + d];
        hits.push({ id, score: dot });
    }
    return hits
        .sort((a, b) => b.score - a.score || (a.id < b.id ? -1 : a.id > b.id ? 1 : 0))
        .slice(0, k);
}
81
/**
 * Collect the ids present in a vector index into a Set, for membership checks
 * (e.g. deciding which chunks still need embedding during incremental updates).
 * Note this is a Set of ids only; it does not carry row positions.
 *
 * @param vi index with an `ids` array
 * @returns a new Set containing every id in `vi.ids`
 */
export function vectorIds(vi) {
    const { ids } = vi;
    return new Set(ids);
}
85
/** Format version stamped into serialized vector files and required when reading them back. */
const VEC_FILE_VERSION = 1;
/**
 * Convert a vector index into a JSON-friendly object.
 * The Float32Array is encoded as base64 of exactly the bytes it views
 * (its byteOffset/byteLength are honored, so subarray views serialize correctly).
 *
 * @param vi index `{ tag, dim, ids, data }`
 * @returns `{ v, tag, dim, ids, b64 }`
 */
export function serializeVectors(vi) {
    const { tag, dim, ids, data } = vi;
    // wrap the underlying memory without copying, restricted to this view's window
    const bytes = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
    const b64 = bytes.toString('base64');
    return { v: VEC_FILE_VERSION, tag, dim, ids, b64 };
}
90
/**
 * Rebuild a vector index from the object produced by serializeVectors
 * (typically after JSON.parse). Never throws on malformed input: anything
 * that fails validation degrades to an empty index.
 *
 * @param raw untrusted parsed value
 * @returns `{ tag, dim, ids, data }`. The result always satisfies
 *          dim === 0 ⇔ ids.length === 0 ⇔ data.length === 0:
 *          - wrong shape / version / field types → `emptyVectors()` (tag '')
 *          - dim 0, no ids, or a byte length that does not equal
 *            ids.length * dim * 4 → `emptyVectors(tag)` (tag preserved)
 */
export function deserializeVectors(raw) {
    const o = raw;
    const wellFormed = Boolean(o) &&
        o.v === VEC_FILE_VERSION &&
        typeof o.tag === 'string' &&
        Number.isInteger(o.dim) &&
        o.dim >= 0 &&
        Array.isArray(o.ids) &&
        o.ids.every((id) => typeof id === 'string') &&
        typeof o.b64 === 'string';
    if (!wellFormed)
        return emptyVectors();
    // An index with no dimension or no rows is the empty index. Collapsing both
    // cases here keeps the invariant (dim===0 ⇔ ids=[] ⇔ data empty) true at the
    // deserializer boundary; previously `{dim: 3, ids: []}` slipped through with dim 3.
    if (o.dim === 0 || o.ids.length === 0)
        return emptyVectors(o.tag);
    const buf = Buffer.from(o.b64, 'base64');
    // 4 bytes per float32; any other size means truncation or corruption
    if (buf.byteLength !== o.ids.length * o.dim * 4)
        return emptyVectors(o.tag);
    // Copy into a fresh ArrayBuffer: a Buffer may sit at an unaligned offset inside
    // a shared pool, which a Float32Array view cannot be created on directly.
    const bytes = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
    return { tag: o.tag, dim: o.dim, ids: o.ids, data: new Float32Array(bytes) };
}
117
+ // ---- fs boundary (mirrors store.ts: atomic, 0o600, persistence-gated) ----
118
/**
 * Read and decode the vector index stored at VECTORS_PATH.
 * Best-effort by design: any failure (file missing, unreadable, invalid JSON,
 * or an error while decoding) results in an empty index instead of an exception.
 *
 * @returns the stored vector index, or `emptyVectors()` on any failure
 */
export async function loadVectors() {
    try {
        const text = await readFile(VECTORS_PATH, 'utf8');
        const parsed = JSON.parse(text);
        return deserializeVectors(parsed);
    }
    catch {
        return emptyVectors();
    }
}
126
/**
 * Persist a vector index to VECTORS_PATH.
 *
 * Does nothing when `persistenceEnabled()` is false. Otherwise the serialized
 * index is written to a uniquely named temp file in the same directory
 * (created with mode 0o600, followed by a best-effort chmod) and then renamed
 * over the final path, so the final path is only ever replaced by a fully
 * written file. On any failure the temp file is removed and the original
 * error is rethrown.
 *
 * @param vi index to store
 */
export async function saveVectors(vi) {
    if (!persistenceEnabled()) {
        return;
    }
    const searchDir = appHomePath('search');
    await mkdir(searchDir, { recursive: true });
    const tmpPath = join(searchDir, `vectors.${randomUUID()}.tmp`);
    try {
        const payload = `${JSON.stringify(serializeVectors(vi))}\n`;
        await writeFile(tmpPath, payload, { mode: 0o600 });
        // chmod failures are deliberately ignored (best-effort tightening)
        await chmod(tmpPath, 0o600).catch(() => { });
        await rename(tmpPath, VECTORS_PATH);
    }
    catch (err) {
        // cleanup is best-effort too; the original error is what the caller sees
        await rm(tmpPath, { force: true }).catch(() => { });
        throw err;
    }
}
142
/**
 * Report when the vector file at VECTORS_PATH was last modified.
 *
 * @returns the file's `mtimeMs`, or 0 when it cannot be stat'ed (e.g. it does not exist)
 */
export async function vectorsMtimeMs() {
    try {
        const { mtimeMs } = await stat(VECTORS_PATH);
        return mtimeMs;
    }
    catch {
        return 0;
    }
}
150
+ // ---- networked: embedding (the only part that talks to a provider) ----
151
/** Maximum number of texts sent to the provider per embedding request by embedTexts. */
const BATCH = 64;
152
/**
 * Resolve a bring-your-own-key embedder by delegating to `resolveEmbedder`.
 * Exists so search code can import embedder resolution from this module.
 *
 * @param spec passed through unchanged to `resolveEmbedder`
 * @returns whatever `resolveEmbedder(spec)` returns (per the original note: an embedder, or null)
 */
export function getEmbedder(spec) {
    const embedder = resolveEmbedder(spec);
    return embedder;
}
156
/**
 * Embed a list of texts, BATCH at a time, one request after another.
 * Each batch goes through embedBatchWithRetry, which handles backoff on
 * transient failures.
 *
 * @param embedder embedder handle forwarded to embedBatchWithRetry
 * @param texts    strings to embed
 * @returns one embedding per input text, in input order
 * @throws whatever embedBatchWithRetry throws once a batch fails for good;
 *         callers are expected to catch and fall back to BM25
 */
export async function embedTexts(embedder, texts) {
    const vectors = [];
    let start = 0;
    while (start < texts.length) {
        const batch = texts.slice(start, start + BATCH);
        const embedded = await embedBatchWithRetry(embedder, batch);
        vectors.push(...embedded);
        start += BATCH;
    }
    return vectors;
}
169
/**
 * Embed one query string by sending it as a single-item batch.
 *
 * @param embedder embedder handle forwarded to embedBatchWithRetry
 * @param text     the query text
 * @returns the first (only) embedding returned for that batch
 */
export async function embedQuery(embedder, text) {
    const embedded = await embedBatchWithRetry(embedder, [text]);
    return embedded[0];
}
173
/**
 * Embed one batch via `embedMany`, retrying transient failures with
 * exponential backoff.
 *
 * A failure is treated as transient when its message mentions a rate limit,
 * timeout, connection reset, 429/503 or "overloaded". Retries are allowed
 * while the attempt index is below 4, so starting from the default there are
 * at most 5 calls, separated by waits of 400, 800, 1600 and 3200 ms.
 *
 * @param embedder object whose `model` is passed to `embedMany`
 * @param values   texts to embed in this single request
 * @param attempt  zero-based attempt index to start from (default 0)
 * @returns the `embeddings` array from `embedMany`
 * @throws the original rejection value, unchanged, when the failure is not
 *         transient or the retry budget is exhausted
 */
async function embedBatchWithRetry(embedder, values, attempt = 0) {
    for (let tries = attempt;; tries++) {
        try {
            const { embeddings } = await embedMany({ model: embedder.model, values });
            return embeddings;
        }
        catch (e) {
            // `e` can be anything, including null/undefined; optional chaining keeps
            // us from replacing the real failure with a TypeError.
            const msg = e?.message ?? '';
            const retryable = /429|rate.?limit|timeout|ECONNRESET|503|overloaded/i.test(msg);
            if (!retryable || tries >= 4)
                throw e;
            await new Promise((r) => setTimeout(r, 400 * 2 ** tries));
        }
    }
}