skyloom 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/.github/workflows/ci.yml +36 -0
  2. package/CONVERSION_PLAN.md +191 -0
  3. package/README.md +67 -0
  4. package/dist/agents/dew.d.ts +15 -0
  5. package/dist/agents/dew.d.ts.map +1 -0
  6. package/dist/agents/dew.js +74 -0
  7. package/dist/agents/dew.js.map +1 -0
  8. package/dist/agents/fair.d.ts +15 -0
  9. package/dist/agents/fair.d.ts.map +1 -0
  10. package/dist/agents/fair.js +106 -0
  11. package/dist/agents/fair.js.map +1 -0
  12. package/dist/agents/fog.d.ts +15 -0
  13. package/dist/agents/fog.d.ts.map +1 -0
  14. package/dist/agents/fog.js +52 -0
  15. package/dist/agents/fog.js.map +1 -0
  16. package/dist/agents/frost.d.ts +15 -0
  17. package/dist/agents/frost.d.ts.map +1 -0
  18. package/dist/agents/frost.js +54 -0
  19. package/dist/agents/frost.js.map +1 -0
  20. package/dist/agents/rain.d.ts +15 -0
  21. package/dist/agents/rain.d.ts.map +1 -0
  22. package/dist/agents/rain.js +54 -0
  23. package/dist/agents/rain.js.map +1 -0
  24. package/dist/agents/snow.d.ts +27 -0
  25. package/dist/agents/snow.d.ts.map +1 -0
  26. package/dist/agents/snow.js +226 -0
  27. package/dist/agents/snow.js.map +1 -0
  28. package/dist/cli/main.d.ts +7 -0
  29. package/dist/cli/main.d.ts.map +1 -0
  30. package/dist/cli/main.js +402 -0
  31. package/dist/cli/main.js.map +1 -0
  32. package/dist/cli/mode.d.ts +17 -0
  33. package/dist/cli/mode.d.ts.map +1 -0
  34. package/dist/cli/mode.js +56 -0
  35. package/dist/cli/mode.js.map +1 -0
  36. package/dist/core/agent.d.ts +174 -0
  37. package/dist/core/agent.d.ts.map +1 -0
  38. package/dist/core/agent.js +1332 -0
  39. package/dist/core/agent.js.map +1 -0
  40. package/dist/core/agent_helpers.d.ts +51 -0
  41. package/dist/core/agent_helpers.d.ts.map +1 -0
  42. package/dist/core/agent_helpers.js +477 -0
  43. package/dist/core/agent_helpers.js.map +1 -0
  44. package/dist/core/bus.d.ts +99 -0
  45. package/dist/core/bus.d.ts.map +1 -0
  46. package/dist/core/bus.js +191 -0
  47. package/dist/core/bus.js.map +1 -0
  48. package/dist/core/cache.d.ts +63 -0
  49. package/dist/core/cache.d.ts.map +1 -0
  50. package/dist/core/cache.js +121 -0
  51. package/dist/core/cache.js.map +1 -0
  52. package/dist/core/checkpoint.d.ts +19 -0
  53. package/dist/core/checkpoint.d.ts.map +1 -0
  54. package/dist/core/checkpoint.js +120 -0
  55. package/dist/core/checkpoint.js.map +1 -0
  56. package/dist/core/circuit_breaker.d.ts +46 -0
  57. package/dist/core/circuit_breaker.d.ts.map +1 -0
  58. package/dist/core/circuit_breaker.js +99 -0
  59. package/dist/core/circuit_breaker.js.map +1 -0
  60. package/dist/core/config.d.ts +97 -0
  61. package/dist/core/config.d.ts.map +1 -0
  62. package/dist/core/config.js +281 -0
  63. package/dist/core/config.js.map +1 -0
  64. package/dist/core/constants.d.ts +78 -0
  65. package/dist/core/constants.d.ts.map +1 -0
  66. package/dist/core/constants.js +84 -0
  67. package/dist/core/constants.js.map +1 -0
  68. package/dist/core/factory.d.ts +63 -0
  69. package/dist/core/factory.d.ts.map +1 -0
  70. package/dist/core/factory.js +537 -0
  71. package/dist/core/factory.js.map +1 -0
  72. package/dist/core/icons.d.ts +28 -0
  73. package/dist/core/icons.d.ts.map +1 -0
  74. package/dist/core/icons.js +86 -0
  75. package/dist/core/icons.js.map +1 -0
  76. package/dist/core/index.d.ts +29 -0
  77. package/dist/core/index.d.ts.map +1 -0
  78. package/dist/core/index.js +54 -0
  79. package/dist/core/index.js.map +1 -0
  80. package/dist/core/llm.d.ts +121 -0
  81. package/dist/core/llm.d.ts.map +1 -0
  82. package/dist/core/llm.js +532 -0
  83. package/dist/core/llm.js.map +1 -0
  84. package/dist/core/logger.d.ts +57 -0
  85. package/dist/core/logger.d.ts.map +1 -0
  86. package/dist/core/logger.js +122 -0
  87. package/dist/core/logger.js.map +1 -0
  88. package/dist/core/mcp.d.ts +190 -0
  89. package/dist/core/mcp.d.ts.map +1 -0
  90. package/dist/core/mcp.js +822 -0
  91. package/dist/core/mcp.js.map +1 -0
  92. package/dist/core/mcp_server.d.ts +26 -0
  93. package/dist/core/mcp_server.d.ts.map +1 -0
  94. package/dist/core/mcp_server.js +211 -0
  95. package/dist/core/mcp_server.js.map +1 -0
  96. package/dist/core/memory.d.ts +190 -0
  97. package/dist/core/memory.d.ts.map +1 -0
  98. package/dist/core/memory.js +988 -0
  99. package/dist/core/memory.js.map +1 -0
  100. package/dist/core/middleware.d.ts +114 -0
  101. package/dist/core/middleware.d.ts.map +1 -0
  102. package/dist/core/middleware.js +248 -0
  103. package/dist/core/middleware.js.map +1 -0
  104. package/dist/core/pipelines.d.ts +87 -0
  105. package/dist/core/pipelines.d.ts.map +1 -0
  106. package/dist/core/pipelines.js +301 -0
  107. package/dist/core/pipelines.js.map +1 -0
  108. package/dist/core/profile.d.ts +23 -0
  109. package/dist/core/profile.d.ts.map +1 -0
  110. package/dist/core/profile.js +289 -0
  111. package/dist/core/profile.js.map +1 -0
  112. package/dist/core/router.d.ts +24 -0
  113. package/dist/core/router.d.ts.map +1 -0
  114. package/dist/core/router.js +111 -0
  115. package/dist/core/router.js.map +1 -0
  116. package/dist/core/schemas.d.ts +82 -0
  117. package/dist/core/schemas.d.ts.map +1 -0
  118. package/dist/core/schemas.js +200 -0
  119. package/dist/core/schemas.js.map +1 -0
  120. package/dist/core/semantic.d.ts +92 -0
  121. package/dist/core/semantic.d.ts.map +1 -0
  122. package/dist/core/semantic.js +175 -0
  123. package/dist/core/semantic.js.map +1 -0
  124. package/dist/core/skill.d.ts +68 -0
  125. package/dist/core/skill.d.ts.map +1 -0
  126. package/dist/core/skill.js +350 -0
  127. package/dist/core/skill.js.map +1 -0
  128. package/dist/core/tool.d.ts +99 -0
  129. package/dist/core/tool.d.ts.map +1 -0
  130. package/dist/core/tool.js +341 -0
  131. package/dist/core/tool.js.map +1 -0
  132. package/dist/core/tool_router.d.ts +29 -0
  133. package/dist/core/tool_router.d.ts.map +1 -0
  134. package/dist/core/tool_router.js +172 -0
  135. package/dist/core/tool_router.js.map +1 -0
  136. package/dist/core/workspace.d.ts +48 -0
  137. package/dist/core/workspace.d.ts.map +1 -0
  138. package/dist/core/workspace.js +179 -0
  139. package/dist/core/workspace.js.map +1 -0
  140. package/dist/plugins/loader.d.ts +17 -0
  141. package/dist/plugins/loader.d.ts.map +1 -0
  142. package/dist/plugins/loader.js +96 -0
  143. package/dist/plugins/loader.js.map +1 -0
  144. package/dist/skills/loader.d.ts +9 -0
  145. package/dist/skills/loader.d.ts.map +1 -0
  146. package/dist/skills/loader.js +78 -0
  147. package/dist/skills/loader.js.map +1 -0
  148. package/dist/tools/builtin.d.ts +10 -0
  149. package/dist/tools/builtin.d.ts.map +1 -0
  150. package/dist/tools/builtin.js +414 -0
  151. package/dist/tools/builtin.js.map +1 -0
  152. package/dist/tools/computer.d.ts +12 -0
  153. package/dist/tools/computer.d.ts.map +1 -0
  154. package/dist/tools/computer.js +326 -0
  155. package/dist/tools/computer.js.map +1 -0
  156. package/dist/tools/delegate.d.ts +10 -0
  157. package/dist/tools/delegate.d.ts.map +1 -0
  158. package/dist/tools/delegate.js +45 -0
  159. package/dist/tools/delegate.js.map +1 -0
  160. package/dist/web/server.d.ts +5 -0
  161. package/dist/web/server.d.ts.map +1 -0
  162. package/dist/web/server.js +647 -0
  163. package/dist/web/server.js.map +1 -0
  164. package/dist/web/tts.d.ts +33 -0
  165. package/dist/web/tts.d.ts.map +1 -0
  166. package/dist/web/tts.js +69 -0
  167. package/dist/web/tts.js.map +1 -0
  168. package/package.json +60 -0
  169. package/scripts/install.js +48 -0
  170. package/scripts/link.js +10 -0
  171. package/setup.bat +79 -0
  172. package/skill-test-ty2fOA/test.md +10 -0
  173. package/src/agents/dew.ts +70 -0
  174. package/src/agents/fair.ts +102 -0
  175. package/src/agents/fog.ts +48 -0
  176. package/src/agents/frost.ts +50 -0
  177. package/src/agents/rain.ts +50 -0
  178. package/src/agents/snow.ts +239 -0
  179. package/src/cli/main.ts +405 -0
  180. package/src/cli/mode.ts +58 -0
  181. package/src/core/agent.ts +1506 -0
  182. package/src/core/agent_helpers.ts +461 -0
  183. package/src/core/bus.ts +221 -0
  184. package/src/core/cache.ts +153 -0
  185. package/src/core/checkpoint.ts +94 -0
  186. package/src/core/circuit_breaker.ts +119 -0
  187. package/src/core/config.ts +341 -0
  188. package/src/core/constants.ts +95 -0
  189. package/src/core/factory.ts +627 -0
  190. package/src/core/icons.ts +53 -0
  191. package/src/core/index.ts +31 -0
  192. package/src/core/llm.ts +724 -0
  193. package/src/core/logger.ts +144 -0
  194. package/src/core/mcp.ts +953 -0
  195. package/src/core/mcp_server.ts +176 -0
  196. package/src/core/memory.ts +1169 -0
  197. package/src/core/middleware.ts +350 -0
  198. package/src/core/pipelines.ts +424 -0
  199. package/src/core/profile.ts +255 -0
  200. package/src/core/router.ts +124 -0
  201. package/src/core/schemas.ts +282 -0
  202. package/src/core/semantic.ts +211 -0
  203. package/src/core/skill.ts +342 -0
  204. package/src/core/tool.ts +427 -0
  205. package/src/core/tool_router.ts +193 -0
  206. package/src/core/workspace.ts +150 -0
  207. package/src/plugins/loader.ts +66 -0
  208. package/src/skills/loader.ts +46 -0
  209. package/src/sql.js.d.ts +29 -0
  210. package/src/tools/builtin.ts +382 -0
  211. package/src/tools/computer.ts +269 -0
  212. package/src/tools/delegate.ts +49 -0
  213. package/src/web/server.ts +634 -0
  214. package/src/web/tts.ts +93 -0
  215. package/tests/bus.test.ts +121 -0
  216. package/tests/icons.test.ts +45 -0
  217. package/tests/router.test.ts +86 -0
  218. package/tests/schemas.test.ts +51 -0
  219. package/tests/semantic.test.ts +83 -0
  220. package/tests/setup.ts +10 -0
  221. package/tests/skill.test.ts +172 -0
  222. package/tests/tool.test.ts +108 -0
  223. package/tests/tool_router.test.ts +71 -0
  224. package/tsconfig.json +37 -0
  225. package/vitest.config.ts +17 -0
@@ -0,0 +1,124 @@
/**
 * Complexity router: classify a user goal into direct / single / orchestrate.
 *
 * Rules-first (no LLM): every decision below is plain substring or regex
 * matching on the goal text. The router exists solely to keep simple goals
 * from triggering Snow's full task-decomposition LLM call when a single agent
 * could answer in one shot.
 */

export type Mode = 'direct' | 'single' | 'orchestrate';

/** Sequencing words (Chinese and English) that hint at a multi-step request. */
const MULTI_STEP_TOKENS = [
  '先', '再', '然后', '接着', '之后', '其次', '最后',
  '第一步', '第二步', '第三步', '步骤', '顺序', '依次', '首先',
  'step 1', 'step 2', 'first,', 'then,', 'after that', 'finally',
];

/** Greetings / pleasantries; a short goal containing one is answered directly. */
const GREETING_TOKENS = [
  '你好', '您好', 'hi', 'hello', 'hey', '在吗', '嗨',
  '早上好', '晚安', '谢谢', 'thanks', 'thank you', '再见', 'bye',
];

/** Question-style phrases that mark a single factual or explanatory request. */
const SINGLE_ACTION_HINTS = [
  '解释', '什么是', '为什么', '如何', '怎么', '查询', '搜索',
  '搜一下', '翻译', '总结', 'summarize', 'explain', 'what is', 'why', 'how do',
];

/** Verbs that imply the agent has to do something (usually with tools). */
const ACTION_VERBS = [
  '写', '帮我写', '生成', '创建', '实现', '做', '搜', '查',
  '找', '审查', '审计', '翻译', '重构', '修改', '改', '部署',
  '运行', '执行',
  'write', 'create', 'generate', 'implement', 'search', 'find',
  'review', 'translate', 'deploy', 'run',
];

// Structural signals. The two patterns carrying the `g` flag are only used
// with String.prototype.match (to count hits); the others only with .test().
const CODE_BLOCK = /```[\s\S]+?```/;
const URL_PATTERN = /https?:\/\/\S+/;
const PATH_PATTERN = /(?:[A-Za-z]:[\\/]|[\\/])[\w\-./\\]+/;
const NUMBERED_LIST = /(?:^|\n)\s*(?:\d+[.)、]|[-*])\s+/gm;
const INLINE_ENUMERATED = /\b\d+[.)、]\s*\S/g;

/**
 * Decide the execution mode for a user goal.
 *
 * direct: short greeting / single factual question, no tools needed.
 * single: clear single-purpose task, one agent + tools.
 * orchestrate: multi-step plan worth decomposing into sub-tasks.
 *
 * Rules are evaluated top to bottom and the first match wins. Token matching
 * is a case-insensitive substring test, so short ASCII tokens can also match
 * inside longer words.
 *
 * @param goal - Raw user goal; empty or whitespace-only input yields 'direct'.
 * @returns The selected execution mode.
 */
export function classify(goal: string): Mode {
  if (!goal || !goal.trim()) return 'direct';

  const text = goal.trim();
  const lower = text.toLowerCase();
  const length = text.length;

  /** Number of distinct tokens from `tokens` that occur in the goal. */
  const countHits = (tokens: string[]): number =>
    tokens.filter(t => lower.includes(t)).length;

  const hasCode = CODE_BLOCK.test(text);
  const hasUrl = URL_PATTERN.test(text);
  const hasPath = PATH_PATTERN.test(text);
  const listMatches = (text.match(NUMBERED_LIST) || []).length;
  const inlineEnumHits = (text.match(INLINE_ENUMERATED) || []).length;

  const multiStepHits = countHits(MULTI_STEP_TOKENS);
  const greetingHits = countHits(GREETING_TOKENS);
  const singleHits = countHits(SINGLE_ACTION_HINTS);
  const actionHits = countHits(ACTION_VERBS);

  // Short pleasantry with nothing to do.
  if (greetingHits >= 1 && length < 30 && multiStepHits === 0 && actionHits === 0) {
    return 'direct';
  }

  // Explicit sequencing or enumerated steps.
  if (multiStepHits >= 2 || listMatches >= 2 || inlineEnumHits >= 3) {
    return 'orchestrate';
  }

  // Long goal with at least one sequencing word.
  if (length > 200 && multiStepHits >= 1) {
    return 'orchestrate';
  }

  // Sizeable goal that embeds a fenced code block.
  if (hasCode && length > 150) {
    return 'orchestrate';
  }

  // Tool-use signals push toward single, not direct
  if (hasPath || hasUrl || actionHits >= 1) {
    return 'single';
  }

  // Accept both the ASCII and the fullwidth (CJK) question mark. The original
  // tested the ASCII mark twice, so short Chinese questions that contained a
  // sequencing word were never recognised as questions.
  const endsWithQuestion = text.endsWith('?') || text.endsWith('？');

  if (length < 50 && !hasCode) {
    if (singleHits >= 1 || endsWithQuestion || multiStepHits === 0) {
      return 'direct';
    }
  }

  return 'single';
}
98
+
/**
 * Pick a single agent for a non-orchestrate goal, by keyword routing.
 *
 * Buckets are checked in order (more specific first) and the first available
 * agent with a matching hint wins. If nothing matches, falls back to `rain`
 * when available, otherwise to the first entry of `available`.
 *
 * @param goal - User goal; matched case-insensitively.
 * @param available - Names of the agents that may be chosen.
 * @returns The chosen agent name. When `available` is empty there is nothing
 *   to choose from and the result is `undefined` at runtime, so callers
 *   should pass a non-empty set.
 */
export function pickAgentForGoal(goal: string, available: Set<string>): string {
  const lower = goal.toLowerCase();

  // More specific buckets first
  const buckets: Array<[string, string[]]> = [
    ['frost', ['审查', 'review', '漏洞', '安全', '审计', 'lint', '重构建议', 'code smell']],
    ['dew', ['部署', '运行', '执行命令', 'shell', 'deploy', 'ci', 'cd', '环境变量', '运维']],
    ['fog', ['研究', '调研', '搜一下', '搜索', '查一下', '查资料', 'research', 'search', '调查', '找一下', '找资料']],
    ['rain', ['写', '生成', '实现', 'create', 'generate', '写一段', '写个', '代码', '函数', '实现一个']],
    ['fair', ['陪我', '聊天', '心情', '难过', '开心', '孤独', '倾诉', '你好', 'hi', 'hello', '嗨']],
  ];

  for (const [agent, hints] of buckets) {
    if (!available.has(agent)) continue;
    if (hints.some(h => matchesRoutingHint(lower, h))) return agent;
  }

  if (available.has('rain')) return 'rain';
  return Array.from(available)[0];
}

/**
 * Test whether a lower-cased goal mentions a routing hint.
 *
 * Non-ASCII (CJK) hints use plain substring matching, since those scripts have
 * no word separators. ASCII hints must start at a word boundary so that "ci"
 * does not fire inside "precise" or "hi" inside "this"; hints of three
 * characters or fewer must also end at a boundary, while longer hints may be
 * followed by a suffix ("deploying", "reviews").
 */
function matchesRoutingHint(lowerGoal: string, hint: string): boolean {
  if (!/^[ -~]+$/.test(hint)) return lowerGoal.includes(hint);

  const isWordChar = (ch: string | undefined): boolean =>
    ch !== undefined && /[a-z0-9]/.test(ch);
  const needsTrailingBoundary = hint.length <= 3;

  let at = lowerGoal.indexOf(hint);
  while (at >= 0) {
    const startsWord = !isWordChar(lowerGoal[at - 1]);
    const endsWord = !isWordChar(lowerGoal[at + hint.length]);
    if (startsWord && (endsWord || !needsTrailingBoundary)) return true;
    at = lowerGoal.indexOf(hint, at + 1);
  }
  return false;
}
@@ -0,0 +1,282 @@
1
+ /**
2
+ * Lightweight structured output schemas for LLM response validation.
3
+ *
4
+ * Why: LLM JSON output is inherently fragile — models emit markdown fences,
5
+ * trailing commas, unquoted keys, or hallucinated fields. Rather than layering
6
+ * heuristic repair (which silently passes corrupted data), we define typed
7
+ * schemas and validate on ingress. Parsing failures surface immediately so the
8
+ * caller can retry with a corrected prompt instead of propagating garbage.
9
+ *
10
+ * Zero external dependencies: uses only JSON and TypeScript types.
11
+ */
12
+
/**
 * Thrown when an LLM response cannot be parsed or fails schema validation.
 *
 * Besides the human-readable message, the error keeps the raw response text
 * so callers can log it or retry with full context.
 */
export class SchemaValidationError extends Error {
  /** Raw response text that failed validation; empty string when not supplied. */
  raw: string;

  /**
   * @param message - Human-readable description of the failure.
   * @param raw - The offending response text (defaults to "").
   */
  constructor(message: string, raw: string = "") {
    super(message);
    const self = this;
    self.name = "SchemaValidationError";
    self.raw = raw;
  }
}
27
+
/**
 * A single step of a task plan (mirrors PipelineStep / Task).
 */
export interface TaskStepSchema {
  /** Step identifier; either a string or a number is accepted. */
  id: string | number;
  /** What the step should accomplish. */
  description: string;
  /** Name of the agent that should run the step. */
  agent?: string;
  /** Identifiers of the steps this one depends on. */
  depends_on?: Array<string>;
  /** Relative priority of the step. */
  priority?: "low" | "medium" | "high";
}

/**
 * Complete task plan produced by Snow's orchestrator.
 */
export interface TaskPlanSchema {
  /** The goal the plan is meant to achieve. */
  goal: string;
  /** The steps that make up the plan. */
  steps: Array<TaskStepSchema>;
}

/**
 * One extracted fact destined for long-term memory.
 */
export interface FactSchema {
  key: string;
  value: string;
  /** Optional grouping label for the fact. */
  category?: string;
}

/**
 * Structured result of an LLM fact-extraction call.
 */
export interface ExtractionResultSchema {
  facts: Array<FactSchema>;
}

/**
 * A tool invocation requested in an LLM response.
 */
export interface ToolCallSchema {
  /** Name of the tool to call. */
  name: string;
  /** Arguments for the call, keyed by parameter name. */
  arguments: Record<string, unknown>;
}

/**
 * A message exchanged in agent communication.
 */
export interface MessageSchema {
  role: "user" | "assistant" | "tool";
  content: string;
  /** Tool calls attached to the message, if any. */
  tool_calls?: Array<ToolCallSchema>;
  /** Identifier of the tool call this message relates to, if any. */
  tool_call_id?: string;
}
80
+
/**
 * Best-effort conversion of a value to a named target type.
 *
 * `null` and `undefined` pass through untouched, as does any value whose
 * target type is not one of "string", "number", "boolean", "array", "object".
 *
 * @param value - Value to convert.
 * @param targetType - Name of the desired type.
 * @returns The converted value.
 */
function coerceType(value: unknown, targetType: string): unknown {
  if (value === null || value === undefined) {
    return value;
  }

  if (targetType === "string") return String(value);
  if (targetType === "number") return Number(value);
  // NOTE(review): Boolean() is plain truthiness, so the string "false" becomes true.
  if (targetType === "boolean") return Boolean(value);
  // Non-arrays are replaced by an empty array rather than wrapped.
  if (targetType === "array") return Array.isArray(value) ? value : [];
  // Arrays also count as objects here; anything else becomes an empty object.
  if (targetType === "object") return typeof value === "object" ? value : {};

  return value;
}
104
+
/**
 * Return the text between the first usable pair of markdown fences.
 *
 * A "```json" opener is preferred over a bare "```" opener. Returns
 * `undefined` when no opener is followed by a closing fence.
 */
function fencedBody(text: string): string | undefined {
  for (const fence of ["```json", "```"]) {
    const open = text.indexOf(fence);
    if (open < 0) continue;
    const bodyStart = open + fence.length;
    const close = text.indexOf("```", bodyStart);
    if (close >= 0) return text.slice(bodyStart, close).trim();
  }
  return undefined;
}

/**
 * Return the first brace-delimited object in `text`, or `undefined` when the
 * text contains no "{".
 *
 * Scanning starts at the first "{", so a stray "}" earlier in the text cannot
 * unbalance the depth counter. Braces inside double-quoted strings are
 * ignored. If the input ends before the object is closed, the missing "}"
 * characters are appended.
 */
function scanFirstObject(text: string): string | undefined {
  const start = text.indexOf("{");
  if (start < 0) return undefined;

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return text.substring(start, i + 1);
    }
  }

  // Still open at end of input: depth is at least 1 here.
  return text.substring(start) + "}".repeat(depth);
}

/**
 * Extract the first JSON object from a potentially noisy LLM response.
 *
 * The contents of a markdown code fence are preferred; if the fence holds no
 * object the whole text is scanned instead. Only objects are located; a
 * response that contains no "{" is rejected.
 *
 * @param text - Raw response text.
 * @returns The extracted object text (not yet parsed).
 * @throws SchemaValidationError when no object can be found.
 */
function extractJSON(text: string): string {
  const cleaned = text.trim();
  const fenced = fencedBody(cleaned);
  const found =
    (fenced !== undefined ? scanFirstObject(fenced) : undefined) ??
    scanFirstObject(cleaned);

  if (found === undefined) {
    throw new SchemaValidationError("No valid JSON found in response", text);
  }
  return found;
}

/**
 * Repair common JSON issues (trailing commas, unquoted keys, etc.)
 *
 * The rewrites are purely textual and do not distinguish string contents from
 * structure, so this is only used as a fallback after strict parsing failed.
 */
function repairJSON(text: string): string {
  let repaired = text;

  // Remove trailing commas
  repaired = repaired.replace(/,\s*([}\]])/g, "$1");

  // Quote unquoted keys
  repaired = repaired.replace(/(?<!["\'\w])(\w[\w\d_]*)(\s*:)/g, '"$1"$2');

  // Normalize quotes
  repaired = repaired.replace(/'/g, '"').replace(/`/g, '"');

  return repaired;
}

/**
 * Parse a raw LLM response string into a typed JSON object.
 * Handles markdown fences, leading/trailing text, and minor JSON quirks.
 *
 * The result is cast to `T` without structural validation; run a validator on
 * it before trusting its shape.
 *
 * @param raw - Raw response text.
 * @param schemaType - Accepted for API compatibility; not consulted.
 * @returns The parsed object.
 * @throws SchemaValidationError when the response is empty, contains no JSON
 *   object, or cannot be parsed even after repair.
 */
export function parseSchema<T extends Record<string, unknown>>(
  raw: string,
  schemaType?: new () => T
): T {
  if (!raw || !raw.trim()) {
    throw new SchemaValidationError("empty response", raw);
  }

  // Throws SchemaValidationError itself when there is nothing to extract.
  const extracted = extractJSON(raw);

  let strictError: unknown;
  try {
    return JSON.parse(extracted) as T;
  } catch (error) {
    strictError = error;
  }

  // Repair the extracted object first: repairing the raw text drags in the
  // surrounding prose and fences and then fails to parse. The raw text is
  // still tried afterwards so inputs that used to parse keep parsing.
  for (const candidate of [extracted, raw]) {
    try {
      return JSON.parse(repairJSON(candidate)) as T;
    } catch {
      // Fall through to the next candidate.
    }
  }

  throw new SchemaValidationError(
    `JSON parse failed: ${(strictError as Error).message}`,
    raw
  );
}
198
+
199
+ /**
200
+ * Validate task plan schema
201
+ */
202
+ export function validateTaskPlan(data: unknown): TaskPlanSchema {
203
+ if (!data || typeof data !== "object") {
204
+ throw new SchemaValidationError("Invalid task plan: must be an object");
205
+ }
206
+
207
+ const plan = data as Record<string, unknown>;
208
+
209
+ if (typeof plan.goal !== "string") {
210
+ throw new SchemaValidationError("Invalid task plan: goal must be a string");
211
+ }
212
+
213
+ if (!Array.isArray(plan.steps)) {
214
+ throw new SchemaValidationError("Invalid task plan: steps must be an array");
215
+ }
216
+
217
+ const steps = (plan.steps as unknown[]).map((step: unknown) => {
218
+ if (!step || typeof step !== "object") {
219
+ throw new SchemaValidationError("Invalid task plan: step must be an object");
220
+ }
221
+
222
+ const s = step as Record<string, unknown>;
223
+ if (typeof s.id !== "string" && typeof s.id !== "number") {
224
+ throw new SchemaValidationError("Invalid task plan: step.id must be string or number");
225
+ }
226
+
227
+ if (typeof s.description !== "string") {
228
+ throw new SchemaValidationError("Invalid task plan: step.description must be a string");
229
+ }
230
+
231
+ return {
232
+ id: s.id,
233
+ description: s.description,
234
+ agent: typeof s.agent === "string" ? s.agent : "rain",
235
+ depends_on: Array.isArray(s.depends_on) ? (s.depends_on as string[]) : [],
236
+ priority: (["low", "medium", "high"].includes(s.priority as string)
237
+ ? s.priority
238
+ : "medium") as "low" | "medium" | "high",
239
+ };
240
+ });
241
+
242
+ return {
243
+ goal: plan.goal,
244
+ steps,
245
+ };
246
+ }
247
+
248
+ /**
249
+ * Validate extraction result schema
250
+ */
251
+ export function validateExtractionResult(data: unknown): ExtractionResultSchema {
252
+ if (!data || typeof data !== "object") {
253
+ throw new SchemaValidationError("Invalid extraction result: must be an object");
254
+ }
255
+
256
+ const result = data as Record<string, unknown>;
257
+
258
+ if (!Array.isArray(result.facts)) {
259
+ throw new SchemaValidationError("Invalid extraction result: facts must be an array");
260
+ }
261
+
262
+ const facts = (result.facts as unknown[]).map((fact: unknown) => {
263
+ if (!fact || typeof fact !== "object") {
264
+ throw new SchemaValidationError("Invalid extraction result: fact must be an object");
265
+ }
266
+
267
+ const f = fact as Record<string, unknown>;
268
+ if (typeof f.key !== "string" || typeof f.value !== "string") {
269
+ throw new SchemaValidationError(
270
+ "Invalid extraction result: fact must have key and value strings"
271
+ );
272
+ }
273
+
274
+ return {
275
+ key: f.key,
276
+ value: f.value,
277
+ category: typeof f.category === "string" ? f.category : "auto_extracted",
278
+ };
279
+ });
280
+
281
+ return { facts };
282
+ }
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Lightweight semantic retrieval — zero external dependencies.
3
+ *
4
+ * Why: The existing `recall_for_injection` uses SQL LIKE on tokens, which
5
+ * misses semantically related facts that share few literal characters (e.g.
6
+ * query "deploy" vs. stored fact "release_command"). This module provides
7
+ * a character n-gram Jaccard similarity scorer that catches those cross-
8
+ * lingual and synonym relationships without adding PyTorch / sentence-transformers.
9
+ *
10
+ * Design:
11
+ * - Character n-grams (size 2-4) naturally handle CJK, mixed-language, and
12
+ * code identifiers better than word-level tokenization.
13
+ * - Jaccard similarity on n-gram sets is fast (< 10 µs per pair) and
14
+ * well-correlated with human relevance judgments for short text.
15
+ * - Zero dependencies beyond stdlib.
16
+ */
17
+
/**
 * Represents a candidate item for ranking
 */
export interface Candidate {
  key?: string;
  value?: string | Record<string, unknown>;
  [key: string]: unknown;
}

/**
 * Character n-gram similarity scorer.
 *
 * Texts are lower-cased and broken into overlapping character n-grams; two
 * texts are compared by the Jaccard index of their n-gram sets.
 *
 * Usage:
 *   const scorer = new SemanticScorer();
 *   scorer.similarity("deploy", "deploy now");   // value in [0, 1]
 *   scorer.rank("query", [{ value: "candidate A" }, { value: "candidate B" }]);
 *   // -> [[score, candidate], ...] sorted by score, highest first
 */
export class SemanticScorer {
  private nRange: [number, number];
  private cache: Map<string, Set<string>>;
  private maxCacheSize = 512;

  /**
   * Initialize scorer with n-gram range.
   * @param nRange - Tuple of [minSize, maxSize] for n-grams (default [2, 4])
   */
  constructor(nRange: [number, number] = [2, 4]) {
    this.nRange = nRange;
    this.cache = new Map();
  }

  /**
   * Generate character n-gram fingerprint (cached).
   *
   * The cache is bounded by `maxCacheSize` and evicts the least recently used
   * text when full. Previously it simply stopped admitting entries once full,
   * so a long-lived scorer stayed pinned to the first texts it ever saw.
   *
   * @param text - Input text to fingerprint
   * @returns Set of n-grams for the text
   */
  private fingerprint(text: string): Set<string> {
    const cached = this.cache.get(text);
    if (cached !== undefined) {
      // Map iterates in insertion order: re-inserting marks the entry as
      // most recently used.
      this.cache.delete(text);
      this.cache.set(text, cached);
      return cached;
    }

    const lowered = text.toLowerCase();
    const ngrams = new Set<string>();
    const [minSize, maxSize] = this.nRange;

    // Generate n-grams for each size in range; sizes longer than the text
    // contribute nothing.
    for (let n = minSize; n <= maxSize; n++) {
      for (let i = 0; i <= lowered.length - n; i++) {
        ngrams.add(lowered.slice(i, i + n));
      }
    }

    if (this.cache.size >= this.maxCacheSize) {
      // The first key in iteration order is the least recently used one.
      const oldest = this.cache.keys().next().value;
      if (oldest !== undefined) {
        this.cache.delete(oldest);
      }
    }
    this.cache.set(text, ngrams);

    return ngrams;
  }

  /**
   * Calculate Jaccard similarity between two texts using character n-grams.
   * @param a - First text
   * @param b - Second text
   * @returns Similarity score between 0 and 1; 0 when either text is empty or
   *   too short to produce any n-gram
   */
  similarity(a: string, b: string): number {
    if (!a || !b) {
      return 0.0;
    }

    const fpA = this.fingerprint(a);
    const fpB = this.fingerprint(b);

    if (fpA.size === 0 || fpB.size === 0) {
      return 0.0;
    }

    let intersection = 0;
    for (const ngram of fpA) {
      if (fpB.has(ngram)) {
        intersection++;
      }
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const union = fpA.size + fpB.size - intersection;

    return union > 0 ? intersection / union : 0.0;
  }

  /**
   * Rank candidates by similarity to the query.
   *
   * The text compared is `candidate[keyField]` (objects are JSON-stringified).
   * Candidates whose field is missing or falsy are skipped. When the candidate
   * also has a truthy `key`, the higher of the two scores is used.
   *
   * @param query - Query string to match against
   * @param candidates - Array of candidate objects
   * @param keyField - Field name to extract text from (default "value")
   * @param topK - Maximum number of results to return (default 3)
   * @param minScore - Minimum similarity threshold (default 0.02)
   * @returns Array of [score, candidate] tuples, sorted by score descending
   */
  rank(
    query: string,
    candidates: Candidate[],
    keyField: string = "value",
    topK: number = 3,
    minScore: number = 0.02
  ): Array<[number, Candidate]> {
    const scored: Array<[number, Candidate]> = [];

    for (const candidate of candidates) {
      const field = candidate[keyField];

      // Skip if field is missing or empty
      if (!field) {
        continue;
      }

      const text = typeof field === "object" ? JSON.stringify(field) : String(field);
      let score = this.similarity(query, text);

      // Also score the "key" field if present
      const keyText = candidate.key;
      if (keyText) {
        score = Math.max(score, this.similarity(query, String(keyText)));
      }

      if (score >= minScore) {
        scored.push([score, candidate]);
      }
    }

    scored.sort((a, b) => b[0] - a[0]);

    return scored.slice(0, topK);
  }

  /**
   * Clear the fingerprint cache.
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Get current cache size.
   */
  getCacheSize(): number {
    return this.cache.size;
  }
}
196
+
/**
 * Shared scorer instance; stays null until getScorer() is first called.
 */
let scorer: SemanticScorer | null = null;

/**
 * Return the module-level SemanticScorer, creating it on first use.
 * Every call returns the same instance.
 * @returns SemanticScorer instance
 */
export function getScorer(): SemanticScorer {
  scorer = scorer ?? new SemanticScorer();
  return scorer;
}