mojulo 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +53 -4
  2. package/lib/audit-logger-new.js +11 -0
  3. package/lib/auth/gate.js +25 -0
  4. package/lib/auth/service.js +17 -0
  5. package/lib/auth/session.js +63 -0
  6. package/lib/builder/chat-processor.js +607 -0
  7. package/lib/builder/composer-bridge.js +82 -0
  8. package/lib/builder/evaluator.js +159 -0
  9. package/lib/builder/executor.js +252 -0
  10. package/lib/builder/index.js +48 -0
  11. package/lib/builder/session.js +248 -0
  12. package/lib/builder/system-prompt.js +422 -0
  13. package/lib/builder/tone-presets.js +75 -0
  14. package/lib/builder/tool-executors.js +1418 -0
  15. package/lib/builder/tools.js +338 -0
  16. package/lib/builder/validators.js +239 -0
  17. package/lib/composer/composer.js +225 -0
  18. package/lib/composer/index.js +40 -0
  19. package/lib/composer/protocols/00_base.txt +19 -0
  20. package/lib/composer/protocols/01_knowledge.txt +9 -0
  21. package/lib/composer/protocols/02_form-gathering.txt +32 -0
  22. package/lib/composer/protocols/03_appointments.txt +16 -0
  23. package/lib/composer/protocols/04_triage.txt +15 -0
  24. package/lib/composer/protocols/05_optical-read.txt +22 -0
  25. package/lib/composer/response-builder.js +98 -0
  26. package/lib/config-builder.js +650 -0
  27. package/lib/db/ids.js +10 -0
  28. package/lib/db/index.js +179 -0
  29. package/lib/db/repositories/apiKeys.js +72 -0
  30. package/lib/db/repositories/auditLogs.js +12 -0
  31. package/lib/db/repositories/botSpaces.js +12 -0
  32. package/lib/db/repositories/builderSessions.js +312 -0
  33. package/lib/db/repositories/deploymentEvents.js +12 -0
  34. package/lib/db/repositories/deployments.js +385 -0
  35. package/lib/db/repositories/documents.js +68 -0
  36. package/lib/db/repositories/mcpJobs.js +84 -0
  37. package/lib/deployers/bot-fleet.js +110 -0
  38. package/lib/deployers/bot-proxy.js +72 -0
  39. package/lib/deployers/build.js +89 -0
  40. package/lib/deployers/cloud-deploy.js +310 -0
  41. package/lib/deployers/docker.js +439 -0
  42. package/lib/deployers/fly.js +432 -0
  43. package/lib/deployers/index.js +38 -0
  44. package/lib/deployment-auth.js +36 -0
  45. package/lib/document-parser.js +171 -0
  46. package/lib/embedder/chunker.js +93 -0
  47. package/lib/embedder/local.js +101 -0
  48. package/lib/embedder/preview-rag.js +93 -0
  49. package/lib/envelope-schema.js +54 -0
  50. package/lib/fleet/scoped-sql.js +342 -0
  51. package/lib/form-schema-config/base.js +135 -0
  52. package/lib/form-schema-config/index.js +286 -0
  53. package/lib/form-schema-config/locales/af-ZA.js +153 -0
  54. package/lib/form-schema-config/locales/ar-AE.js +142 -0
  55. package/lib/form-schema-config/locales/ar-SA.js +164 -0
  56. package/lib/form-schema-config/locales/de-DE.js +152 -0
  57. package/lib/form-schema-config/locales/en-AU.js +161 -0
  58. package/lib/form-schema-config/locales/en-CA.js +115 -0
  59. package/lib/form-schema-config/locales/en-GB.js +132 -0
  60. package/lib/form-schema-config/locales/en-IN.js +219 -0
  61. package/lib/form-schema-config/locales/en-MY.js +171 -0
  62. package/lib/form-schema-config/locales/en-NG.js +198 -0
  63. package/lib/form-schema-config/locales/en-PH.js +186 -0
  64. package/lib/form-schema-config/locales/en-SG.js +153 -0
  65. package/lib/form-schema-config/locales/en-US.js +138 -0
  66. package/lib/form-schema-config/locales/es-ES.js +171 -0
  67. package/lib/form-schema-config/locales/es-MX.js +193 -0
  68. package/lib/form-schema-config/locales/fr-CA.js +138 -0
  69. package/lib/form-schema-config/locales/fr-FR.js +155 -0
  70. package/lib/form-schema-config/locales/hi-IN.js +219 -0
  71. package/lib/form-schema-config/locales/it-IT.js +157 -0
  72. package/lib/form-schema-config/locales/ja-JP.js +169 -0
  73. package/lib/form-schema-config/locales/ko-KR.js +140 -0
  74. package/lib/form-schema-config/locales/nl-NL.js +149 -0
  75. package/lib/form-schema-config/locales/pt-BR.js +168 -0
  76. package/lib/form-schema-config/locales/zh-CN.js +172 -0
  77. package/lib/form-schema-config/locales/zh-HK.js +142 -0
  78. package/lib/form-structure-schema.js +191 -0
  79. package/lib/llm-providers.js +828 -0
  80. package/lib/markdown.js +197 -0
  81. package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
  82. package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
  83. package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
  84. package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
  85. package/lib/mcp/catalysts/loader.js +144 -0
  86. package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
  87. package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
  88. package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
  89. package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
  90. package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
  91. package/lib/mcp/jobs.js +64 -0
  92. package/lib/mcp/server.js +184 -0
  93. package/lib/mcp/session-binding.js +130 -0
  94. package/lib/mcp/tools/build.js +123 -0
  95. package/lib/mcp/tools/catalysts.js +477 -0
  96. package/lib/mcp/tools/context.js +325 -0
  97. package/lib/mcp/tools/fleet.js +391 -0
  98. package/lib/mcp/tools/jobs-tools.js +240 -0
  99. package/lib/mcp/tools/operate.js +314 -0
  100. package/lib/preview/build-preview-config.js +136 -0
  101. package/lib/rate-limiter.js +11 -0
  102. package/lib/resolve-api-key.js +142 -0
  103. package/lib/storage/index.js +40 -0
  104. package/messages/de.json +2136 -0
  105. package/messages/en.json +2136 -0
  106. package/messages/es.json +2136 -0
  107. package/messages/fr.json +2136 -0
  108. package/messages/it.json +2136 -0
  109. package/messages/ja.json +2136 -0
  110. package/messages/ko.json +2136 -0
  111. package/messages/nl.json +2136 -0
  112. package/messages/pl.json +2136 -0
  113. package/messages/pt.json +2136 -0
  114. package/messages/ru.json +2136 -0
  115. package/messages/uk.json +2136 -0
  116. package/messages/zh.json +2136 -0
  117. package/package.json +61 -5
  118. package/scripts/mcp-config.mjs +162 -0
  119. package/scripts/mcp-stdio-loader.mjs +42 -0
  120. package/scripts/mcp-stdio.mjs +108 -0
  121. package/scripts/mojulo-paths.mjs +48 -0
@@ -0,0 +1,828 @@
1
+ /**
2
+ * LLM Provider Configurations
3
+ * Shared between ConfigForm and config-builder
4
+ */
5
+
6
/**
 * Providers whose runtime adapter can take image input on the current user
 * turn. Optical Read, and any later protocol that relies on vision, gates
 * itself against this set, so it has to stay aligned with the adapters in
 * lite-template/helper/llm-client.js.
 */
export const VISION_PROVIDERS = new Set(['anthropic', 'openai']);

/**
 * Report whether a provider is a member of VISION_PROVIDERS.
 *
 * @param {string} provider - Provider key (e.g. 'openai').
 * @returns {boolean} True when the provider is in the vision set.
 */
export function providerSupportsVision(provider) {
  const isVisionCapable = VISION_PROVIDERS.has(provider);
  return isVisionCapable;
}
17
+
18
/**
 * Per-model protocol allowlist.
 *
 * Ollama: qwen3 and mistral-nemo are small enough that multi-step
 * instruction-following (form-gathering, appointments, triage, optical-read)
 * is unreliable in practice. They can answer over a knowledge base but lose
 * the thread on stateful flows, so they are limited to `knowledge`.
 * llama3.3 (70B) is unrestricted.
 *
 * OpenAI: gpt-4.1 is kept on the form-free protocols. Form gathering needs
 * the model to track field state across turns and follow stricter shape
 * guidance now that wire-level enforcement is gone; gpt-5 and gpt-5-mini
 * handle that reliably, gpt-4.1 does not.
 *
 * Protocol IDs match the wizard's `enabledProtocols` keys: knowledge,
 * formGathering, appointments, triage, opticalRead.
 */
const RESTRICTED_OLLAMA_MODELS = new Set(['qwen3', 'mistral-nemo']);
const RESTRICTED_OPENAI_MODELS = new Set(['gpt-4.1']);

/**
 * Look up the protocol allowlist for a (provider, model) pair.
 *
 * @param {string} provider - Provider key (only 'ollama' and 'openai' are ever restricted).
 * @param {string} model - Model identifier within that provider.
 * @returns {Set<string> | null} A fresh Set of allowed protocol IDs when the
 *   model is restricted, or `null` when every protocol is allowed (the
 *   common case).
 */
export function getAllowedProtocolsForModel(provider, model) {
  const isRestrictedOllama = provider === 'ollama' && RESTRICTED_OLLAMA_MODELS.has(model);
  if (isRestrictedOllama) return new Set(['knowledge']);

  const isRestrictedOpenAI = provider === 'openai' && RESTRICTED_OPENAI_MODELS.has(model);
  if (isRestrictedOpenAI) {
    // Everything except formGathering.
    return new Set(['knowledge', 'appointments', 'triage', 'opticalRead']);
  }

  return null;
}
49
+
50
/**
 * Check a single protocol against the allowlist for a (provider, model) pair.
 *
 * @param {string} provider - Provider key.
 * @param {string} model - Model identifier.
 * @param {string} protocolId - Protocol ID to check.
 * @returns {boolean} True when getAllowedProtocolsForModel reports no
 *   restriction, otherwise whether the returned set contains `protocolId`.
 */
export function isProtocolAllowedForModel(provider, model, protocolId) {
  const allowlist = getAllowedProtocolsForModel(provider, model);
  return allowlist ? allowlist.has(protocolId) : true;
}
55
+
56
/**
 * Static configuration for every supported LLM provider, keyed by provider
 * ID. Each entry carries a display name, the selectable models, and a
 * default model, plus provider-specific connection details.
 */
export const LLM_PROVIDERS = {
  openai: {
    name: 'OpenAI',
    // Models offered in the user-facing picker. The smaller gpt-4.1 variants
    // are left out on purpose: `MODEL_TIERS` still uses `gpt-4.1-mini` for
    // control-plane tasks (form-gen, RAG summary), but they are not offered
    // as bot-runtime options. `gpt-4.1` is the anchor model.
    models: ['gpt-4.1', 'gpt-5', 'gpt-5-mini'],
    defaultModel: 'gpt-4.1',
    baseURL: 'https://api.openai.com/v1',
    endpoint: '/responses',
  },
  anthropic: {
    name: 'Anthropic',
    models: ['claude-opus-4-6', 'claude-sonnet-4-6', 'claude-haiku-4-5'],
    defaultModel: 'claude-sonnet-4-6',
    baseURL: 'https://api.anthropic.com/v1',
    endpoint: '/messages',
  },
  bedrock: {
    name: 'AWS Bedrock (Claude)',
    // Kept out of the UI for now. The provider is wired end-to-end (settings,
    // wizard branch, generateSummary/generateStructured, deployer) but stays
    // hidden until it is supported publicly. Anything rendering a provider
    // picker should filter on this flag; code keyed on
    // `provider === 'bedrock'` keeps working for callers driving the API
    // directly.
    hidden: true,
    // Base model IDs only. The geographic prefix is added dynamically from
    // the region.
    models: [
      { id: 'anthropic.claude-sonnet-4-6', name: 'Claude Sonnet 4.6' },
      { id: 'anthropic.claude-opus-4-6', name: 'Claude Opus 4.6' },
      { id: 'anthropic.claude-sonnet-4-5', name: 'Claude Sonnet 4.5' },
      { id: 'anthropic.claude-opus-4-5', name: 'Claude Opus 4.5' },
      { id: 'anthropic.claude-haiku-4-5', name: 'Claude Haiku 4.5' },
    ],
    defaultModel: 'anthropic.claude-sonnet-4-6',
    // Each region records the geographic prefix used for cross-region
    // inference.
    regions: [
      { id: 'us-east-1', name: 'US East (N. Virginia)', geoPrefix: 'us' },
      { id: 'us-west-2', name: 'US West (Oregon)', geoPrefix: 'us' },
      { id: 'eu-west-1', name: 'Europe (Ireland)', geoPrefix: 'eu' },
      { id: 'eu-central-1', name: 'Europe (Frankfurt)', geoPrefix: 'eu' },
      { id: 'ap-northeast-1', name: 'Asia Pacific (Tokyo)', geoPrefix: 'apac' },
      { id: 'ap-southeast-1', name: 'Asia Pacific (Singapore)', geoPrefix: 'apac' },
    ],
    authModes: ['credentials', 'iam-role'],
  },
  ollama: {
    name: 'Ollama (local)',
    // Deliberately short list: all three are tool-capable in Ollama and
    // follow the envelope JSON shape well. Plain mistral (7B, not
    // tool-capable) is intentionally omitted. llama3.3 (70B) is the heaviest
    // option and is only viable on machines with the VRAM / unified memory to
    // run a 70B model at usable speed; smaller hosts should stick to qwen3 or
    // mistral-nemo. Models are pulled by the user with `ollama pull <model>`.
    models: ['qwen3', 'mistral-nemo', 'llama3.3'],
    defaultModel: 'llama3.3',
    // Canonical Ollama endpoint. Ollama is not bundled; users run their own.
    // This default works from the control plane (native Node) without
    // assuming a topology. The bot artifact (Docker) needs a different host
    // to reach the user's Ollama: `host.docker.internal:11434` on
    // Mac/Windows, the host's LAN IP on Linux. The wizard helper text calls
    // this out so users override it deliberately instead of inheriting a
    // Docker-shaped default.
    defaultHost: 'http://localhost:11434',
  },
};
125
+
126
/**
 * Per-task model tiers. The "default" API key (isDefault flag on api_keys)
 * selects a provider; this map then selects the model within that provider
 * for the workload at hand. The user-facing meaning of "default" does not
 * change.
 *
 *   reasoning:  agentic loops with tool use (chat builder)
 *   structured: single-shot calls bounded by a JSON schema (form gen)
 *   summary:    single-shot free-text generation (RAG / bot summary)
 *
 * Bedrock entries are base model IDs without the geographic prefix;
 * buildBedrockModelId adds the prefix at the wire.
 */
export const MODEL_TIERS = {
  openai: {
    reasoning: 'gpt-4.1',
    structured: 'gpt-4.1-mini',
    summary: 'gpt-4.1-mini',
  },
  anthropic: {
    reasoning: 'claude-sonnet-4-6',
    structured: 'claude-haiku-4-5',
    summary: 'claude-haiku-4-5',
  },
  bedrock: {
    reasoning: 'anthropic.claude-sonnet-4-6',
    structured: 'anthropic.claude-haiku-4-5',
    summary: 'anthropic.claude-haiku-4-5',
  },
  ollama: {
    // One model for every tier. Ollama is local and free, so the cost-driven
    // reasoning/structured/summary split used for the cloud providers does
    // not apply. llama3.3 is natively tool-tuned (no <think> scratchpad to
    // strip), handles grammar-constrained JSON via Ollama's `format` param,
    // and writes clean prose, so one warm model covers all three workloads.
    // It is a 70B model and only runs at usable speed on machines with the
    // VRAM / unified memory to host it; smaller hosts should override to
    // qwen3 or mistral-nemo via the wizard.
    reasoning: 'llama3.3',
    structured: 'llama3.3',
    summary: 'llama3.3',
  },
};
168
+
169
/**
 * Choose the default model for a (provider, task) pair. The tier entry in
 * MODEL_TIERS is preferred; when it is missing, the provider's flat
 * `defaultModel` from LLM_PROVIDERS is used instead. Unknown providers yield
 * `undefined` rather than throwing.
 *
 * @param {string} provider - openai | anthropic | bedrock | ollama
 * @param {string} task - reasoning | structured | summary
 * @returns {string | undefined} The chosen model ID, if any.
 */
export function getDefaultModelForTask(provider, task) {
  const tierModel = MODEL_TIERS[provider]?.[task];
  if (tierModel) {
    return tierModel;
  }
  return LLM_PROVIDERS[provider]?.defaultModel;
}
180
+
181
/**
 * Resolve the default AWS region for Bedrock from the environment.
 *
 * Checks AWS_REGION first, then AWS_DEFAULT_REGION; an unset or empty value
 * is skipped. Falls back to 'us-east-1' when neither is set.
 *
 * @returns {string} Default AWS region for Bedrock.
 */
export function getDefaultBedrockRegion() {
  const { AWS_REGION, AWS_DEFAULT_REGION } = process.env;
  if (AWS_REGION) {
    return AWS_REGION;
  }
  if (AWS_DEFAULT_REGION) {
    return AWS_DEFAULT_REGION;
  }
  return 'us-east-1';
}
188
+
189
/**
 * Map an AWS region to the geographic prefix used for cross-region inference.
 *
 * Resolution order:
 *   1. A missing region yields 'us'.
 *   2. A region listed in LLM_PROVIDERS.bedrock.regions uses its `geoPrefix`.
 *   3. Otherwise the region's leading segment decides: `us-`/`ca-` give 'us',
 *      `eu-`/`il-` give 'eu', `ap-`/`me-` give 'apac'.
 *   4. Anything else yields 'us'.
 *
 * @param {string} region - AWS region ID (e.g., 'us-east-1')
 * @returns {string} Geographic prefix (e.g., 'us', 'eu', 'apac')
 */
export function getBedrockGeoPrefix(region) {
  if (!region) return 'us';

  const listed = LLM_PROVIDERS.bedrock.regions.find((entry) => entry.id === region);
  if (listed?.geoPrefix) return listed.geoPrefix;

  // Regions not in the configured list: classify by their leading segment.
  const fallbackRules = [
    ['us', ['us-', 'ca-']],
    ['eu', ['eu-', 'il-']],
    ['apac', ['ap-', 'me-']],
  ];
  for (const [geoPrefix, leadingSegments] of fallbackRules) {
    if (leadingSegments.some((segment) => region.startsWith(segment))) {
      return geoPrefix;
    }
  }

  return 'us';
}
213
+
214
/**
 * Produce the full Bedrock model ID, including the geographic prefix used
 * for cross-region inference. An ID that already begins with `us.`, `eu.`,
 * or `apac.` is returned untouched.
 *
 * @param {string} baseModelId - Base model ID without prefix (e.g., 'anthropic.claude-sonnet-4-6')
 * @param {string} region - AWS region ID (e.g., 'us-east-1')
 * @returns {string} Full model ID with prefix (e.g., 'us.anthropic.claude-sonnet-4-6')
 */
export function buildBedrockModelId(baseModelId, region) {
  const alreadyPrefixed = /^(us|eu|apac)\./.test(baseModelId);
  return alreadyPrefixed
    ? baseModelId
    : `${getBedrockGeoPrefix(region)}.${baseModelId}`;
}
228
+
229
/**
 * Remove a leading geographic prefix (`us.`, `eu.`, `apac.`) from a Bedrock
 * model ID. Falsy input is handed back unchanged.
 *
 * @param {string} modelId - Full model ID (e.g., 'us.anthropic.claude-sonnet-4-6')
 * @returns {string} Base model ID without prefix (e.g., 'anthropic.claude-sonnet-4-6')
 */
export function stripBedrockModelPrefix(modelId) {
  return modelId ? modelId.replace(/^(us|eu|apac)\./, '') : modelId;
}
239
+
240
/**
 * Resolve an Ollama host URL from the `apiKey` parameter passed through the
 * shared generate*() entry points. The slot can carry one of three shapes:
 *
 *   - JSON `{"host":"http://..."}`: settings-resolved saved key
 *   - bare URL string `http://...`: direct caller without a saved key
 *   - empty / null: fall back to LLM_PROVIDERS.ollama.defaultHost
 *
 * This mirrors Bedrock's `JSON.parse(apiKey)` discriminator and keeps the
 * outer function signatures stable across providers.
 *
 * The host stored in deployment configs is intended for the bot artifact,
 * which runs in Docker and reaches the host via `host.docker.internal`. The
 * control plane (this code) usually runs natively (`npm run dev`), where that
 * alias does not resolve. Two escape hatches:
 *
 *   1. process.env.OLLAMA_HOST wins outright and is returned as-is (trimmed,
 *      no alias rewrite). Set it when the control plane needs a different
 *      endpoint than the bot artifact (Dockerized control plane, remote
 *      Ollama, tunneled endpoint), or when the Docker alias is really wanted.
 *   2. Otherwise `host.docker.internal` is rewritten to `localhost`, so a
 *      stock-default deployment config works from a native control plane
 *      without configuration.
 *
 * A JSON key that is malformed, or whose `host` is missing, blank, or not a
 * string, resolves to the default host instead of throwing.
 *
 * @param {string | null | undefined} apiKey - JSON key, bare URL, or empty.
 * @returns {string} Host URL to use from the control plane.
 */
export function resolveOllamaHost(apiKey) {
  const envOverride = process.env.OLLAMA_HOST?.trim();
  if (envOverride) return envOverride;

  const fallback = LLM_PROVIDERS.ollama?.defaultHost || 'http://host.docker.internal:11434';
  const trimmed = apiKey ? String(apiKey).trim() : '';

  let resolved = fallback;
  if (trimmed.startsWith('{')) {
    try {
      const host = JSON.parse(trimmed)?.host;
      // Only a non-empty string is usable as a URL; anything else (number,
      // object, blank) would break the rewrite below or the request later.
      if (typeof host === 'string' && host.trim()) {
        resolved = host.trim();
      }
    } catch {
      // Malformed JSON: deliberately fall back to the default host.
    }
  } else if (trimmed) {
    resolved = trimmed;
  }

  return resolved.replace(/host\.docker\.internal/gi, 'localhost');
}
287
+
288
/**
 * Generate a free-text summary with the given LLM provider.
 *
 * The `apiKey` slot is provider-specific: a plain API key for openai and
 * anthropic, JSON-encoded AWS credentials for bedrock, and a host descriptor
 * (see resolveOllamaHost) for ollama.
 *
 * @param {string} provider - The LLM provider (openai, anthropic, bedrock, ollama)
 * @param {string} content - The content to summarize
 * @param {string} apiKey - The API key (or provider-specific credential string)
 * @param {string} customPrompt - Optional custom prompt; replaces the default system prompt
 * @param {string} model - Optional model override; defaults to the provider's `defaultModel`
 * @returns {Promise<string>} - The generated summary
 * @throws {Error} When the provider is unknown, or when Bedrock credentials
 *   are not a JSON-encoded object.
 */
export async function generateSummary(provider, content, apiKey, customPrompt = null, model = null) {
  const providerConfig = LLM_PROVIDERS[provider];

  if (!providerConfig) {
    throw new Error(`Unsupported provider: ${provider}`);
  }

  const selectedModel = model || providerConfig.defaultModel;

  const defaultPrompt = `You are a helpful RAG (Retrieval-Augmented Generation) assistant. Analyze the following documents and provide a concise summary that:

1. Identifies key terms, concepts, and topics covered
2. Highlights the main themes and subject areas
3. Describes what kind of questions this knowledge base can answer
4. Lists important entities, processes, or procedures mentioned

IMPORTANT: Generate the summary in the SAME LANGUAGE as the original document. If the document is in French, write the summary in French. If in German, write in German. Match the source language exactly.

Keep the summary clear, structured, and focused on what information is available in these documents.`;

  const systemInstruction = customPrompt || defaultPrompt;

  switch (provider) {
    case 'openai':
      return await generateSummaryWithOpenAI(content, apiKey, systemInstruction, selectedModel, providerConfig);

    case 'anthropic':
      return await generateSummaryWithAnthropic(content, apiKey, systemInstruction, selectedModel, providerConfig);

    case 'bedrock': {
      // For Bedrock, apiKey is actually JSON credentials.
      const invalidCredentials = 'Invalid Bedrock credentials format. Please reconfigure your AWS credentials.';
      let credentials;
      try {
        credentials = JSON.parse(apiKey);
      } catch (e) {
        throw new Error(invalidCredentials);
      }
      // JSON such as `null`, `42`, or `[]` parses cleanly but is not a
      // credentials object; report it the same way as unparseable input
      // instead of crashing on the property access below.
      if (credentials === null || typeof credentials !== 'object' || Array.isArray(credentials)) {
        throw new Error(invalidCredentials);
      }
      if (!credentials.region) {
        credentials.region = 'us-east-1'; // Fallback region
      }
      return await generateSummaryWithBedrock(content, credentials, systemInstruction, selectedModel);
    }

    case 'ollama': {
      const host = resolveOllamaHost(apiKey);
      return await generateSummaryWithOllama(content, host, systemInstruction, selectedModel);
    }

    default:
      throw new Error(`Provider ${provider} not implemented`);
  }
}
349
+
350
/**
 * Generate a summary through the OpenAI Chat Completions endpoint.
 *
 * @param {string} content - User-role content to summarize.
 * @param {string} apiKey - OpenAI API key, sent as a Bearer token.
 * @param {string} systemInstruction - System prompt.
 * @param {string} model - Model ID to request.
 * @param {{ baseURL: string }} config - Provider config; only `baseURL` is read here.
 * @returns {Promise<string>} The first choice's message content, or
 *   'No summary generated' when the response carries none.
 * @throws {Error} `OpenAI API error: …` on a non-2xx response.
 */
async function generateSummaryWithOpenAI(content, apiKey, systemInstruction, model, config) {
  const url = `${config.baseURL}/chat/completions`;

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: model,
      messages: [
        {
          role: 'system',
          content: systemInstruction
        },
        {
          role: 'user',
          content: content
        }
      ],
      max_tokens: 4096
    }),
  });

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON (gateway/proxy error pages).
    // Fall back to the status text rather than surfacing a JSON parse error
    // that hides the real failure.
    const errorData = await response.json().catch(() => ({}));
    throw new Error(`OpenAI API error: ${errorData?.error?.message || response.statusText}`);
  }

  const data = await response.json();
  return data.choices?.[0]?.message?.content || 'No summary generated';
}
386
+
387
/**
 * Generate a summary through the Anthropic Messages endpoint.
 *
 * @param {string} content - User-role content to summarize; may be empty.
 * @param {string} apiKey - Anthropic API key, sent in `x-api-key`.
 * @param {string} systemInstruction - System prompt.
 * @param {string} model - Model ID to request.
 * @param {{ baseURL: string, endpoint: string }} config - Provider config used to build the URL.
 * @returns {Promise<string>} Text of the first content block, or
 *   'No summary generated' when the response carries none.
 * @throws {Error} `Anthropic API error: …` on a non-2xx response.
 */
async function generateSummaryWithAnthropic(content, apiKey, systemInstruction, model, config) {
  const url = `${config.baseURL}${config.endpoint}`;

  // If content is empty, use systemInstruction as the user message.
  // Anthropic API requires non-empty content in all messages.
  const hasContent = content && content.trim().length > 0;

  const body = {
    model: model,
    max_tokens: 4096,
    messages: [
      {
        role: 'user',
        content: hasContent ? content : systemInstruction
      }
    ]
  };

  // Only include system prompt if we have separate content.
  if (hasContent) {
    body.system = systemInstruction;
  }

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON (gateway/proxy error pages).
    // Fall back to the status text rather than surfacing a JSON parse error
    // that hides the real failure.
    const errorData = await response.json().catch(() => ({}));
    throw new Error(`Anthropic API error: ${errorData?.error?.message || response.statusText}`);
  }

  const data = await response.json();
  return data.content?.[0]?.text || 'No summary generated';
}
431
+
432
/**
 * Generate a summary through AWS Bedrock's Converse API.
 *
 * The AWS SDK is imported lazily inside the function.
 *
 * @param {string} content - User-role content to summarize; may be empty.
 * @param {object} credentials - Parsed Bedrock credentials. Reads `region`,
 *   `useIamRole`, `accessKeyId`, and `secretAccessKey`.
 * @param {string} systemInstruction - System prompt.
 * @param {string} model - Base or already-prefixed Bedrock model ID.
 * @returns {Promise<string>} Text of the first text block in the reply, or
 *   'No summary generated' when there is none.
 * @throws {Error} A descriptive error for access-denied, validation,
 *   model-not-found, and throttling failures, or a generic
 *   `Bedrock API error: …`. The original SDK error is attached as `cause`.
 */
async function generateSummaryWithBedrock(content, credentials, systemInstruction, model) {
  const { BedrockRuntimeClient, ConverseCommand } = await import('@aws-sdk/client-bedrock-runtime');

  const clientConfig = { region: credentials.region };

  // Only set explicit credentials if not using IAM role; otherwise the SDK
  // client is constructed without a `credentials` entry.
  if (!credentials.useIamRole && credentials.accessKeyId) {
    clientConfig.credentials = {
      accessKeyId: credentials.accessKeyId,
      secretAccessKey: credentials.secretAccessKey,
    };
  }

  const client = new BedrockRuntimeClient(clientConfig);

  // Build the full model ID with geographic prefix for cross-region inference.
  const fullModelId = buildBedrockModelId(model, credentials.region);

  // If content is empty, use systemInstruction as the user message.
  const hasContent = content && content.trim().length > 0;

  const command = new ConverseCommand({
    modelId: fullModelId,
    system: hasContent ? [{ text: systemInstruction }] : undefined,
    messages: [{ role: 'user', content: [{ text: hasContent ? content : systemInstruction }] }],
    inferenceConfig: { maxTokens: 4096 },
  });

  try {
    const response = await client.send(command);
    const textBlock = response.output?.message?.content?.find(b => b.text);
    return textBlock?.text || 'No summary generated';
  } catch (error) {
    // Keep the original SDK error reachable for debugging (stack, metadata)
    // while presenting a friendlier message.
    const withCause = { cause: error };

    // Provide more helpful error messages for common Bedrock errors.
    if (error.name === 'AccessDeniedException') {
      throw new Error(`Bedrock access denied: ${error.message}. Check your AWS credentials and model access permissions.`, withCause);
    }
    if (error.name === 'ValidationException') {
      throw new Error(`Bedrock validation error: ${error.message}. Model ID: ${fullModelId}`, withCause);
    }
    if (error.name === 'ResourceNotFoundException') {
      throw new Error(`Bedrock model not found: ${fullModelId}. Ensure the model is available in region ${credentials.region}.`, withCause);
    }
    if (error.name === 'ThrottlingException') {
      throw new Error('Bedrock rate limit exceeded. Please try again in a few moments.', withCause);
    }
    throw new Error(`Bedrock API error: ${error.message}`, withCause);
  }
}
484
+
485
/**
 * Remove hybrid-reasoning scratchpad blocks from Ollama model output. qwen3
 * (and other hybrid-thinking models) emit `<think>...</think>` before the
 * real answer, while downstream call sites want clean prose. The request
 * already passes `think: false` as the canonical suppression; this is kept
 * as defense in depth for models that ignore it or for older Ollama
 * versions.
 *
 * @param {string} text - Raw model output.
 * @returns {string} Trimmed text with `<think>` blocks removed; falsy input
 *   is returned unchanged.
 */
function stripReasoningTags(text) {
  if (!text) return text;

  const closedBlocks = /<think\b[^>]*>[\s\S]*?<\/think>/gi;
  // An opening tag with no closing tag: drop everything from it to the end.
  const unterminatedBlock = /<think\b[^>]*>[\s\S]*$/i;

  const withoutClosedBlocks = text.replace(closedBlocks, '');
  const withoutAnyBlocks = withoutClosedBlocks.replace(unterminatedBlock, '');
  return withoutAnyBlocks.trim();
}
500
+
501
/**
 * Generate a summary via Ollama's /api/chat. The result is free text; no
 * JSON-mode hint is sent because the call sites in tool-executors want prose
 * digests rather than structured output. The caller supplies the already
 * resolved host.
 *
 * @param {string} content - User-role content to summarize; may be empty.
 * @param {string} host - Ollama base URL (one trailing slash is tolerated).
 * @param {string} systemInstruction - System prompt; also used as the user
 *   message when `content` is empty.
 * @param {string} model - Ollama model name.
 * @returns {Promise<string>} The reply with reasoning tags stripped, or
 *   'No summary generated' when nothing remains after stripping.
 * @throws {Error} On a non-2xx response, or when the response has no string
 *   `message.content`.
 */
async function generateSummaryWithOllama(content, host, systemInstruction, model) {
  const endpoint = `${host.replace(/\/$/, '')}/api/chat`;
  const userMessage = content && content.trim().length > 0 ? content : systemInstruction;

  // Ollama has no separate `system` field; a system-role entry inside
  // `messages` is the supported shape and matches what the bot-runtime
  // adapter sends in lite-template/helper/llm-client.js.
  const payload = {
    model,
    stream: false,
    // Hybrid-reasoning models (qwen3) emit a <think> scratchpad unless told
    // to skip it. This flag is Ollama's official switch and is ignored by
    // non-thinking models, so it is safe to set unconditionally.
    think: false,
    messages: [
      { role: 'system', content: systemInstruction },
      { role: 'user', content: userMessage },
    ],
    options: {
      // Raise the context window above Ollama's default 2048; the chat
      // builder's summary prompts can run past 4K when documents are large.
      num_ctx: 16384,
    },
  };

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => '');
    throw new Error(`Ollama API error (${response.status}): ${errorText || response.statusText}`);
  }

  const data = await response.json();
  const reply = data?.message?.content;
  if (!reply || typeof reply !== 'string') {
    throw new Error('Ollama response contained no message.content');
  }

  const cleaned = stripReasoningTags(reply);
  return cleaned || 'No summary generated';
}
547
+
548
/**
 * Generate a structured object using the specified LLM provider.
 *
 * Unlike generateSummary (free-text return), this routes each provider
 * through a structured-output path against a caller-supplied JSON schema and
 * returns a parsed object.
 *
 *   openai:    Chat Completions response_format: json_schema (strict)
 *   anthropic: tool_choice forcing a specific tool whose input_schema = schema
 *   bedrock:   Converse toolConfig with toolChoice forcing the same shape
 *   ollama:    delegated to generateStructuredWithOllama with the resolved host
 *
 * @param {string} provider           One of: openai, anthropic, bedrock, ollama
 * @param {string} content            User-role content (the NL request)
 * @param {string} apiKey             API key, JSON-encoded Bedrock credentials,
 *                                    or an Ollama host descriptor (see resolveOllamaHost)
 * @param {string} systemInstruction  System prompt
 * @param {object} schema             JSON schema describing the expected object
 * @param {string} [model]            Optional model override; defaults to the
 *                                    provider's `defaultModel`
 * @returns {Promise<object>}         Parsed object conforming to `schema`
 * @throws {Error} When the provider is unknown, or when Bedrock credentials
 *   are not a JSON-encoded object.
 */
export async function generateStructured(provider, content, apiKey, systemInstruction, schema, model = null) {
  const providerConfig = LLM_PROVIDERS[provider];
  if (!providerConfig) {
    throw new Error(`Unsupported provider: ${provider}`);
  }
  const selectedModel = model || providerConfig.defaultModel;

  switch (provider) {
    case 'openai':
      return await generateStructuredWithOpenAI(content, apiKey, systemInstruction, selectedModel, providerConfig, schema);

    case 'anthropic':
      return await generateStructuredWithAnthropic(content, apiKey, systemInstruction, selectedModel, providerConfig, schema);

    case 'bedrock': {
      const invalidCredentials = 'Invalid Bedrock credentials format. Please reconfigure your AWS credentials.';
      let credentials;
      try {
        credentials = JSON.parse(apiKey);
      } catch (e) {
        throw new Error(invalidCredentials);
      }
      // JSON such as `null`, `42`, or `[]` parses cleanly but is not a
      // credentials object; report it the same way as unparseable input
      // instead of crashing on the property access below.
      if (credentials === null || typeof credentials !== 'object' || Array.isArray(credentials)) {
        throw new Error(invalidCredentials);
      }
      if (!credentials.region) {
        credentials.region = 'us-east-1'; // Fallback region
      }
      return await generateStructuredWithBedrock(content, credentials, systemInstruction, selectedModel, schema);
    }

    case 'ollama': {
      const host = resolveOllamaHost(apiKey);
      return await generateStructuredWithOllama(content, host, systemInstruction, selectedModel, schema);
    }

    default:
      throw new Error(`Provider ${provider} not implemented`);
  }
}
604
+
605
/**
 * Produce a structured object through OpenAI Chat Completions using
 * `response_format: json_schema` in strict mode. The caller must supply a
 * schema that is compatible with strict mode.
 *
 * @param {string} content - User-role content.
 * @param {string} apiKey - OpenAI API key, sent as a Bearer token.
 * @param {string} systemInstruction - System prompt.
 * @param {string} model - Model ID to request.
 * @param {{ baseURL: string }} config - Provider config; only `baseURL` is read here.
 * @param {object} schema - JSON schema for the expected object.
 * @returns {Promise<object>} The first choice's content parsed as JSON.
 * @throws {Error} On a non-2xx response, when generation stopped at the
 *   token limit, when the model refused, or when no string content came back.
 */
async function generateStructuredWithOpenAI(content, apiKey, systemInstruction, model, config, schema) {
  const requestBody = {
    model,
    messages: [
      { role: 'system', content: systemInstruction },
      { role: 'user', content },
    ],
    max_tokens: 4096,
    response_format: {
      type: 'json_schema',
      json_schema: { name: 'form_structure', schema, strict: true },
    },
  };

  const response = await fetch(`${config.baseURL}/chat/completions`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    // The error body may not be JSON; fall back to the HTTP status text.
    const errorData = await response.json().catch(() => ({}));
    throw new Error(`OpenAI API error: ${errorData.error?.message || response.statusText}`);
  }

  const data = await response.json();
  const firstChoice = data.choices?.[0];

  // A truncated reply cannot be trusted to be complete JSON.
  if (firstChoice?.finish_reason === 'length') {
    throw new Error('OpenAI hit max_tokens before completing structured output');
  }

  const refusal = firstChoice?.message?.refusal;
  if (refusal) {
    throw new Error(`OpenAI refused: ${refusal}`);
  }

  const jsonText = firstChoice?.message?.content;
  if (!jsonText || typeof jsonText !== 'string') {
    throw new Error('OpenAI response contained no content');
  }
  return JSON.parse(jsonText);
}
653
+
654
/**
 * Generate a structured object via Anthropic forced tool use. Schema is
 * consumed verbatim — Anthropic's tool input_schema validator accepts the
 * canonical (non-strict) shape.
 *
 * @param {string} content User-role message content
 * @param {string} apiKey Sent in the `x-api-key` header
 * @param {string} systemInstruction Sent as the top-level `system` field
 * @param {string} model Model identifier placed in the request body
 * @param {object} config Provider config; `baseURL` and `endpoint` are concatenated into the URL
 * @param {object} schema JSON schema used as the tool's `input_schema`
 * @returns {Promise<object>} The `input` of the `generate_form` tool_use block
 * @throws {Error} On a non-2xx response, when the response stopped on `max_tokens`,
 *   or when no `generate_form` tool_use block is present
 */
async function generateStructuredWithAnthropic(content, apiKey, systemInstruction, model, config, schema) {
  const url = `${config.baseURL}${config.endpoint}`;

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model,
      max_tokens: 4096,
      tools: [{
        name: 'generate_form',
        description: 'Return the generated form structure as a structured object.',
        input_schema: schema,
      }],
      // Force the model to answer through the tool so the result arrives as structured input.
      tool_choice: { type: 'tool', name: 'generate_form' },
      system: systemInstruction,
      messages: [{ role: 'user', content }],
    }),
  });

  if (!response.ok) {
    // The error body may not be JSON; fall back to the HTTP status text.
    const errorData = await response.json().catch(() => ({}));
    throw new Error(`Anthropic API error: ${errorData.error?.message || response.statusText}`);
  }

  const data = await response.json();

  // Truncation is fatal even when a tool_use block is present: a response cut
  // off at max_tokens can carry an incomplete tool input, which must not be
  // returned as if it were the finished object. This matches the Bedrock and
  // OpenAI paths, which also fail unconditionally on truncation.
  if (data.stop_reason === 'max_tokens') {
    throw new Error('Anthropic hit max_tokens before completing tool_use');
  }

  const block = data.content?.find((c) => c.type === 'tool_use' && c.name === 'generate_form');
  if (!block) {
    throw new Error('Anthropic response contained no generate_form tool_use block');
  }
  return block.input;
}
699
+
700
/**
 * Generate a structured object via Bedrock Converse tool use. Mirrors the
 * error-mapping behavior of generateSummaryWithBedrock so credential and
 * model-access failures surface the same way across both code paths.
 *
 * @param {string} content User-role message text
 * @param {object} credentials Reads `region`, `useIamRole`, `accessKeyId`,
 *   `secretAccessKey` and, when present, `sessionToken`
 * @param {string} systemInstruction System prompt text
 * @param {string} model Model name handed to buildBedrockModelId together with the region
 * @param {object} schema JSON schema used as the tool's `inputSchema.json`
 * @returns {Promise<object>} The `input` of the `generate_form` toolUse block
 * @throws {Error} When the response stopped on `max_tokens`, when no
 *   `generate_form` toolUse block is present, or when the SDK call fails
 *   (selected SDK error names are rewrapped with a friendlier message and
 *   the original error attached as `cause`)
 */
async function generateStructuredWithBedrock(content, credentials, systemInstruction, model, schema) {
  // Loaded lazily so the AWS SDK is only required when this path is used.
  const { BedrockRuntimeClient, ConverseCommand } = await import('@aws-sdk/client-bedrock-runtime');

  // Explicit keys are only set when IAM-role mode is off and an access key is
  // supplied; otherwise credential resolution is left to the SDK client.
  const clientConfig = { region: credentials.region };
  if (!credentials.useIamRole && credentials.accessKeyId) {
    clientConfig.credentials = {
      accessKeyId: credentials.accessKeyId,
      secretAccessKey: credentials.secretAccessKey,
      // Temporary (STS) credentials are rejected without their session token.
      ...(credentials.sessionToken ? { sessionToken: credentials.sessionToken } : {}),
    };
  }

  const client = new BedrockRuntimeClient(clientConfig);
  const fullModelId = buildBedrockModelId(model, credentials.region);

  const command = new ConverseCommand({
    modelId: fullModelId,
    system: [{ text: systemInstruction }],
    messages: [{ role: 'user', content: [{ text: content }] }],
    inferenceConfig: { maxTokens: 4096 },
    toolConfig: {
      tools: [{
        toolSpec: {
          name: 'generate_form',
          description: 'Return the generated form structure as a structured object.',
          inputSchema: { json: schema },
        },
      }],
      // Force the model to answer through the tool.
      toolChoice: { tool: { name: 'generate_form' } },
    },
  });

  try {
    const result = await client.send(command);

    if (result.stopReason === 'max_tokens') {
      throw new Error('Bedrock hit max_tokens before completing tool use');
    }
    const block = result.output?.message?.content?.find((c) => c.toolUse?.name === 'generate_form');
    if (!block) {
      throw new Error('Bedrock response contained no generate_form toolUse block');
    }
    return block.toolUse.input;
  } catch (error) {
    // Map selected SDK error names to actionable messages; keep the original
    // error as `cause` so its stack and metadata are not lost.
    if (error.name === 'AccessDeniedException') {
      throw new Error(`Bedrock access denied: ${error.message}. Check your AWS credentials and model access permissions.`, { cause: error });
    }
    if (error.name === 'ValidationException') {
      throw new Error(`Bedrock validation error: ${error.message}. Model ID: ${fullModelId}`, { cause: error });
    }
    if (error.name === 'ResourceNotFoundException') {
      throw new Error(`Bedrock model not found: ${fullModelId}. Ensure the model is available in region ${credentials.region}.`, { cause: error });
    }
    if (error.name === 'ThrottlingException') {
      throw new Error('Bedrock rate limit exceeded. Please try again in a few moments.', { cause: error });
    }
    // Anything else, including the errors thrown in the try block, propagates unchanged.
    throw error;
  }
}
763
+
764
/**
 * Generate a structured object via Ollama /api/chat with grammar-constrained
 * sampling against the caller-supplied JSON schema.
 *
 * Ollama 0.5+ compiles the `format` schema into a GBNF grammar at the daemon
 * and constrains token sampling to it — the model cannot emit non-conforming
 * output. We initially tried pairing `format` with forced tool use to match
 * the OpenAI/Anthropic response shape, but that combo caused the model to
 * emit the constrained JSON into `message.content` and bypass the tool-call
 * channel entirely. So this path drops the tool wrapper and reads
 * `message.content` directly.
 *
 * Caveats:
 * - Requires Ollama ≥ 0.5.0. Older daemons silently ignore the schema and
 *   return free-form JSON, which surfaces here as a parse error if the
 *   model's free-form output doesn't match what the caller expects.
 * - No prompt caching. Every call re-processes the full system prompt +
 *   schema. Acceptable for one-shot structured calls.
 *
 * @param {string} content User-role message content
 * @param {string} host Base URL of the Ollama daemon; one trailing slash is tolerated
 * @param {string} systemInstruction System-role message content
 * @param {string} model Model name placed in the request body
 * @param {object} schema JSON schema sent as the `format` field
 * @returns {Promise<object>} The parsed JSON carried in `message.content`
 * @throws {Error} On a non-2xx response, missing `message.content`, or content
 *   that is not valid JSON
 */
async function generateStructuredWithOllama(content, host, systemInstruction, model, schema) {
  const url = `${host.replace(/\/$/, '')}/api/chat`;

  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      stream: false,
      // Suppress hybrid-reasoning scratchpad on qwen3 — wasted tokens on the
      // structured-output path where the model should emit the JSON directly.
      think: false,
      messages: [
        { role: 'system', content: systemInstruction },
        { role: 'user', content },
      ],
      // Grammar-constrained sampling against the caller's schema. Daemon-side
      // guarantee that `message.content` parses as a JSON object matching the
      // schema. No `tools` — pairing `format` with forced tool use causes the
      // constrained output to land in `content` instead of the tool call.
      format: schema,
      options: {
        num_ctx: 16384,
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => '');
    throw new Error(`Ollama API error (${response.status}): ${errorText || response.statusText}`);
  }

  const data = await response.json();
  const raw = data?.message?.content;
  if (!raw || typeof raw !== 'string') {
    throw new Error('Ollama response contained no message.content');
  }
  // Defense in depth — grammar-constrained output should never carry markdown
  // fences, but some daemon/model combos emit them anyway. Only a fence that
  // wraps the whole payload is removed: stripping every ``` occurrence would
  // also delete backticks inside JSON string values and corrupt valid data.
  const jsonString = raw
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/, '')
    .trim();
  try {
    return JSON.parse(jsonString);
  } catch (e) {
    throw new Error(`Ollama returned malformed JSON: ${e.message}. Content: ${jsonString.slice(0, 200)}`, { cause: e });
  }
}