twinclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/README.md +66 -0
  2. package/bin/npm-twinclaw.js +17 -0
  3. package/bin/run-twinbot-cli.js +36 -0
  4. package/bin/twinbot.js +4 -0
  5. package/bin/twinclaw.js +4 -0
  6. package/dist/api/handlers/browser.js +160 -0
  7. package/dist/api/handlers/callback.js +80 -0
  8. package/dist/api/handlers/config-validate.js +19 -0
  9. package/dist/api/handlers/health.js +117 -0
  10. package/dist/api/handlers/local-state-backup.js +118 -0
  11. package/dist/api/handlers/persona-state.js +59 -0
  12. package/dist/api/handlers/skill-packages.js +94 -0
  13. package/dist/api/router.js +278 -0
  14. package/dist/api/runtime-event-producer.js +99 -0
  15. package/dist/api/shared.js +82 -0
  16. package/dist/api/websocket-hub.js +305 -0
  17. package/dist/config/config-loader.js +2 -0
  18. package/dist/config/env-schema.js +202 -0
  19. package/dist/config/env-validator.js +223 -0
  20. package/dist/config/identity-bootstrap.js +115 -0
  21. package/dist/config/json-config.js +344 -0
  22. package/dist/config/workspace.js +186 -0
  23. package/dist/core/channels-cli.js +77 -0
  24. package/dist/core/cli.js +119 -0
  25. package/dist/core/context-assembly.js +33 -0
  26. package/dist/core/doctor.js +365 -0
  27. package/dist/core/gateway-cli.js +323 -0
  28. package/dist/core/gateway.js +416 -0
  29. package/dist/core/heartbeat.js +54 -0
  30. package/dist/core/install-cli.js +320 -0
  31. package/dist/core/lane-executor.js +134 -0
  32. package/dist/core/logs-cli.js +70 -0
  33. package/dist/core/onboarding.js +760 -0
  34. package/dist/core/pairing-cli.js +78 -0
  35. package/dist/core/secret-vault-cli.js +204 -0
  36. package/dist/core/types.js +1 -0
  37. package/dist/index.js +404 -0
  38. package/dist/interfaces/dispatcher.js +214 -0
  39. package/dist/interfaces/telegram_handler.js +82 -0
  40. package/dist/interfaces/tui-dashboard.js +53 -0
  41. package/dist/interfaces/whatsapp_handler.js +94 -0
  42. package/dist/release/cli.js +97 -0
  43. package/dist/release/mvp-gate-cli.js +118 -0
  44. package/dist/release/twinbot-config-schema.js +162 -0
  45. package/dist/release/twinclaw-config-schema.js +162 -0
  46. package/dist/services/block-chunker.js +174 -0
  47. package/dist/services/browser-service.js +334 -0
  48. package/dist/services/context-lifecycle.js +314 -0
  49. package/dist/services/db.js +1055 -0
  50. package/dist/services/delivery-tracker.js +110 -0
  51. package/dist/services/dm-pairing.js +245 -0
  52. package/dist/services/embedding-service.js +125 -0
  53. package/dist/services/file-watcher.js +125 -0
  54. package/dist/services/inbound-debounce.js +92 -0
  55. package/dist/services/incident-manager.js +516 -0
  56. package/dist/services/job-scheduler.js +176 -0
  57. package/dist/services/local-state-backup.js +682 -0
  58. package/dist/services/mcp-client-adapter.js +291 -0
  59. package/dist/services/mcp-server-manager.js +143 -0
  60. package/dist/services/model-router.js +927 -0
  61. package/dist/services/mvp-gate.js +845 -0
  62. package/dist/services/orchestration-service.js +422 -0
  63. package/dist/services/persona-state.js +256 -0
  64. package/dist/services/policy-engine.js +92 -0
  65. package/dist/services/proactive-notifier.js +94 -0
  66. package/dist/services/queue-service.js +146 -0
  67. package/dist/services/release-pipeline.js +652 -0
  68. package/dist/services/runtime-budget-governor.js +415 -0
  69. package/dist/services/secret-vault.js +704 -0
  70. package/dist/services/semantic-memory.js +249 -0
  71. package/dist/services/skill-package-manager.js +806 -0
  72. package/dist/services/skill-registry.js +122 -0
  73. package/dist/services/streaming-output.js +75 -0
  74. package/dist/services/stt-service.js +39 -0
  75. package/dist/services/tts-service.js +44 -0
  76. package/dist/skills/builtin.js +250 -0
  77. package/dist/skills/shell.js +87 -0
  78. package/dist/skills/types.js +1 -0
  79. package/dist/types/api.js +1 -0
  80. package/dist/types/context-budget.js +1 -0
  81. package/dist/types/doctor.js +1 -0
  82. package/dist/types/file-watcher.js +1 -0
  83. package/dist/types/incident.js +1 -0
  84. package/dist/types/local-state-backup.js +1 -0
  85. package/dist/types/mcp.js +1 -0
  86. package/dist/types/messaging.js +1 -0
  87. package/dist/types/model-routing.js +1 -0
  88. package/dist/types/mvp-gate.js +2 -0
  89. package/dist/types/orchestration.js +1 -0
  90. package/dist/types/persona-state.js +22 -0
  91. package/dist/types/policy.js +1 -0
  92. package/dist/types/reasoning-graph.js +1 -0
  93. package/dist/types/release.js +1 -0
  94. package/dist/types/reliability.js +1 -0
  95. package/dist/types/runtime-budget.js +1 -0
  96. package/dist/types/scheduler.js +1 -0
  97. package/dist/types/secret-vault.js +1 -0
  98. package/dist/types/skill-packages.js +1 -0
  99. package/dist/types/websocket.js +14 -0
  100. package/dist/utils/logger.js +57 -0
  101. package/dist/utils/retry.js +61 -0
  102. package/dist/utils/secret-scan.js +208 -0
  103. package/mcp-servers.json +179 -0
  104. package/package.json +81 -0
  105. package/skill-packages.json +92 -0
  106. package/skill-packages.lock.json +5 -0
  107. package/src/skills/builtin.ts +275 -0
  108. package/src/skills/shell.ts +118 -0
  109. package/src/skills/types.ts +30 -0
  110. package/src/types/api.ts +252 -0
  111. package/src/types/blessed-contrib.d.ts +4 -0
  112. package/src/types/context-budget.ts +76 -0
  113. package/src/types/doctor.ts +29 -0
  114. package/src/types/file-watcher.ts +26 -0
  115. package/src/types/incident.ts +57 -0
  116. package/src/types/local-state-backup.ts +121 -0
  117. package/src/types/mcp.ts +106 -0
  118. package/src/types/messaging.ts +35 -0
  119. package/src/types/model-routing.ts +61 -0
  120. package/src/types/mvp-gate.ts +99 -0
  121. package/src/types/orchestration.ts +65 -0
  122. package/src/types/persona-state.ts +61 -0
  123. package/src/types/policy.ts +27 -0
  124. package/src/types/reasoning-graph.ts +58 -0
  125. package/src/types/release.ts +115 -0
  126. package/src/types/reliability.ts +43 -0
  127. package/src/types/runtime-budget.ts +85 -0
  128. package/src/types/scheduler.ts +47 -0
  129. package/src/types/secret-vault.ts +62 -0
  130. package/src/types/skill-packages.ts +81 -0
  131. package/src/types/sqlite-vec.d.ts +5 -0
  132. package/src/types/websocket.ts +122 -0
@@ -0,0 +1,162 @@
1
/**
 * Reports whether a value is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for non-null, non-array objects; `false` otherwise.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
4
/**
 * Reads `parent[key]` and requires it to be a non-null, non-array object.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {object|null} The property value, or `null` when it is not an object.
 */
function readRequiredObject(parent, key, path, errors) {
    const candidate = parent[key];
    // Same test as the module's isRecord helper: object, not null, not an array.
    const isPlainObject = typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);
    if (isPlainObject) {
        return candidate;
    }
    errors.push(`${path} must be an object.`);
    return null;
}
12
/**
 * Reads `parent[key]` and requires it to be a string.
 *
 * A blank (empty or whitespace-only) string is reported as an error unless
 * `allowEmpty` is set, but the string itself is still returned in that case.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in error messages.
 * @param {string[]} errors - Collector that receives error messages.
 * @param {boolean} [allowEmpty=false] - Whether a blank string is acceptable.
 * @returns {string|null} The string value, or `null` when the value is not a string.
 */
function readRequiredString(parent, key, path, errors, allowEmpty = false) {
    const candidate = parent[key];
    if (typeof candidate === 'string') {
        if (!allowEmpty && candidate.trim() === '') {
            errors.push(`${path} must be a non-empty string.`);
        }
        return candidate;
    }
    errors.push(`${path} must be a string.`);
    return null;
}
23
/**
 * Reads `parent[key]` and requires it to be a boolean.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {boolean|null} The boolean value, or `null` when the value is not a boolean.
 */
function readRequiredBoolean(parent, key, path, errors) {
    const candidate = parent[key];
    if (typeof candidate === 'boolean') {
        return candidate;
    }
    errors.push(`${path} must be a boolean.`);
    return null;
}
31
/**
 * Reads `parent[key]` and requires it to be an integer within `[min, max]`.
 *
 * An out-of-range integer is reported as an error but still returned.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in error messages.
 * @param {number} min - Smallest accepted value (inclusive).
 * @param {number} max - Largest accepted value (inclusive).
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {number|null} The integer value, or `null` when the value is not an integer.
 */
function readRequiredIntegerInRange(parent, key, path, min, max, errors) {
    const candidate = parent[key];
    // Number.isInteger is false for every non-number, so no separate typeof test is needed.
    if (!Number.isInteger(candidate)) {
        errors.push(`${path} must be an integer.`);
        return null;
    }
    const belowMin = candidate < min;
    const aboveMax = candidate > max;
    if (belowMin || aboveMax) {
        errors.push(`${path} must be between ${min} and ${max}.`);
    }
    return candidate;
}
42
/**
 * Reads `parent[key]` and requires it to be an array whose entries are all strings.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {string[]|null} The array, or `null` when the value is not an array of strings.
 */
function readStringArray(parent, key, path, errors) {
    const candidate = parent[key];
    const isStringList = Array.isArray(candidate) && candidate.every((entry) => typeof entry === 'string');
    if (isStringList) {
        return candidate;
    }
    errors.push(`${path} must be an array of strings.`);
    return null;
}
50
/**
 * Validates the `runtime` section of the config root.
 *
 * Checks `apiSecret` (non-empty string), `apiPort` (integer 1-65535) and
 * `secretVaultRequired` (array of strings). Nothing further is checked when
 * `runtime` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateRuntime(root, errors) {
    const section = readRequiredObject(root, 'runtime', 'runtime', errors);
    if (section !== null) {
        readRequiredString(section, 'apiSecret', 'runtime.apiSecret', errors, false);
        readRequiredIntegerInRange(section, 'apiPort', 'runtime.apiPort', 1, 65535, errors);
        readStringArray(section, 'secretVaultRequired', 'runtime.secretVaultRequired', errors);
    }
}
59
/**
 * Validates the `models` section of the config root.
 *
 * Each of the three API key fields must be a string (blank allowed), and at
 * least one of them must be non-blank.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateModels(root, errors) {
    const section = readRequiredObject(root, 'models', 'models', errors);
    if (section === null) {
        return;
    }
    const keyFields = [
        ['modalApiKey', 'models.modalApiKey'],
        ['openRouterApiKey', 'models.openRouterApiKey'],
        ['geminiApiKey', 'models.geminiApiKey'],
    ];
    // Read every key (in declaration order) so each type error is reported.
    const keyValues = keyFields.map(([key, path]) => readRequiredString(section, key, path, errors, true));
    const anyConfigured = keyValues.some((entry) => typeof entry === 'string' && entry.trim().length > 0);
    if (anyConfigured) {
        return;
    }
    errors.push('At least one model API key must be configured in models.*ApiKey.');
}
72
/**
 * Validates the `messaging` section of the config root and its five
 * sub-sections: `telegram`, `whatsapp`, `voice`, `inbound` and `streaming`.
 *
 * Each sub-section must be an object; the fields of a sub-section are only
 * checked when the sub-section itself is an object. Nothing is checked when
 * `messaging` is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateMessaging(root, errors) {
    const messaging = readRequiredObject(root, 'messaging', 'messaging', errors);
    if (!messaging) {
        return;
    }
    const telegram = readRequiredObject(messaging, 'telegram', 'messaging.telegram', errors);
    if (telegram) {
        readRequiredBoolean(telegram, 'enabled', 'messaging.telegram.enabled', errors);
        readRequiredString(telegram, 'botToken', 'messaging.telegram.botToken', errors, true);
        const userId = telegram['userId'];
        // `typeof NaN` and `typeof Infinity` are both 'number', so a plain typeof
        // test would accept them; Number.isFinite rejects them and all non-numbers.
        if (userId !== null && !Number.isFinite(userId)) {
            errors.push('messaging.telegram.userId must be a number or null.');
        }
    }
    const whatsapp = readRequiredObject(messaging, 'whatsapp', 'messaging.whatsapp', errors);
    if (whatsapp) {
        readRequiredBoolean(whatsapp, 'enabled', 'messaging.whatsapp.enabled', errors);
        readRequiredString(whatsapp, 'phoneNumber', 'messaging.whatsapp.phoneNumber', errors, true);
    }
    const voice = readRequiredObject(messaging, 'voice', 'messaging.voice', errors);
    if (voice) {
        readRequiredString(voice, 'groqApiKey', 'messaging.voice.groqApiKey', errors, true);
    }
    const inbound = readRequiredObject(messaging, 'inbound', 'messaging.inbound', errors);
    if (inbound) {
        readRequiredBoolean(inbound, 'enabled', 'messaging.inbound.enabled', errors);
        readRequiredIntegerInRange(inbound, 'debounceMs', 'messaging.inbound.debounceMs', 0, 60_000, errors);
    }
    const streaming = readRequiredObject(messaging, 'streaming', 'messaging.streaming', errors);
    if (streaming) {
        readRequiredBoolean(streaming, 'blockStreamingDefault', 'messaging.streaming.blockStreamingDefault', errors);
        const breakMode = readRequiredString(streaming, 'blockStreamingBreak', 'messaging.streaming.blockStreamingBreak', errors, false);
        // A null or empty breakMode has already been reported by the reader above.
        if (breakMode && breakMode !== 'paragraph' && breakMode !== 'sentence') {
            errors.push("messaging.streaming.blockStreamingBreak must be 'paragraph' or 'sentence'.");
        }
        readRequiredIntegerInRange(streaming, 'blockStreamingMinChars', 'messaging.streaming.blockStreamingMinChars', 1, 50_000, errors);
        readRequiredIntegerInRange(streaming, 'blockStreamingMaxChars', 'messaging.streaming.blockStreamingMaxChars', 1, 200_000, errors);
        readRequiredBoolean(streaming, 'blockStreamingCoalesce', 'messaging.streaming.blockStreamingCoalesce', errors);
        readRequiredIntegerInRange(streaming, 'humanDelayMs', 'messaging.streaming.humanDelayMs', 0, 120_000, errors);
    }
}
113
/**
 * Validates the `storage` section of the config root.
 *
 * Requires `embeddingDim` to be an integer between 1 and 1,000,000. Nothing
 * further is checked when `storage` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateStorage(root, errors) {
    const section = readRequiredObject(root, 'storage', 'storage', errors);
    if (section !== null) {
        readRequiredIntegerInRange(section, 'embeddingDim', 'storage.embeddingDim', 1, 1_000_000, errors);
    }
}
120
/**
 * Validates the `integration` section of the config root.
 *
 * `embeddingProvider` must be `'openai'` or `'ollama'`. The two API key fields
 * must be strings (blank allowed); the URL and model fields must be non-blank
 * strings. Nothing is checked when `integration` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateIntegration(root, errors) {
    const section = readRequiredObject(root, 'integration', 'integration', errors);
    if (section === null) {
        return;
    }
    const provider = readRequiredString(section, 'embeddingProvider', 'integration.embeddingProvider', errors, false);
    const isKnownProvider = provider === 'openai' || provider === 'ollama';
    // A null or empty provider has already been reported by the reader above.
    if (provider && !isKnownProvider) {
        errors.push("integration.embeddingProvider must be 'openai' or 'ollama'.");
    }
    // [key, path, allowEmpty] — checked in this order so error ordering is stable.
    const stringFields = [
        ['embeddingApiKey', 'integration.embeddingApiKey', true],
        ['openaiApiKey', 'integration.openaiApiKey', true],
        ['embeddingApiUrl', 'integration.embeddingApiUrl', false],
        ['embeddingModel', 'integration.embeddingModel', false],
        ['ollamaBaseUrl', 'integration.ollamaBaseUrl', false],
        ['ollamaEmbeddingModel', 'integration.ollamaEmbeddingModel', false],
    ];
    for (const [key, path, allowEmpty] of stringFields) {
        readRequiredString(section, key, path, errors, allowEmpty);
    }
}
136
/**
 * Validates the `tools` section of the config root.
 *
 * Requires `allow` and `deny` to each be an array of strings. Nothing further
 * is checked when `tools` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateTools(root, errors) {
    const section = readRequiredObject(root, 'tools', 'tools', errors);
    if (section !== null) {
        readStringArray(section, 'allow', 'tools.allow', errors);
        readStringArray(section, 'deny', 'tools.deny', errors);
    }
}
144
/**
 * Validates a parsed config value against the expected schema.
 *
 * Runs the runtime, models, messaging, storage, integration and tools
 * validators in that order and collects every error they report.
 *
 * @param {unknown} value - Parsed config root.
 * @returns {{ valid: boolean, errors: string[] }} `valid` is `true` only when
 *   no errors were collected.
 */
export function validateTwinBotConfigSchema(value) {
    const rootIsObject = isRecord(value);
    if (!rootIsObject) {
        return { valid: false, errors: ['Root config must be a JSON object.'] };
    }
    const errors = [];
    const sectionValidators = [
        validateRuntime,
        validateModels,
        validateMessaging,
        validateStorage,
        validateIntegration,
        validateTools,
    ];
    for (const validateSection of sectionValidators) {
        validateSection(value, errors);
    }
    const valid = errors.length === 0;
    return { valid, errors };
}
@@ -0,0 +1,162 @@
1
/**
 * Reports whether a value is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for non-null, non-array objects; `false` otherwise.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
4
/**
 * Reads `parent[key]` and requires it to be a non-null, non-array object.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {object|null} The property value, or `null` when it is not an object.
 */
function readRequiredObject(parent, key, path, errors) {
    const candidate = parent[key];
    // Same test as the module's isRecord helper: object, not null, not an array.
    const isPlainObject = typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);
    if (isPlainObject) {
        return candidate;
    }
    errors.push(`${path} must be an object.`);
    return null;
}
12
/**
 * Reads `parent[key]` and requires it to be a string.
 *
 * A blank (empty or whitespace-only) string is reported as an error unless
 * `allowEmpty` is set, but the string itself is still returned in that case.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in error messages.
 * @param {string[]} errors - Collector that receives error messages.
 * @param {boolean} [allowEmpty=false] - Whether a blank string is acceptable.
 * @returns {string|null} The string value, or `null` when the value is not a string.
 */
function readRequiredString(parent, key, path, errors, allowEmpty = false) {
    const candidate = parent[key];
    if (typeof candidate === 'string') {
        if (!allowEmpty && candidate.trim() === '') {
            errors.push(`${path} must be a non-empty string.`);
        }
        return candidate;
    }
    errors.push(`${path} must be a string.`);
    return null;
}
23
/**
 * Reads `parent[key]` and requires it to be a boolean.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {boolean|null} The boolean value, or `null` when the value is not a boolean.
 */
function readRequiredBoolean(parent, key, path, errors) {
    const candidate = parent[key];
    if (typeof candidate === 'boolean') {
        return candidate;
    }
    errors.push(`${path} must be a boolean.`);
    return null;
}
31
/**
 * Reads `parent[key]` and requires it to be an integer within `[min, max]`.
 *
 * An out-of-range integer is reported as an error but still returned.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in error messages.
 * @param {number} min - Smallest accepted value (inclusive).
 * @param {number} max - Largest accepted value (inclusive).
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {number|null} The integer value, or `null` when the value is not an integer.
 */
function readRequiredIntegerInRange(parent, key, path, min, max, errors) {
    const candidate = parent[key];
    // Number.isInteger is false for every non-number, so no separate typeof test is needed.
    if (!Number.isInteger(candidate)) {
        errors.push(`${path} must be an integer.`);
        return null;
    }
    const belowMin = candidate < min;
    const aboveMax = candidate > max;
    if (belowMin || aboveMax) {
        errors.push(`${path} must be between ${min} and ${max}.`);
    }
    return candidate;
}
42
/**
 * Reads `parent[key]` and requires it to be an array whose entries are all strings.
 *
 * @param {object} parent - Object that holds the property.
 * @param {string} key - Property name to read from `parent`.
 * @param {string} path - Dotted path used in the error message.
 * @param {string[]} errors - Collector that receives an error message on failure.
 * @returns {string[]|null} The array, or `null` when the value is not an array of strings.
 */
function readStringArray(parent, key, path, errors) {
    const candidate = parent[key];
    const isStringList = Array.isArray(candidate) && candidate.every((entry) => typeof entry === 'string');
    if (isStringList) {
        return candidate;
    }
    errors.push(`${path} must be an array of strings.`);
    return null;
}
50
/**
 * Validates the `runtime` section of the config root.
 *
 * Checks `apiSecret` (non-empty string), `apiPort` (integer 1-65535) and
 * `secretVaultRequired` (array of strings). Nothing further is checked when
 * `runtime` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateRuntime(root, errors) {
    const section = readRequiredObject(root, 'runtime', 'runtime', errors);
    if (section !== null) {
        readRequiredString(section, 'apiSecret', 'runtime.apiSecret', errors, false);
        readRequiredIntegerInRange(section, 'apiPort', 'runtime.apiPort', 1, 65535, errors);
        readStringArray(section, 'secretVaultRequired', 'runtime.secretVaultRequired', errors);
    }
}
59
/**
 * Validates the `models` section of the config root.
 *
 * Each of the three API key fields must be a string (blank allowed), and at
 * least one of them must be non-blank.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateModels(root, errors) {
    const section = readRequiredObject(root, 'models', 'models', errors);
    if (section === null) {
        return;
    }
    const keyFields = [
        ['modalApiKey', 'models.modalApiKey'],
        ['openRouterApiKey', 'models.openRouterApiKey'],
        ['geminiApiKey', 'models.geminiApiKey'],
    ];
    // Read every key (in declaration order) so each type error is reported.
    const keyValues = keyFields.map(([key, path]) => readRequiredString(section, key, path, errors, true));
    const anyConfigured = keyValues.some((entry) => typeof entry === 'string' && entry.trim().length > 0);
    if (anyConfigured) {
        return;
    }
    errors.push('At least one model API key must be configured in models.*ApiKey.');
}
72
/**
 * Validates the `messaging` section of the config root and its five
 * sub-sections: `telegram`, `whatsapp`, `voice`, `inbound` and `streaming`.
 *
 * Each sub-section must be an object; the fields of a sub-section are only
 * checked when the sub-section itself is an object. Nothing is checked when
 * `messaging` is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateMessaging(root, errors) {
    const messaging = readRequiredObject(root, 'messaging', 'messaging', errors);
    if (!messaging) {
        return;
    }
    const telegram = readRequiredObject(messaging, 'telegram', 'messaging.telegram', errors);
    if (telegram) {
        readRequiredBoolean(telegram, 'enabled', 'messaging.telegram.enabled', errors);
        readRequiredString(telegram, 'botToken', 'messaging.telegram.botToken', errors, true);
        const userId = telegram['userId'];
        // `typeof NaN` and `typeof Infinity` are both 'number', so a plain typeof
        // test would accept them; Number.isFinite rejects them and all non-numbers.
        if (userId !== null && !Number.isFinite(userId)) {
            errors.push('messaging.telegram.userId must be a number or null.');
        }
    }
    const whatsapp = readRequiredObject(messaging, 'whatsapp', 'messaging.whatsapp', errors);
    if (whatsapp) {
        readRequiredBoolean(whatsapp, 'enabled', 'messaging.whatsapp.enabled', errors);
        readRequiredString(whatsapp, 'phoneNumber', 'messaging.whatsapp.phoneNumber', errors, true);
    }
    const voice = readRequiredObject(messaging, 'voice', 'messaging.voice', errors);
    if (voice) {
        readRequiredString(voice, 'groqApiKey', 'messaging.voice.groqApiKey', errors, true);
    }
    const inbound = readRequiredObject(messaging, 'inbound', 'messaging.inbound', errors);
    if (inbound) {
        readRequiredBoolean(inbound, 'enabled', 'messaging.inbound.enabled', errors);
        readRequiredIntegerInRange(inbound, 'debounceMs', 'messaging.inbound.debounceMs', 0, 60_000, errors);
    }
    const streaming = readRequiredObject(messaging, 'streaming', 'messaging.streaming', errors);
    if (streaming) {
        readRequiredBoolean(streaming, 'blockStreamingDefault', 'messaging.streaming.blockStreamingDefault', errors);
        const breakMode = readRequiredString(streaming, 'blockStreamingBreak', 'messaging.streaming.blockStreamingBreak', errors, false);
        // A null or empty breakMode has already been reported by the reader above.
        if (breakMode && breakMode !== 'paragraph' && breakMode !== 'sentence') {
            errors.push("messaging.streaming.blockStreamingBreak must be 'paragraph' or 'sentence'.");
        }
        readRequiredIntegerInRange(streaming, 'blockStreamingMinChars', 'messaging.streaming.blockStreamingMinChars', 1, 50_000, errors);
        readRequiredIntegerInRange(streaming, 'blockStreamingMaxChars', 'messaging.streaming.blockStreamingMaxChars', 1, 200_000, errors);
        readRequiredBoolean(streaming, 'blockStreamingCoalesce', 'messaging.streaming.blockStreamingCoalesce', errors);
        readRequiredIntegerInRange(streaming, 'humanDelayMs', 'messaging.streaming.humanDelayMs', 0, 120_000, errors);
    }
}
113
/**
 * Validates the `storage` section of the config root.
 *
 * Requires `embeddingDim` to be an integer between 1 and 1,000,000. Nothing
 * further is checked when `storage` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateStorage(root, errors) {
    const section = readRequiredObject(root, 'storage', 'storage', errors);
    if (section !== null) {
        readRequiredIntegerInRange(section, 'embeddingDim', 'storage.embeddingDim', 1, 1_000_000, errors);
    }
}
120
/**
 * Validates the `integration` section of the config root.
 *
 * `embeddingProvider` must be `'openai'` or `'ollama'`. The two API key fields
 * must be strings (blank allowed); the URL and model fields must be non-blank
 * strings. Nothing is checked when `integration` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateIntegration(root, errors) {
    const section = readRequiredObject(root, 'integration', 'integration', errors);
    if (section === null) {
        return;
    }
    const provider = readRequiredString(section, 'embeddingProvider', 'integration.embeddingProvider', errors, false);
    const isKnownProvider = provider === 'openai' || provider === 'ollama';
    // A null or empty provider has already been reported by the reader above.
    if (provider && !isKnownProvider) {
        errors.push("integration.embeddingProvider must be 'openai' or 'ollama'.");
    }
    // [key, path, allowEmpty] — checked in this order so error ordering is stable.
    const stringFields = [
        ['embeddingApiKey', 'integration.embeddingApiKey', true],
        ['openaiApiKey', 'integration.openaiApiKey', true],
        ['embeddingApiUrl', 'integration.embeddingApiUrl', false],
        ['embeddingModel', 'integration.embeddingModel', false],
        ['ollamaBaseUrl', 'integration.ollamaBaseUrl', false],
        ['ollamaEmbeddingModel', 'integration.ollamaEmbeddingModel', false],
    ];
    for (const [key, path, allowEmpty] of stringFields) {
        readRequiredString(section, key, path, errors, allowEmpty);
    }
}
136
/**
 * Validates the `tools` section of the config root.
 *
 * Requires `allow` and `deny` to each be an array of strings. Nothing further
 * is checked when `tools` itself is not an object.
 *
 * @param {object} root - Config root object.
 * @param {string[]} errors - Collector that receives error messages.
 * @returns {void}
 */
function validateTools(root, errors) {
    const section = readRequiredObject(root, 'tools', 'tools', errors);
    if (section !== null) {
        readStringArray(section, 'allow', 'tools.allow', errors);
        readStringArray(section, 'deny', 'tools.deny', errors);
    }
}
144
/**
 * Validates a parsed config value against the expected schema.
 *
 * Runs the runtime, models, messaging, storage, integration and tools
 * validators in that order and collects every error they report.
 *
 * @param {unknown} value - Parsed config root.
 * @returns {{ valid: boolean, errors: string[] }} `valid` is `true` only when
 *   no errors were collected.
 */
export function validateTwinclawConfigSchema(value) {
    const rootIsObject = isRecord(value);
    if (!rootIsObject) {
        return { valid: false, errors: ['Root config must be a JSON object.'] };
    }
    const errors = [];
    const sectionValidators = [
        validateRuntime,
        validateModels,
        validateMessaging,
        validateStorage,
        validateIntegration,
        validateTools,
    ];
    for (const validateSection of sectionValidators) {
        validateSection(value, errors);
    }
    const valid = errors.length === 0;
    return { valid, errors };
}
@@ -0,0 +1,174 @@
1
/** Option values used when the caller omits them. */
const DEFAULT_OPTIONS = {
    minChars: 50,
    maxChars: 800,
    breakOn: 'paragraph',
    coalesce: true,
};
/** Matches one complete fenced code block: an opening fence through the next closing fence. */
const CODE_FENCE_PATTERN = /```[\s\S]*?```/g;
const CODE_FENCE_START = '```';
const CODE_FENCE_END = '```';
/**
 * Splits text into chunks of at most `maxChars` characters, breaking on
 * paragraph or sentence boundaries and never inside a closed fenced code block.
 *
 * `maxChars` is exceeded only by an indivisible unit: a single sentence, or a
 * run of text containing a fenced code block, that is itself longer than
 * `maxChars`. Such a unit is emitted whole as its own chunk.
 */
export class EmbeddedBlockChunker {
    #minChars;
    #maxChars;
    #breakOn;
    #coalesce;
    /**
     * @param {object} [options] - Chunking options.
     * @param {number} [options.minChars=50] - Preferred minimum chunk length (floored, at least 1).
     * @param {number} [options.maxChars=800] - Maximum chunk length (floored, at least `minChars + 1`).
     * @param {string} [options.breakOn='paragraph'] - `'paragraph'` splits on blank lines;
     *   any other value splits on sentence boundaries.
     * @param {boolean} [options.coalesce=true] - Whether to merge undersized neighbouring chunks.
     */
    constructor(options = {}) {
        const opts = options ?? {};
        // A non-numeric option would floor to NaN and make every size comparison
        // false, so fall back to the default in that case.
        const toInteger = (value, fallback) => {
            const floored = Math.floor(value ?? fallback);
            return Number.isNaN(floored) ? fallback : floored;
        };
        this.#minChars = Math.max(1, toInteger(opts.minChars, DEFAULT_OPTIONS.minChars));
        this.#maxChars = Math.max(this.#minChars + 1, toInteger(opts.maxChars, DEFAULT_OPTIONS.maxChars));
        this.#breakOn = opts.breakOn ?? DEFAULT_OPTIONS.breakOn;
        this.#coalesce = opts.coalesce ?? DEFAULT_OPTIONS.coalesce;
    }
    /** @returns {number} Effective minimum chunk length. */
    get minChars() {
        return this.#minChars;
    }
    /** @returns {number} Effective maximum chunk length. */
    get maxChars() {
        return this.#maxChars;
    }
    /**
     * Splits `text` into chunks.
     *
     * @param {string} text - Text to split.
     * @returns {string[]} Trimmed, non-empty chunks in source order; an empty
     *   array when `text` is not a string or is blank.
     */
    chunk(text) {
        if (typeof text !== 'string' || text.trim().length === 0) {
            return [];
        }
        const chunks = this.#breakOn === 'paragraph'
            ? this.#chunkByParagraph(text)
            : this.#chunkBySentence(text);
        return this.#coalesce ? this.#coalesceChunks(chunks) : chunks;
    }
    /** Returns the `{ start, end }` offsets (end exclusive) of every closed fenced code block. */
    #findCodeBlockRanges(text) {
        // The pattern is a shared /g regex; matchAll copies its lastIndex, so pin it to 0.
        CODE_FENCE_PATTERN.lastIndex = 0;
        return Array.from(text.matchAll(CODE_FENCE_PATTERN), (match) => ({
            start: match.index,
            end: match.index + match[0].length,
        }));
    }
    /** Reports whether `index` falls inside any of the given code block ranges. */
    #isInsideCodeBlock(index, ranges) {
        return ranges.some((range) => index >= range.start && index < range.end);
    }
    /** Picks the joiner for two pieces: a newline next to a code fence, otherwise a space. */
    #separatorFor(left, right) {
        // A fence glued onto the end of a text line stops being a fence line.
        const touchesFence = left.endsWith(CODE_FENCE_END) || right.startsWith(CODE_FENCE_START);
        return touchesFence ? '\n' : ' ';
    }
    /** Splits on blank lines, ignoring blank lines inside fenced code blocks. */
    #splitParagraphs(text) {
        const ranges = this.#findCodeBlockRanges(text);
        const paragraphs = [];
        let cursor = 0;
        for (const match of text.matchAll(/\n\n+/g)) {
            if (this.#isInsideCodeBlock(match.index, ranges)) {
                continue;
            }
            paragraphs.push(text.slice(cursor, match.index));
            cursor = match.index + match[0].length;
        }
        paragraphs.push(text.slice(cursor));
        return paragraphs.map((paragraph) => paragraph.trim()).filter((paragraph) => paragraph.length > 0);
    }
    /** Splits after `.`, `!` or `?` followed by whitespace, ignoring matches inside fenced code blocks. */
    #splitSentences(text) {
        const ranges = this.#findCodeBlockRanges(text);
        const sentences = [];
        let cursor = 0;
        for (const match of text.matchAll(/[.!?]\s+/g)) {
            if (this.#isInsideCodeBlock(match.index, ranges)) {
                continue;
            }
            // Keep the punctuation mark with the sentence it terminates.
            const sentence = text.slice(cursor, match.index + 1).trim();
            if (sentence) {
                sentences.push(sentence);
            }
            cursor = match.index + match[0].length;
        }
        const tail = text.slice(cursor).trim();
        if (tail) {
            sentences.push(tail);
        }
        return sentences;
    }
    /**
     * Greedily appends `sentences` to `current`, pushing a finished chunk to
     * `chunks` whenever the next sentence would not fit within `maxChars`.
     * Returns the still-open chunk (possibly empty).
     */
    #packSentences(chunks, current, sentences) {
        let pending = current;
        for (const sentence of sentences) {
            const separator = pending ? this.#separatorFor(pending, sentence) : '';
            if (pending.length + separator.length + sentence.length <= this.#maxChars) {
                pending += separator + sentence;
            }
            else {
                if (pending) {
                    chunks.push(pending);
                }
                pending = sentence;
            }
        }
        return pending;
    }
    /** Packs whole paragraphs into chunks, falling back to sentences for paragraphs that do not fit. */
    #chunkByParagraph(text) {
        const chunks = [];
        let current = '';
        for (const paragraph of this.#splitParagraphs(text)) {
            const joinedLength = current ? current.length + 2 + paragraph.length : paragraph.length;
            if (joinedLength <= this.#maxChars) {
                current += (current ? '\n\n' : '') + paragraph;
                continue;
            }
            // The paragraph does not fit: close the open chunk if it is already big enough.
            if (current.length >= this.#minChars) {
                chunks.push(current);
                current = '';
            }
            if (!current && paragraph.length <= this.#maxChars) {
                current = paragraph;
                continue;
            }
            // Either the paragraph alone exceeds maxChars, or a short chunk is
            // still open; in both cases continue packing sentence by sentence.
            current = this.#packSentences(chunks, current, this.#splitSentences(paragraph));
        }
        if (current) {
            chunks.push(current);
        }
        return chunks;
    }
    /** Packs sentences into chunks of at most `maxChars`. */
    #chunkBySentence(text) {
        const chunks = [];
        const tail = this.#packSentences(chunks, '', this.#splitSentences(text));
        if (tail) {
            chunks.push(tail);
        }
        return chunks;
    }
    /** Merges a chunk into its predecessor when either is below `minChars` and the result fits `maxChars`. */
    #coalesceChunks(chunks) {
        if (chunks.length <= 1) {
            return chunks;
        }
        const merged = [];
        for (const chunk of chunks) {
            const previous = merged[merged.length - 1];
            if (previous !== undefined) {
                const separator = this.#separatorFor(previous, chunk);
                const fits = previous.length + separator.length + chunk.length <= this.#maxChars;
                const eitherShort = previous.length < this.#minChars || chunk.length < this.#minChars;
                if (fits && eitherShort) {
                    merged[merged.length - 1] = previous + separator + chunk;
                    continue;
                }
            }
            merged.push(chunk);
        }
        return merged;
    }
    /**
     * Appends a closing fence on its own line when `text` contains an odd
     * number of fence markers.
     *
     * @param {string} text - Text that may end inside an open code fence.
     * @returns {string} `text`, with `'\n```'` appended when a fence is left open.
     */
    static ensureCodeFenceClosed(text) {
        const fenceCount = (text.match(/```/g) || []).length;
        if (fenceCount % 2 === 1) {
            return text + '\n' + CODE_FENCE_END;
        }
        return text;
    }
}