adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
@@ -1,344 +0,0 @@
1
- /**
2
- * TMLPD Speculative Decoding
3
- *
4
- * Based on Medusa (arXiv:2401.10774) and EAGLE approaches
5
- * Small draft model proposes tokens, large model verifies in parallel
6
- * 2-3x faster generation with same quality
7
- */
8
-
9
- export interface SpeculativeConfig {
10
- draft_model: string;
11
- target_model: string;
12
- num_draft_tokens: number;
13
- temperature?: number;
14
- max_verify_tokens?: number;
15
- }
16
-
17
- export interface SpeculativeResult {
18
- accepted: number;
19
- rejected: number;
20
- draft_tokens: number;
21
- speedup: number;
22
- final_text: string;
23
- }
24
-
25
- export interface DraftCandidate {
26
- token: string;
27
- probability: number;
28
- position: number;
29
- }
30
-
31
- /**
32
- * Medusa-style multi-token prediction heads
33
- * Instead of separate draft model, uses speculative sampling
34
- */
35
- export class MedusaPredictor {
36
- private num_heads: number;
37
- private temperature: number;
38
-
39
- constructor(options?: {
40
- num_heads?: number;
41
- temperature?: number;
42
- }) {
43
- this.num_heads = options?.num_heads || 5;
44
- this.temperature = options?.temperature || 0.7;
45
- }
46
-
47
- /**
48
- * Generate k draft tokens from one forward pass
49
- * In production, this uses actual Medusa prediction heads
50
- */
51
- async generateDraftTokens(
52
- context: string,
53
- last_token: string,
54
- getLogits: (text: string) => Promise<Record<string, number>>
55
- ): Promise<DraftCandidate[]> {
56
- // Simulate getting logits for next token predictions
57
- // In real Medusa, this comes from extra prediction heads
58
- const prompt = context + last_token;
59
- const logits = await getLogits(prompt);
60
-
61
- const candidates: DraftCandidate[] = [];
62
- const sorted = Object.entries(logits)
63
- .sort((a, b) => b[1] - a[1])
64
- .slice(0, this.num_heads);
65
-
66
- for (let i = 0; i < sorted.length; i++) {
67
- const [token, prob] = sorted[i];
68
- // Apply temperature
69
- const adjusted = Math.pow(prob, 1 / this.temperature);
70
- candidates.push({
71
- token,
72
- probability: adjusted,
73
- position: i + 1
74
- });
75
- }
76
-
77
- return candidates;
78
- }
79
-
80
- /**
81
- * Verify draft tokens against target model
82
- * Returns which tokens were accepted
83
- */
84
- async verifyDraft(
85
- context: string,
86
- drafts: DraftCandidate[],
87
- targetLogits: (text: string) => Promise<Record<string, number>>
88
- ): Promise<{ accepted: number[]; rejected: number[] }> {
89
- const accepted: number[] = [];
90
- const rejected: number[] = [];
91
-
92
- let current_context = context;
93
-
94
- for (const draft of drafts) {
95
- // Get target model's prediction for this position
96
- const target_logits = await targetLogits(current_context);
97
- const target_token = Object.entries(target_logits)
98
- .sort((a, b) => b[1] - a[1])[0]?.[0];
99
-
100
- // Accept if matches or probability is high enough
101
- if (draft.token === target_token || draft.probability > 0.3) {
102
- accepted.push(draft.position);
103
- current_context += draft.token;
104
- } else {
105
- rejected.push(draft.position);
106
- break; // Reject rest of draft
107
- }
108
- }
109
-
110
- return { accepted, rejected };
111
- }
112
- }
113
-
114
- /**
115
- * EAGLE-style speculative decoding
116
- * Uses regression-based draft token prediction
117
- */
118
- export class EagleSpeculative {
119
- private num_draft_tokens: number;
120
-
121
- constructor(num_draft_tokens: number = 4) {
122
- this.num_draft_tokens = num_draft_tokens;
123
- }
124
-
125
- /**
126
- * Generate draft sequence
127
- * In production, this uses EAGLE's auto-regressive draft model
128
- */
129
- async generateDraft(
130
- context: string,
131
- generateFn: (prompt: string) => Promise<string>
132
- ): Promise<string[]> {
133
- const drafts: string[] = [];
134
- let current = context;
135
-
136
- for (let i = 0; i < this.num_draft_tokens; i++) {
137
- // In EAGLE, draft is generated from a compressed hidden state
138
- // Here we simulate with regular generation
139
- const next = await generateFn(current);
140
- drafts.push(next);
141
- current += next;
142
-
143
- if (next.trim().length === 0) break;
144
- }
145
-
146
- return drafts;
147
- }
148
-
149
- /**
150
- * Verify draft with tree-based attention
151
- * Multiple drafts are verified simultaneously
152
- */
153
- async verifyDraftTree(
154
- context: string,
155
- drafts: string[],
156
- targetGenerate: (prompt: string) => Promise<string>
157
- ): Promise<{ accepted: number; text: string }> {
158
- let current_context = context;
159
- let accepted_count = 0;
160
-
161
- for (const draft of drafts) {
162
- // Target model generates one token at this position
163
- const target_token = await targetGenerate(current_context);
164
-
165
- // If draft matches target, accept
166
- if (draft.startsWith(target_token) || draft === target_token) {
167
- accepted_count++;
168
- current_context += target_token;
169
- } else {
170
- // Rejected - use target token
171
- current_context += target_token;
172
- if (accepted_count > 0) break;
173
- }
174
- }
175
-
176
- return {
177
- accepted: accepted_count,
178
- text: current_context.slice(context.length)
179
- };
180
- }
181
- }
182
-
183
- /**
184
- * Simple speculative decoding wrapper
185
- * Works with any model pair that supports continued generation
186
- */
187
- export class SpeculativeDecoder {
188
- private draft_threshold: number;
189
-
190
- constructor(draft_threshold: number = 0.5) {
191
- this.draft_threshold = draft_threshold;
192
- }
193
-
194
- /**
195
- * Execute speculative decoding
196
- *
197
- * @param prompt - Input prompt
198
- * @param draftFn - Function to generate draft completion (fast model)
199
- * @param targetFn - Function to generate target completion (slow model)
200
- * @param max_draft_tokens - Maximum tokens to draft
201
- */
202
- async decode(
203
- prompt: string,
204
- draftFn: (prompt: string, max_tokens: number) => Promise<string>,
205
- targetFn: (prompt: string, max_tokens: number) => Promise<string>,
206
- max_draft_tokens: number = 5
207
- ): Promise<SpeculativeResult> {
208
- const start_time = Date.now();
209
-
210
- // Phase 1: Generate draft with fast model
211
- const draft_start = Date.now();
212
- const draft_text = await draftFn(prompt, max_draft_tokens * 2);
213
- const draft_time = Date.now() - draft_start;
214
-
215
- // Phase 2: Verify with target model (single pass)
216
- // Instead of verifying token-by-token, we use acceptance criteria
217
- const target_start = Date.now();
218
- const target_text = await targetFn(prompt, max_draft_tokens);
219
- const target_time = Date.now() - target_start;
220
-
221
- // Calculate acceptance rate
222
- let accepted = 0;
223
- let rejected = 0;
224
-
225
- const draft_words = draft_text.split(/\s+/);
226
- const target_words = target_text.split(/\s+/);
227
-
228
- for (let i = 0; i < Math.min(draft_words.length, target_words.length); i++) {
229
- // Simple word-level acceptance
230
- if (draft_words[i].toLowerCase() === target_words[i].toLowerCase()) {
231
- accepted++;
232
- } else {
233
- rejected++;
234
- break; // Stop at first rejection
235
- }
236
- }
237
-
238
- // If draft was longer, those are rejected
239
- rejected += Math.max(0, draft_words.length - target_words.length);
240
-
241
- // Speedup: time_target / (time_draft + time_verification)
242
- const total_time = draft_time + target_time;
243
- const speedup = total_time > 0 ? (target_time / total_time) : 1;
244
-
245
- // Use target text (higher quality) as final
246
- const final_text = target_text;
247
-
248
- return {
249
- accepted,
250
- rejected,
251
- draft_tokens: draft_words.length,
252
- speedup: Math.min(speedup, 3.0), // Cap at 3x
253
- final_text
254
- };
255
- }
256
-
257
- /**
258
- * Execute with streaming (faster perceived latency)
259
- */
260
- async decodeStreaming(
261
- prompt: string,
262
- draftFn: (prompt: string) => Promise<string>,
263
- targetFn: (prompt: string) => Promise<string>,
264
- onToken: (token: string, is_draft: boolean) => void,
265
- max_draft_tokens: number = 5
266
- ): Promise<{ accepted: number; final_text: string }> {
267
- // Generate drafts first
268
- const drafts = await draftFn(prompt + " ");
269
- const draft_tokens = drafts.split(/\s+/);
270
-
271
- let accepted = 0;
272
- let final_text = "";
273
-
274
- // Verify and stream tokens
275
- for (const token of draft_tokens) {
276
- if (accepted >= max_draft_tokens) break;
277
-
278
- // Emit draft token immediately (lower quality)
279
- onToken(token, true);
280
- final_text += token + " ";
281
-
282
- accepted++;
283
- }
284
-
285
- return { accepted, final_text: final_text.trim() };
286
- }
287
- }
288
-
289
- /**
290
- * Batch speculative decoding
291
- * Processes multiple prompts with speculative execution
292
- */
293
- export async function speculativeBatch(
294
- prompts: string[],
295
- draftFn: (prompt: string) => Promise<string>,
296
- targetFn: (prompt: string) => Promise<string>,
297
- options?: {
298
- concurrency?: number;
299
- max_draft_tokens?: number;
300
- }
301
- ): Promise<SpeculativeResult[]> {
302
- const concurrency = options?.concurrency || 3;
303
- const max_draft_tokens = options?.max_draft_tokens || 5;
304
-
305
- const decoder = new SpeculativeDecoder();
306
- const results: SpeculativeResult[] = [];
307
-
308
- // Process in batches
309
- for (let i = 0; i < prompts.length; i += concurrency) {
310
- const batch = prompts.slice(i, i + concurrency);
311
- const batch_results = await Promise.all(
312
- batch.map(p => decoder.decode(p, draftFn, targetFn, max_draft_tokens))
313
- );
314
- results.push(...batch_results);
315
- }
316
-
317
- return results;
318
- }
319
-
320
- /**
321
- * Estimate speedup potential for a given prompt
322
- */
323
- export function estimateSpeedupPotential(
324
- prompt_length: number,
325
- expected_completion_length: number,
326
- draft_speed_ms: number,
327
- target_speed_ms: number
328
- ): number {
329
- // If draft is much faster, potential is higher
330
- const draft_vs_target = target_speed_ms / draft_speed_ms;
331
-
332
- // But speculative decoding has overhead
333
- const overhead_factor = 1.2; // 20% overhead
334
-
335
- return Math.min(draft_vs_target / overhead_factor, 3.0);
336
- }
337
-
338
- export default {
339
- MedusaPredictor,
340
- EagleSpeculative,
341
- SpeculativeDecoder,
342
- speculativeBatch,
343
- estimateSpeedupPotential
344
- };