adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
@@ -0,0 +1,117 @@
1
+ /**
2
+ * TMLPD Speculative Decoding
3
+ *
4
+ * Based on Medusa (arXiv:2401.10774) and EAGLE approaches
5
+ * Small draft model proposes tokens, large model verifies in parallel
6
+ * 2-3x faster generation with same quality
7
+ */
8
+ export interface SpeculativeConfig {
9
+ draft_model: string;
10
+ target_model: string;
11
+ num_draft_tokens: number;
12
+ temperature?: number;
13
+ max_verify_tokens?: number;
14
+ }
15
+ export interface SpeculativeResult {
16
+ accepted: number;
17
+ rejected: number;
18
+ draft_tokens: number;
19
+ speedup: number;
20
+ final_text: string;
21
+ }
22
+ export interface DraftCandidate {
23
+ token: string;
24
+ probability: number;
25
+ position: number;
26
+ }
27
+ /**
28
+ * Medusa-style multi-token prediction heads
29
+ * Instead of separate draft model, uses speculative sampling
30
+ */
31
+ export declare class MedusaPredictor {
32
+ private num_heads;
33
+ private temperature;
34
+ constructor(options?: {
35
+ num_heads?: number;
36
+ temperature?: number;
37
+ });
38
+ /**
39
+ * Generate k draft tokens from one forward pass
40
+ * In production, this uses actual Medusa prediction heads
41
+ */
42
+ generateDraftTokens(context: string, last_token: string, getLogits: (text: string) => Promise<Record<string, number>>): Promise<DraftCandidate[]>;
43
+ /**
44
+ * Verify draft tokens against target model
45
+ * Returns which tokens were accepted
46
+ */
47
+ verifyDraft(context: string, drafts: DraftCandidate[], targetLogits: (text: string) => Promise<Record<string, number>>): Promise<{
48
+ accepted: number[];
49
+ rejected: number[];
50
+ }>;
51
+ }
52
+ /**
53
+ * EAGLE-style speculative decoding
54
+ * Uses regression-based draft token prediction
55
+ */
56
+ export declare class EagleSpeculative {
57
+ private num_draft_tokens;
58
+ constructor(num_draft_tokens?: number);
59
+ /**
60
+ * Generate draft sequence
61
+ * In production, this uses EAGLE's auto-regressive draft model
62
+ */
63
+ generateDraft(context: string, generateFn: (prompt: string) => Promise<string>): Promise<string[]>;
64
+ /**
65
+ * Verify draft with tree-based attention
66
+ * Multiple drafts are verified simultaneously
67
+ */
68
+ verifyDraftTree(context: string, drafts: string[], targetGenerate: (prompt: string) => Promise<string>): Promise<{
69
+ accepted: number;
70
+ text: string;
71
+ }>;
72
+ }
73
+ /**
74
+ * Simple speculative decoding wrapper
75
+ * Works with any model pair that supports continued generation
76
+ */
77
+ export declare class SpeculativeDecoder {
78
+ private draft_threshold;
79
+ constructor(draft_threshold?: number);
80
+ /**
81
+ * Execute speculative decoding
82
+ *
83
+ * @param prompt - Input prompt
84
+ * @param draftFn - Function to generate draft completion (fast model)
85
+ * @param targetFn - Function to generate target completion (slow model)
86
+ * @param max_draft_tokens - Maximum tokens to draft
87
+ */
88
+ decode(prompt: string, draftFn: (prompt: string, max_tokens: number) => Promise<string>, targetFn: (prompt: string, max_tokens: number) => Promise<string>, max_draft_tokens?: number): Promise<SpeculativeResult>;
89
+ /**
90
+ * Execute with streaming (faster perceived latency)
91
+ */
92
+ decodeStreaming(prompt: string, draftFn: (prompt: string) => Promise<string>, targetFn: (prompt: string) => Promise<string>, onToken: (token: string, is_draft: boolean) => void, max_draft_tokens?: number): Promise<{
93
+ accepted: number;
94
+ final_text: string;
95
+ }>;
96
+ }
97
+ /**
98
+ * Batch speculative decoding
99
+ * Processes multiple prompts with speculative execution
100
+ */
101
+ export declare function speculativeBatch(prompts: string[], draftFn: (prompt: string) => Promise<string>, targetFn: (prompt: string) => Promise<string>, options?: {
102
+ concurrency?: number;
103
+ max_draft_tokens?: number;
104
+ }): Promise<SpeculativeResult[]>;
105
+ /**
106
+ * Estimate speedup potential for a given prompt
107
+ */
108
+ export declare function estimateSpeedupPotential(prompt_length: number, expected_completion_length: number, draft_speed_ms: number, target_speed_ms: number): number;
109
+ declare const _default: {
110
+ MedusaPredictor: typeof MedusaPredictor;
111
+ EagleSpeculative: typeof EagleSpeculative;
112
+ SpeculativeDecoder: typeof SpeculativeDecoder;
113
+ speculativeBatch: typeof speculativeBatch;
114
+ estimateSpeedupPotential: typeof estimateSpeedupPotential;
115
+ };
116
+ export default _default;
117
+ //# sourceMappingURL=speculativeDecoding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"speculativeDecoding.d.ts","sourceRoot":"","sources":["../../src/utils/speculativeDecoding.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;;GAGG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,OAAO,CAAC,EAAE;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB;IAKD;;;OAGG;IACG,mBAAmB,CACvB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GAC3D,OAAO,CAAC,cAAc,EAAE,CAAC;IAyB5B;;;OAGG;IACG,WAAW,CACf,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,cAAc,EAAE,EACxB,YAAY,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GAC9D,OAAO,CAAC;QAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;CAwBvD;AAED;;;GAGG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,gBAAgB,GAAE,MAAU;IAIxC;;;OAGG;IACG,aAAa,CACjB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAC9C,OAAO,CAAC,MAAM,EAAE,CAAC;IAiBpB;;;OAGG;IACG,eAAe,CACnB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EAAE,EAChB,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAClD,OAAO,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAwB/C;AAED;;;GAGG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,eAAe,CAAS;gBAEpB,eAAe,GAAE,MAAY;IAIzC;;;;;;;OAOG;IACG,MAAM,CACV,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAChE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EACjE,gBAAgB,GAAE,MAAU,GAC3B,OAAO,CAAC,iBAAiB,CAAC;IAkD7B;;OAEG;IACG,eAAe,CACnB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAC5C,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAC7C,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,KAAK,IAAI,EACnD,gBAAgB,GAAE,MAAU,GAC3B,OAAO,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;CAqBrD;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,CACpC,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAC5C,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAC7C,OAAO,CAAC,EAAE;IACR,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,GACA,OAAO,CAAC,iBAAiB,EAAE,CAAC,CAiB9B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,aAAa,EAAE,MAAM,EACrB,0BAA0B,EAAE,MAAM,EAClC,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM,GACtB,MAAM,CAQR;;;;;;;;AAED,wBAME"}
@@ -0,0 +1,246 @@
1
+ "use strict";
2
+ /**
3
+ * TMLPD Speculative Decoding
4
+ *
5
+ * Based on Medusa (arXiv:2401.10774) and EAGLE approaches
6
+ * Small draft model proposes tokens, large model verifies in parallel
7
+ * 2-3x faster generation with same quality
8
+ */
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.SpeculativeDecoder = exports.EagleSpeculative = exports.MedusaPredictor = void 0;
11
+ exports.speculativeBatch = speculativeBatch;
12
+ exports.estimateSpeedupPotential = estimateSpeedupPotential;
13
+ /**
14
+ * Medusa-style multi-token prediction heads
15
+ * Instead of separate draft model, uses speculative sampling
16
+ */
17
+ class MedusaPredictor {
18
+ num_heads;
19
+ temperature;
20
+ constructor(options) {
21
+ this.num_heads = options?.num_heads || 5;
22
+ this.temperature = options?.temperature || 0.7;
23
+ }
24
+ /**
25
+ * Generate k draft tokens from one forward pass
26
+ * In production, this uses actual Medusa prediction heads
27
+ */
28
+ async generateDraftTokens(context, last_token, getLogits) {
29
+ // Simulate getting logits for next token predictions
30
+ // In real Medusa, this comes from extra prediction heads
31
+ const prompt = context + last_token;
32
+ const logits = await getLogits(prompt);
33
+ const candidates = [];
34
+ const sorted = Object.entries(logits)
35
+ .sort((a, b) => b[1] - a[1])
36
+ .slice(0, this.num_heads);
37
+ for (let i = 0; i < sorted.length; i++) {
38
+ const [token, prob] = sorted[i];
39
+ // Apply temperature
40
+ const adjusted = Math.pow(prob, 1 / this.temperature);
41
+ candidates.push({
42
+ token,
43
+ probability: adjusted,
44
+ position: i + 1
45
+ });
46
+ }
47
+ return candidates;
48
+ }
49
+ /**
50
+ * Verify draft tokens against target model
51
+ * Returns which tokens were accepted
52
+ */
53
+ async verifyDraft(context, drafts, targetLogits) {
54
+ const accepted = [];
55
+ const rejected = [];
56
+ let current_context = context;
57
+ for (const draft of drafts) {
58
+ // Get target model's prediction for this position
59
+ const target_logits = await targetLogits(current_context);
60
+ const target_token = Object.entries(target_logits)
61
+ .sort((a, b) => b[1] - a[1])[0]?.[0];
62
+ // Accept if matches or probability is high enough
63
+ if (draft.token === target_token || draft.probability > 0.3) {
64
+ accepted.push(draft.position);
65
+ current_context += draft.token;
66
+ }
67
+ else {
68
+ rejected.push(draft.position);
69
+ break; // Reject rest of draft
70
+ }
71
+ }
72
+ return { accepted, rejected };
73
+ }
74
+ }
75
+ exports.MedusaPredictor = MedusaPredictor;
76
+ /**
77
+ * EAGLE-style speculative decoding
78
+ * Uses regression-based draft token prediction
79
+ */
80
+ class EagleSpeculative {
81
+ num_draft_tokens;
82
+ constructor(num_draft_tokens = 4) {
83
+ this.num_draft_tokens = num_draft_tokens;
84
+ }
85
+ /**
86
+ * Generate draft sequence
87
+ * In production, this uses EAGLE's auto-regressive draft model
88
+ */
89
+ async generateDraft(context, generateFn) {
90
+ const drafts = [];
91
+ let current = context;
92
+ for (let i = 0; i < this.num_draft_tokens; i++) {
93
+ // In EAGLE, draft is generated from a compressed hidden state
94
+ // Here we simulate with regular generation
95
+ const next = await generateFn(current);
96
+ drafts.push(next);
97
+ current += next;
98
+ if (next.trim().length === 0)
99
+ break;
100
+ }
101
+ return drafts;
102
+ }
103
+ /**
104
+ * Verify draft with tree-based attention
105
+ * Multiple drafts are verified simultaneously
106
+ */
107
+ async verifyDraftTree(context, drafts, targetGenerate) {
108
+ let current_context = context;
109
+ let accepted_count = 0;
110
+ for (const draft of drafts) {
111
+ // Target model generates one token at this position
112
+ const target_token = await targetGenerate(current_context);
113
+ // If draft matches target, accept
114
+ if (draft.startsWith(target_token) || draft === target_token) {
115
+ accepted_count++;
116
+ current_context += target_token;
117
+ }
118
+ else {
119
+ // Rejected - use target token
120
+ current_context += target_token;
121
+ if (accepted_count > 0)
122
+ break;
123
+ }
124
+ }
125
+ return {
126
+ accepted: accepted_count,
127
+ text: current_context.slice(context.length)
128
+ };
129
+ }
130
+ }
131
+ exports.EagleSpeculative = EagleSpeculative;
132
+ /**
133
+ * Simple speculative decoding wrapper
134
+ * Works with any model pair that supports continued generation
135
+ */
136
+ class SpeculativeDecoder {
137
+ draft_threshold;
138
+ constructor(draft_threshold = 0.5) {
139
+ this.draft_threshold = draft_threshold;
140
+ }
141
+ /**
142
+ * Execute speculative decoding
143
+ *
144
+ * @param prompt - Input prompt
145
+ * @param draftFn - Function to generate draft completion (fast model)
146
+ * @param targetFn - Function to generate target completion (slow model)
147
+ * @param max_draft_tokens - Maximum tokens to draft
148
+ */
149
+ async decode(prompt, draftFn, targetFn, max_draft_tokens = 5) {
150
+ const start_time = Date.now();
151
+ // Phase 1: Generate draft with fast model
152
+ const draft_start = Date.now();
153
+ const draft_text = await draftFn(prompt, max_draft_tokens * 2);
154
+ const draft_time = Date.now() - draft_start;
155
+ // Phase 2: Verify with target model (single pass)
156
+ // Instead of verifying token-by-token, we use acceptance criteria
157
+ const target_start = Date.now();
158
+ const target_text = await targetFn(prompt, max_draft_tokens);
159
+ const target_time = Date.now() - target_start;
160
+ // Calculate acceptance rate
161
+ let accepted = 0;
162
+ let rejected = 0;
163
+ const draft_words = draft_text.split(/\s+/);
164
+ const target_words = target_text.split(/\s+/);
165
+ for (let i = 0; i < Math.min(draft_words.length, target_words.length); i++) {
166
+ // Simple word-level acceptance
167
+ if (draft_words[i].toLowerCase() === target_words[i].toLowerCase()) {
168
+ accepted++;
169
+ }
170
+ else {
171
+ rejected++;
172
+ break; // Stop at first rejection
173
+ }
174
+ }
175
+ // If draft was longer, those are rejected
176
+ rejected += Math.max(0, draft_words.length - target_words.length);
177
+ // Speedup: time_target / (time_draft + time_verification)
178
+ const total_time = draft_time + target_time;
179
+ const speedup = total_time > 0 ? (target_time / total_time) : 1;
180
+ // Use target text (higher quality) as final
181
+ const final_text = target_text;
182
+ return {
183
+ accepted,
184
+ rejected,
185
+ draft_tokens: draft_words.length,
186
+ speedup: Math.min(speedup, 3.0), // Cap at 3x
187
+ final_text
188
+ };
189
+ }
190
+ /**
191
+ * Execute with streaming (faster perceived latency)
192
+ */
193
+ async decodeStreaming(prompt, draftFn, targetFn, onToken, max_draft_tokens = 5) {
194
+ // Generate drafts first
195
+ const drafts = await draftFn(prompt + " ");
196
+ const draft_tokens = drafts.split(/\s+/);
197
+ let accepted = 0;
198
+ let final_text = "";
199
+ // Verify and stream tokens
200
+ for (const token of draft_tokens) {
201
+ if (accepted >= max_draft_tokens)
202
+ break;
203
+ // Emit draft token immediately (lower quality)
204
+ onToken(token, true);
205
+ final_text += token + " ";
206
+ accepted++;
207
+ }
208
+ return { accepted, final_text: final_text.trim() };
209
+ }
210
+ }
211
+ exports.SpeculativeDecoder = SpeculativeDecoder;
212
+ /**
213
+ * Batch speculative decoding
214
+ * Processes multiple prompts with speculative execution
215
+ */
216
+ async function speculativeBatch(prompts, draftFn, targetFn, options) {
217
+ const concurrency = options?.concurrency || 3;
218
+ const max_draft_tokens = options?.max_draft_tokens || 5;
219
+ const decoder = new SpeculativeDecoder();
220
+ const results = [];
221
+ // Process in batches
222
+ for (let i = 0; i < prompts.length; i += concurrency) {
223
+ const batch = prompts.slice(i, i + concurrency);
224
+ const batch_results = await Promise.all(batch.map(p => decoder.decode(p, draftFn, targetFn, max_draft_tokens)));
225
+ results.push(...batch_results);
226
+ }
227
+ return results;
228
+ }
229
+ /**
230
+ * Estimate speedup potential for a given prompt
231
+ */
232
+ function estimateSpeedupPotential(prompt_length, expected_completion_length, draft_speed_ms, target_speed_ms) {
233
+ // If draft is much faster, potential is higher
234
+ const draft_vs_target = target_speed_ms / draft_speed_ms;
235
+ // But speculative decoding has overhead
236
+ const overhead_factor = 1.2; // 20% overhead
237
+ return Math.min(draft_vs_target / overhead_factor, 3.0);
238
+ }
239
+ exports.default = {
240
+ MedusaPredictor,
241
+ EagleSpeculative,
242
+ SpeculativeDecoder,
243
+ speculativeBatch,
244
+ estimateSpeedupPotential
245
+ };
246
+ //# sourceMappingURL=speculativeDecoding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"speculativeDecoding.js","sourceRoot":"","sources":["../../src/utils/speculativeDecoding.ts"],"names":[],"mappings":";AAAA;;;;;;GAMG;;;AA8RH,4CAyBC;AAKD,4DAaC;AAjTD;;;GAGG;AACH,MAAa,eAAe;IAClB,SAAS,CAAS;IAClB,WAAW,CAAS;IAE5B,YAAY,OAGX;QACC,IAAI,CAAC,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,CAAC,CAAC;QACzC,IAAI,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,GAAG,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,mBAAmB,CACvB,OAAe,EACf,UAAkB,EAClB,SAA4D;QAE5D,qDAAqD;QACrD,yDAAyD;QACzD,MAAM,MAAM,GAAG,OAAO,GAAG,UAAU,CAAC;QACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,CAAC;QAEvC,MAAM,UAAU,GAAqB,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;aAClC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;aAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAChC,oBAAoB;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC;YACtD,UAAU,CAAC,IAAI,CAAC;gBACd,KAAK;gBACL,WAAW,EAAE,QAAQ;gBACrB,QAAQ,EAAE,CAAC,GAAG,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,WAAW,CACf,OAAe,EACf,MAAwB,EACxB,YAA+D;QAE/D,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,IAAI,eAAe,GAAG,OAAO,CAAC;QAE9B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,kDAAkD;YAClD,MAAM,aAAa,GAAG,MAAM,YAAY,CAAC,eAAe,CAAC,CAAC;YAC1D,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC;iBAC/C,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAEvC,kDAAkD;YAClD,IAAI,KAAK,CAAC,KAAK,KAAK,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC;gBAC5D,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAC9B,eAAe,IAAI,KAAK,CAAC,KAAK,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAC9B,MAAM,CAAE,uBAAuB;YACjC,CAAC;QACH,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;IAChC,CAAC;CACF;AA7ED,0CA6EC;AAED;;;GAGG;AACH,MAAa,gBAAgB;IACnB,gBAAgB,CAAS;IAEjC,YAAY,mBAA2B,CAAC;QACtC,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;IAC3C,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,aAAa,CACjB,OAAe,EACf,UAA+C;QAE/C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,OAAO,CAAC;QAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,8DAA8D;YAC9D,2CAA2C;YAC3C,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,OAAO,IAAI,IAAI,CAAC;YAEhB,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM;QACtC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CACnB,OAAe,EACf,MAAgB,EAChB,cAAmD;QAEnD,IAAI,eAAe,GAAG,OAAO,CAAC;QAC9B,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,oDAAoD;YACpD,MAAM,YAAY,GAAG,MAAM,cAAc,CAAC,eAAe,CAAC,CAAC;YAE3D,kCAAkC;YAClC,IAAI,KAAK,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;gBAC7D,cAAc,EAAE,CAAC;gBACjB,eAAe,IAAI,YAAY,CAAC;YAClC,CAAC;iBAAM,CAAC;gBACN,8BAA8B;gBAC9B,eAAe,IAAI,YAAY,CAAC;gBAChC,IAAI,cAAc,GAAG,CAAC;oBAAE,MAAM;YAChC,CAAC;QACH,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,cAAc;YACxB,IAAI,EAAE,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;SAC5C,CAAC;IACJ,CAAC;CACF;AA/DD,4CA+DC;AAED;;;GAGG;AACH,MAAa,kBAAkB;IACrB,eAAe,CAAS;IAEhC,YAAY,kBAA0B,GAAG;QACvC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACzC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,MAAM,CACV,MAAc,EACd,OAAgE,EAChE,QAAiE,EACjE,mBAA2B,CAAC;QAE5B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE9B,0CAA0C;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC/B,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,MAAM,EAAE,gBAAgB,GAAG,CAAC,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC;QAE5C,kDAAkD;QAClD,kEAAkE;QAClE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;QAC7D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY,CAAC;QAE9C,4BAA4B;QAC5B,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3E,+BAA+B;YAC/B,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,KAAK,YAAY,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;gBACnE,QAAQ,EAAE,CAAC;YACb,CAAC;iBAAM,CAAC;gBACN,QAAQ,EAAE,CAAC;gBACX,MAAM,CAAE,0BAA0B;YACpC,CAAC;QACH,CAAC;QAED,0CAA0C;QAC1C,QAAQ,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QAElE,0DAA0D;QAC1D,MAAM,UAAU,GAAG,UAAU,GAAG,WAAW,CAAC;QAC5C,MAAM,OAAO,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhE,4CAA4C;QAC5C,MAAM,UAAU,GAAG,WAAW,CAAC;QAE/B,OAAO;YACL,QAAQ;YACR,QAAQ;YACR,YAAY,EAAE,WAAW,CAAC,MAAM;YAChC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAG,YAAY;YAC9C,UAAU;SACX,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CACnB,MAAc,EACd,OAA4C,EAC5C,QAA6C,EAC7C,OAAmD,EACnD,mBAA2B,CAAC;QAE5B,wBAAwB;QACxB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;QAC3C,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,UAAU,GAAG,EAAE,CAAC;QAEpB,2BAA2B;QAC3B,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YACjC,IAAI,QAAQ,IAAI,gBAAgB;gBAAE,MAAM;YAExC,+CAA+C;YAC/C,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;YACrB,UAAU,IAAI,KAAK,GAAG,GAAG,CAAC;YAE1B,QAAQ,EAAE,CAAC;QACb,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC;IACrD,CAAC;CACF;AApGD,gDAoGC;AAED;;;GAGG;AACI,KAAK,UAAU,gBAAgB,CACpC,OAAiB,EACjB,OAA4C,EAC5C,QAA6C,EAC7C,OAGC;IAED,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,CAAC,CAAC;IAC9C,MAAM,gBAAgB,GAAG,OAAO,EAAE,gBAAgB,IAAI,CAAC,CAAC;IAExD,MAAM,OAAO,GAAG,IAAI,kBAAkB,EAAE,CAAC;IACzC,MAAM,OAAO,GAAwB,EAAE,CAAC;IAExC,qBAAqB;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QACrD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QAChD,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CACrC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,gBAAgB,CAAC,CAAC,CACvE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;IACjC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,SAAgB,wBAAwB,CACtC,aAAqB,EACrB,0BAAkC,EAClC,cAAsB,EACtB,eAAuB;IAEvB,+CAA+C;IAC/C,MAAM,eAAe,GAAG,eAAe,GAAG,cAAc,CAAC;IAEzD,wCAAwC;IACxC,MAAM,eAAe,GAAG,GAAG,CAAC,CAAE,eAAe;IAE7C,OAAO,IAAI,CAAC,GAAG,CAAC,eAAe,GAAG,eAAe,EAAE,GAAG,CAAC,CAAC;AAC1D,CAAC;AAED,kBAAe;IACb,eAAe;IACf,gBAAgB;IAChB,kBAAkB;IAClB,gBAAgB;IAChB,wBAAwB;CACzB,CAAC"}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * TMLPD Token Utilities
3
+ *
4
+ * Token counting, cost estimation, and context management.
5
+ */
6
+ export interface TokenCost {
7
+ input_per_1k: number;
8
+ output_per_1k: number;
9
+ }
10
+ export declare const MODEL_COSTS: Record<string, TokenCost>;
11
+ /**
12
+ * Count tokens in text (approximate for English).
13
+ * Based on ~1.3 tokens per word for typical English text.
14
+ */
15
+ export declare function countTokens(text: string, model?: string): number;
16
+ /**
17
+ * Estimate cost for a prompt/completion pair.
18
+ */
19
+ export declare function estimateCost(prompt_tokens: number, completion_tokens: number, model: string): number;
20
+ /**
21
+ * Estimate cost from raw text (approximates both prompt and completion).
22
+ */
23
+ export declare function estimateCostFromText(prompt: string, completion: string, model: string): number;
24
+ /**
25
+ * Get cost info for a model.
26
+ */
27
+ export declare function getModelCost(model: string): TokenCost;
28
+ /**
29
+ * List all supported models with their costs.
30
+ */
31
+ export declare function listModelsByCost(): Array<{
32
+ model: string;
33
+ input: number;
34
+ output: number;
35
+ }>;
36
+ /**
37
+ * Find cheapest models for a given task.
38
+ */
39
+ export declare function findCheapestModels(task: "fast" | "quality" | "balanced" | "coding", count?: number): string[];
40
+ declare const _default: {
41
+ countTokens: typeof countTokens;
42
+ estimateCost: typeof estimateCost;
43
+ estimateCostFromText: typeof estimateCostFromText;
44
+ getModelCost: typeof getModelCost;
45
+ listModelsByCost: typeof listModelsByCost;
46
+ findCheapestModels: typeof findCheapestModels;
47
+ MODEL_COSTS: Record<string, TokenCost>;
48
+ };
49
+ export default _default;
50
+ //# sourceMappingURL=tokenUtils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenUtils.d.ts","sourceRoot":"","sources":["../../src/utils/tokenUtils.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,MAAM,WAAW,SAAS;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;CACvB;AAGD,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CA2BjD,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,GAAE,MAAiB,GAAG,MAAM,CAqB1E;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,aAAa,EAAE,MAAM,EACrB,iBAAiB,EAAE,MAAM,EACzB,KAAK,EAAE,MAAM,GACZ,MAAM,CAOR;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,GACZ,MAAM,CAMR;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,CAErD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAQ1F;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,UAAU,GAAG,QAAQ,EAAE,KAAK,GAAE,MAAU,GAAG,MAAM,EAAE,CAYhH;;;;;;;;;;AAED,wBAQE"}
@@ -0,0 +1,124 @@
1
+ "use strict";
2
+ /**
3
+ * TMLPD Token Utilities
4
+ *
5
+ * Token counting, cost estimation, and context management.
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.MODEL_COSTS = void 0;
9
+ exports.countTokens = countTokens;
10
+ exports.estimateCost = estimateCost;
11
+ exports.estimateCostFromText = estimateCostFromText;
12
+ exports.getModelCost = getModelCost;
13
+ exports.listModelsByCost = listModelsByCost;
14
+ exports.findCheapestModels = findCheapestModels;
15
+ // Current provider rates (2024-2025)
16
+ exports.MODEL_COSTS = {
17
+ // OpenAI
18
+ "gpt-4o": { input_per_1k: 2.50, output_per_1k: 10.00 },
19
+ "gpt-4o-mini": { input_per_1k: 0.15, output_per_1k: 0.60 },
20
+ "gpt-4-turbo": { input_per_1k: 10.00, output_per_1k: 30.00 },
21
+ "gpt-3.5-turbo": { input_per_1k: 0.50, output_per_1k: 1.50 },
22
+ // Anthropic
23
+ "claude-3.5-sonnet": { input_per_1k: 3.00, output_per_1k: 15.00 },
24
+ "claude-3-opus": { input_per_1k: 15.00, output_per_1k: 75.00 },
25
+ "claude-3-haiku": { input_per_1k: 0.25, output_per_1k: 1.25 },
26
+ // Google
27
+ "gemini-2.0-flash": { input_per_1k: 0.00, output_per_1k: 0.00 }, // Free
28
+ "gemini-1.5-pro": { input_per_1k: 1.25, output_per_1k: 5.00 },
29
+ "gemini-1.5-flash": { input_per_1k: 0.075, output_per_1k: 0.30 },
30
+ // Groq
31
+ "groq/llama-3.3-70b": { input_per_1k: 0.59, output_per_1k: 0.79 },
32
+ "groq/llama-3.1-8b": { input_per_1k: 0.05, output_per_1k: 0.08 },
33
+ // Cerebras
34
+ "cerebras/llama-3.3-70b": { input_per_1k: 0.60, output_per_1k: 0.60 },
35
+ // Mistral
36
+ "mistral-large": { input_per_1k: 2.00, output_per_1k: 6.00 },
37
+ "mistral-small": { input_per_1k: 0.20, output_per_1k: 0.60 },
38
+ };
39
+ /**
40
+ * Count tokens in text (approximate for English).
41
+ * Based on ~1.3 tokens per word for typical English text.
42
+ */
43
+ function countTokens(text, model = "gpt-4o") {
44
+ if (!text || text.length === 0)
45
+ return 0;
46
+ // Use model-specific approximation if available
47
+ // Otherwise use generic word-based estimate
48
+ const words = text.trim().split(/\s+/).length;
49
+ // Fine-tune based on model family
50
+ if (model.includes("claude")) {
51
+ // Anthropic models: ~1.5 tokens per word
52
+ return Math.ceil(words * 1.5);
53
+ }
54
+ else if (model.includes("gemini")) {
55
+ // Google: ~1.2 tokens per word (SentencePiece)
56
+ return Math.ceil(words * 1.2);
57
+ }
58
+ else if (model.includes("llama")) {
59
+ // Llama: ~1.4 tokens per word (BPE)
60
+ return Math.ceil(words * 1.4);
61
+ }
62
+ // Default: ~1.3 tokens per word (GPT-4 average)
63
+ return Math.ceil(words * 1.3);
64
+ }
65
+ /**
66
+ * Estimate cost for a prompt/completion pair.
67
+ */
68
+ function estimateCost(prompt_tokens, completion_tokens, model) {
69
+ const costs = exports.MODEL_COSTS[model] || exports.MODEL_COSTS["gpt-4o"];
70
+ const input_cost = (prompt_tokens / 1000) * costs.input_per_1k;
71
+ const output_cost = (completion_tokens / 1000) * costs.output_per_1k;
72
+ return input_cost + output_cost;
73
+ }
74
+ /**
75
+ * Estimate cost from raw text (approximates both prompt and completion).
76
+ */
77
+ function estimateCostFromText(prompt, completion, model) {
78
+ const prompt_tokens = countTokens(prompt, model);
79
+ // Completion typically has higher token density
80
+ const completion_tokens = Math.ceil(countTokens(completion, model) * 1.2);
81
+ return estimateCost(prompt_tokens, completion_tokens, model);
82
+ }
83
+ /**
84
+ * Get cost info for a model.
85
+ */
86
+ function getModelCost(model) {
87
+ return exports.MODEL_COSTS[model] || exports.MODEL_COSTS["gpt-4o"];
88
+ }
89
+ /**
90
+ * List all supported models with their costs.
91
+ */
92
+ function listModelsByCost() {
93
+ return Object.entries(exports.MODEL_COSTS)
94
+ .map(([model, cost]) => ({
95
+ model,
96
+ input: cost.input_per_1k,
97
+ output: cost.output_per_1k
98
+ }))
99
+ .sort((a, b) => (a.input + a.output) - (b.input + b.output));
100
+ }
101
+ /**
102
+ * Find cheapest models for a given task.
103
+ */
104
+ function findCheapestModels(task, count = 3) {
105
+ const sorted = listModelsByCost();
106
+ // Different profiles for different needs
107
+ const profiles = {
108
+ fast: sorted.filter(m => m.output < 1.0).slice(0, count).map(m => m.model),
109
+ quality: sorted.filter(m => m.output > 10).slice(0, count).map(m => m.model),
110
+ balanced: sorted.slice(0, count * 2).slice(count, count * 2).map(m => m.model),
111
+ coding: sorted.filter(m => m.model.includes("codex") || m.model.includes("claude") || m.model.includes("llama")).slice(0, count).map(m => m.model)
112
+ };
113
+ return profiles[task] || profiles.balanced;
114
+ }
115
+ exports.default = {
116
+ countTokens,
117
+ estimateCost,
118
+ estimateCostFromText,
119
+ getModelCost,
120
+ listModelsByCost,
121
+ findCheapestModels,
122
+ MODEL_COSTS: exports.MODEL_COSTS
123
+ };
124
+ //# sourceMappingURL=tokenUtils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenUtils.js","sourceRoot":"","sources":["../../src/utils/tokenUtils.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAyCH,kCAqBC;AAKD,oCAWC;AAKD,oDAUC;AAKD,oCAEC;AAKD,4CAQC;AAKD,gDAYC;AA3HD,qCAAqC;AACxB,QAAA,WAAW,GAA8B;IACpD,SAAS;IACT,QAAQ,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE;IACtD,aAAa,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAC1D,aAAa,EAAE,EAAE,YAAY,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE;IAC5D,eAAe,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAE5D,YAAY;IACZ,mBAAmB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE;IACjE,eAAe,EAAE,EAAE,YAAY,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE;IAC9D,gBAAgB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAE7D,SAAS;IACT,kBAAkB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,EAAG,OAAO;IACzE,gBAAgB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAC7D,kBAAkB,EAAE,EAAE,YAAY,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE;IAEhE,OAAO;IACP,oBAAoB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IACjE,mBAAmB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAEhE,WAAW;IACX,wBAAwB,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAErE,UAAU;IACV,eAAe,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;IAC5D,eAAe,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE;CAC7D,CAAC;AAEF;;;GAGG;AACH,SAAgB,WAAW,CAAC,IAAY,EAAE,QAAgB,QAAQ;IAChE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEzC,gDAAgD;IAChD,4CAA4C;IAC5C,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;IAE9C,kCAAkC;IAClC,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7B,yCAAyC;QACzC,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;IAChC,CAAC;SAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACpC,+CAA+C;QAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;IAChC,CAAC;SAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACnC,oCAAoC;QACpC,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;IAChC,CAAC;IAED,gDAAgD;IAChD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,SAAgB,YAAY,CAC1B,aAAqB,EACrB,iBAAyB,EACzB,KAAa;IAEb,MAAM,KAAK,GAAG,mBAAW,CAAC,KAAK,CAAC,IAAI,mBAAW,CAAC,QAAQ,CAAC,CAAC;IAE1D,MAAM,UAAU,GAAG,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,YAAY,CAAC;IAC/D,MAAM,WAAW,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,aAAa,CAAC;IAErE,OAAO,UAAU,GAAG,WAAW,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAClC,MAAc,EACd,UAAkB,EAClB,KAAa;IAEb,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IACjD,gDAAgD;IAChD,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;IAE1E,OAAO,YAAY,CAAC,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC,CAAC;AAC/D,CAAC;AAED;;GAEG;AACH,SAAgB,YAAY,CAAC,KAAa;IACxC,OAAO,mBAAW,CAAC,KAAK,CAAC,IAAI,mBAAW,CAAC,QAAQ,CAAC,CAAC;AACrD,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB;IAC9B,OAAO,MAAM,CAAC,OAAO,CAAC,mBAAW,CAAC;SAC/B,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,KAAK;QACL,KAAK,EAAE,IAAI,CAAC,YAAY;QACxB,MAAM,EAAE,IAAI,CAAC,aAAa;KAC3B,CAAC,CAAC;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;AACjE,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,IAAgD,EAAE,QAAgB,CAAC;IACpG,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;IAElC,yCAAyC;IACzC,MAAM,QAAQ,GAAG;QACf,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1E,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAC5E,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAC9E,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;KACnJ,CAAC;IAEF,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC;AAC7C,CAAC;AAED,kBAAe;IACb,WAAW;IACX,YAAY;IACZ,oBAAoB;IACpB,YAAY;IACZ,gBAAgB;IAChB,kBAAkB;IAClB,WAAW,EAAX,mBAAW;CACZ,CAAC"}