adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
package/rust/tmlpd.h DELETED
@@ -1,268 +0,0 @@
1
- /**
2
- * TMLPD Rust Bindings - FFI Interface
3
- *
4
- * High-performance Rust library for TMLPD operations.
5
- * Enables zero-overhead integration with Rust projects.
6
- *
7
- * Build: cargo build --release
8
- * Use: npm install tmlpd-pi (Rust bindings auto-included)
9
- */
10
-
11
- #include <stdint.h>
12
- #include <stddef.h>
13
- #include <stdbool.h>
14
-
15
- #ifdef _WIN32
16
- #define TMLPD_EXPORT __declspec(dllexport)
17
- #else
18
- #define TMLPD_EXPORT
19
- #endif
20
-
21
- // Error codes
22
- typedef enum {
23
- TMLPD_OK = 0,
24
- TMLPD_ERR_INVALID_INPUT = 1,
25
- TMLPD_ERR_PROVIDER_FAILED = 2,
26
- TMLPD_ERR_TIMEOUT = 3,
27
- TMLPD_ERR_NO_PROVIDERS = 4,
28
- TMLPD_ERR_OUT_OF_MEMORY = 5
29
- } tmlpd_error_t;
30
-
31
- // Result structure
32
- typedef struct {
33
- tmlpd_error_t error;
34
- char* content;
35
- uint32_t tokens_used;
36
- double cost_usd;
37
- uint64_t duration_ms;
38
- bool cached;
39
- } tmlpd_result_t;
40
-
41
- // Config structure
42
- typedef struct {
43
- uint32_t max_concurrent;
44
- uint32_t cache_ttl_seconds;
45
- double daily_budget_usd;
46
- double retry_base_delay_ms;
47
- double retry_jitter;
48
- } tmlpd_config_t;
49
-
50
- // Initialize TMLPD with config
51
- TMLPD_EXPORT tmlpd_error_t tmlpd_init(tmlpd_config_t* config);
52
-
53
- // Execute single prompt
54
- TMLPD_EXPORT tmlpd_result_t* tmlpd_execute(
55
- const char* prompt,
56
- const char* model,
57
- double timeout_ms
58
- );
59
-
60
- // Execute parallel across multiple models
61
- TMLPD_EXPORT tmlpd_result_t** tmlpd_execute_parallel(
62
- const char* prompt,
63
- const char** models,
64
- uint32_t model_count,
65
- double timeout_ms,
66
- uint32_t* result_count
67
- );
68
-
69
- // Token counting (no API call needed)
70
- TMLPD_EXPORT uint32_t tmlpd_count_tokens(
71
- const char* text,
72
- const char* model
73
- );
74
-
75
- // Estimate cost before execution
76
- TMLPD_EXPORT double tmlpd_estimate_cost(
77
- uint32_t prompt_tokens,
78
- uint32_t completion_tokens,
79
- const char* model
80
- );
81
-
82
- // ISON compression
83
- TMLPD_EXPORT char* tmlpd_ison_encode(const char* text);
84
- TMLPD_EXPORT char* tmlpd_ison_decode(const char* encoded);
85
-
86
- // Memory operations
87
- TMLPD_EXPORT char* tmlpd_store_episode(
88
- const char* task_desc,
89
- const char* result,
90
- const char* model,
91
- double cost
92
- );
93
-
94
- TMLPD_EXPORT char* tmlpd_query_similar(
95
- const char* task_desc,
96
- uint32_t limit
97
- );
98
-
99
- // Cleanup
100
- TMLPD_EXPORT void tmlpd_free_result(tmlpd_result_t* result);
101
- TMLPD_EXPORT void tmlpd_free_string(char* str);
102
- TMLPD_EXPORT void tmlpd_shutdown(void);
103
-
104
- // ============================================
105
- // Implementation stubs (for demonstration)
106
- // In production, these call actual Rust lib
107
- // ============================================
108
-
109
- #ifdef TMLPD_IMPLEMENTATION
110
-
111
- #include <stdlib.h>
112
- #include <string.h>
113
- #include <time.h>
114
-
115
- static tmlpd_config_t g_config = {0};
116
- static bool g_initialized = false;
117
-
118
- TMLPD_EXPORT tmlpd_error_t tmlpd_init(tmlpd_config_t* config) {
119
- if (!config) return TMLPD_ERR_INVALID_INPUT;
120
- memcpy(&g_config, config, sizeof(tmlpd_config_t));
121
- g_initialized = true;
122
- return TMLPD_OK;
123
- }
124
-
125
- TMLPD_EXPORT tmlpd_result_t* tmlpd_execute(
126
- const char* prompt,
127
- const char* model,
128
- double timeout_ms
129
- ) {
130
- if (!g_initialized || !prompt || !model) return NULL;
131
-
132
- tmlpd_result_t* result = (tmlpd_result_t*)malloc(sizeof(tmlpd_result_t));
133
- if (!result) return NULL;
134
-
135
- // Simulate execution
136
- result->error = TMLPD_OK;
137
- result->tokens_used = (uint32_t)(strlen(prompt) / 4);
138
- result->cost_usd = result->tokens_used * 0.00001;
139
- result->duration_ms = (uint64_t)(rand() % 1000 + 100);
140
- result->cached = false;
141
-
142
- // Allocate and fill content
143
- size_t content_len = strlen(prompt) + 20;
144
- result->content = (char*)malloc(content_len);
145
- snprintf(result->content, content_len, "[TMLPD Rust] Processed: %s", prompt);
146
-
147
- return result;
148
- }
149
-
150
- TMLPD_EXPORT tmlpd_result_t** tmlpd_execute_parallel(
151
- const char* prompt,
152
- const char** models,
153
- uint32_t model_count,
154
- double timeout_ms,
155
- uint32_t* result_count
156
- ) {
157
- if (!result_count || model_count == 0) return NULL;
158
- *result_count = model_count;
159
-
160
- tmlpd_result_t** results = (tmlpd_result_t**)malloc(
161
- sizeof(tmlpd_result_t*) * model_count
162
- );
163
-
164
- for (uint32_t i = 0; i < model_count; i++) {
165
- results[i] = tmlpd_execute(prompt, models[i], timeout_ms);
166
- }
167
-
168
- return results;
169
- }
170
-
171
- TMLPD_EXPORT uint32_t tmlpd_count_tokens(const char* text, const char* model) {
172
- if (!text) return 0;
173
- // Simple word-based approximation
174
- uint32_t words = 0;
175
- for (const char* p = text; *p; p++) {
176
- if (*p == ' ') words++;
177
- }
178
- return (words + 1) * 13 / 10; // ~1.3 tokens per word
179
- }
180
-
181
- TMLPD_EXPORT double tmlpd_estimate_cost(
182
- uint32_t prompt_tokens,
183
- uint32_t completion_tokens,
184
- const char* model
185
- ) {
186
- // Default GPT-4 pricing
187
- double input_rate = 0.0025 / 1000; // $2.50/1M
188
- double output_rate = 0.01 / 1000; // $10/1M
189
-
190
- if (strstr(model, "claude")) {
191
- input_rate = 0.003 / 1000;
192
- output_rate = 0.015 / 1000;
193
- } else if (strstr(model, "gemini")) {
194
- input_rate = 0.000075 / 1000;
195
- output_rate = 0.0003 / 1000;
196
- }
197
-
198
- return prompt_tokens * input_rate + completion_tokens * output_rate;
199
- }
200
-
201
- TMLPD_EXPORT char* tmlpd_ison_encode(const char* text) {
202
- if (!text) return NULL;
203
-
204
- // Remove common words
205
- size_t len = strlen(text) + 1;
206
- char* result = (char*)malloc(len);
207
- strcpy(result, text);
208
-
209
- // Simple ISON: remove articles
210
- const char* articles[] = {" the ", " a ", " an ", " The ", " A ", " An "};
211
- for (int i = 0; i < 6; i++) {
212
- char* pos;
213
- while ((pos = strstr(result, articles[i])) != NULL) {
214
- memmove(pos, pos + strlen(articles[i]),
215
- strlen(pos + strlen(articles[i])) + 1);
216
- }
217
- }
218
-
219
- return result;
220
- }
221
-
222
- TMLPD_EXPORT char* tmlpd_ison_decode(const char* encoded) {
223
- // In production, reverse ISON encoding
224
- if (!encoded) return NULL;
225
- char* result = (char*)malloc(strlen(encoded) + 10);
226
- sprintf(result, "The %s", encoded);
227
- return result;
228
- }
229
-
230
- TMLPD_EXPORT char* tmlpd_store_episode(
231
- const char* task_desc,
232
- const char* result,
233
- const char* model,
234
- double cost
235
- ) {
236
- // In production, store in episodic memory
237
- (void)task_desc; (void)result; (void)model; (void)cost;
238
- char* id = (char*)malloc(16);
239
- snprintf(id, 16, "ep_%ld", time(NULL));
240
- return id;
241
- }
242
-
243
- TMLPD_EXPORT char* tmlpd_query_similar(
244
- const char* task_desc,
245
- uint32_t limit
246
- ) {
247
- (void)task_desc; (void)limit;
248
- char* result = (char*)malloc(32);
249
- strcpy(result, "[]"); // Empty array
250
- return result;
251
- }
252
-
253
- TMLPD_EXPORT void tmlpd_free_result(tmlpd_result_t* result) {
254
- if (result) {
255
- if (result->content) free(result->content);
256
- free(result);
257
- }
258
- }
259
-
260
- TMLPD_EXPORT void tmlpd_free_string(char* str) {
261
- if (str) free(str);
262
- }
263
-
264
- TMLPD_EXPORT void tmlpd_shutdown(void) {
265
- g_initialized = false;
266
- }
267
-
268
- #endif // TMLPD_IMPLEMENTATION
package/src/cache/prefixCache.ts DELETED
@@ -1,365 +0,0 @@
1
- /**
2
- * TMLPD Prefix Cache - RadixAttention Style
3
- *
4
- * Inspired by SGLang's RadixAttention (arXiv:2312.07104)
5
- * Caches KV states for common prefixes (system prompts, etc.)
6
- * 5-10x speedup for repeated prompt patterns
7
- */
8
-
9
- export interface CacheEntry {
10
- key: string; // Hash of the prefix
11
- prefix: string; // Original prefix text
12
- kv_state?: Buffer; // Cached KV state (if using actual KV cache)
13
- response_hash?: string; // Hash of cached response
14
- hit_count: number; // Times this prefix was used
15
- last_used: number; // Timestamp
16
- token_count: number; // Tokens in this prefix
17
- children: Map<string, string>; // child_key -> child_cache_key
18
- }
19
-
20
- export interface PrefixCacheStats {
21
- total_entries: number;
22
- total_hits: number;
23
- total_misses: number;
24
- hit_rate: number;
25
- memory_estimate_mb: number;
26
- oldest_entry_age_ms: number;
27
- }
28
-
29
- export class PrefixCache {
30
- private entries: Map<string, CacheEntry> = new Map();
31
- private access_order: string[] = []; // LRU tracking
32
- private max_entries: number;
33
- private max_memory_mb: number;
34
-
35
- constructor(options?: {
36
- max_entries?: number;
37
- max_memory_mb?: number;
38
- }) {
39
- this.max_entries = options?.max_entries || 10000;
40
- this.max_memory_mb = options?.max_memory_mb || 512;
41
- }
42
-
43
- /**
44
- * Generate cache key from text prefix
45
- */
46
- private generateKey(text: string, model?: string): string {
47
- // Simple hash for now - in production use SHA-256
48
- const normalized = text.toLowerCase().trim().substring(0, 500);
49
- const str = `${model || "default"}:${normalized}`;
50
-
51
- let hash = 0;
52
- for (let i = 0; i < str.length; i++) {
53
- const char = str.charCodeAt(i);
54
- hash = ((hash << 5) - hash) + char;
55
- hash = hash & hash; // Convert to 32bit integer
56
- }
57
-
58
- return `pc_${Math.abs(hash).toString(16)}`;
59
- }
60
-
61
- /**
62
- * Check if prefix is cached
63
- */
64
- has(prefix: string, model?: string): boolean {
65
- const key = this.generateKey(prefix, model);
66
- return this.entries.has(key);
67
- }
68
-
69
- /**
70
- * Get cached entry
71
- */
72
- get(prefix: string, model?: string): CacheEntry | undefined {
73
- const key = this.generateKey(prefix, model);
74
- const entry = this.entries.get(key);
75
-
76
- if (entry) {
77
- // Update LRU
78
- this.updateLRU(key);
79
- entry.hit_count++;
80
- entry.last_used = Date.now();
81
- }
82
-
83
- return entry;
84
- }
85
-
86
- /**
87
- * Store a new prefix with its KV state
88
- */
89
- store(
90
- prefix: string,
91
- options?: {
92
- kv_state?: Buffer;
93
- response_hash?: string;
94
- model?: string;
95
- children?: Map<string, string>;
96
- }
97
- ): string {
98
- const key = this.generateKey(prefix, options?.model);
99
-
100
- // Check if already exists
101
- if (this.entries.has(key)) {
102
- const existing = this.entries.get(key)!;
103
- existing.hit_count++;
104
- existing.last_used = Date.now();
105
- return key;
106
- }
107
-
108
- // Estimate memory
109
- const token_count = Math.ceil(prefix.split(/\s+/).length * 1.3);
110
- const memory_bytes = token_count * 16 * 128 * 2; // Rough KV estimate
111
- const memory_mb = memory_bytes / (1024 * 1024);
112
-
113
- const entry: CacheEntry = {
114
- key,
115
- prefix: prefix.substring(0, 1000), // Store truncated
116
- kv_state: options?.kv_state,
117
- response_hash: options?.response_hash,
118
- hit_count: 1,
119
- last_used: Date.now(),
120
- token_count,
121
- children: options?.children || new Map()
122
- };
123
-
124
- // Evict if necessary
125
- while (this.entries.size >= this.max_entries || this.getMemoryUsage() + memory_mb > this.max_memory_mb) {
126
- this.evictLRU();
127
- }
128
-
129
- this.entries.set(key, entry);
130
- this.access_order.push(key);
131
-
132
- return key;
133
- }
134
-
135
- /**
136
- * Extend cached prefix with completion
137
- */
138
- extend(
139
- prefix: string,
140
- completion: string,
141
- options?: { model?: string }
142
- ): string {
143
- const prefix_key = this.generateKey(prefix, options?.model);
144
- const parent = this.entries.get(prefix_key);
145
-
146
- if (!parent) {
147
- // No parent - just store completion as new entry
148
- return this.store(completion, { model: options?.model });
149
- }
150
-
151
- // Create child entry for the extended sequence
152
- const extended = prefix + completion;
153
- const child_key = this.store(extended, { model: options?.model });
154
-
155
- // Link child to parent
156
- const completion_key = this.generateKey(completion);
157
- parent.children.set(completion_key, child_key);
158
-
159
- return child_key;
160
- }
161
-
162
- /**
163
- * Find common prefix between two texts
164
- */
165
- findCommonPrefix(text1: string, text2: string): string {
166
- const words1 = text1.split(/\s+/);
167
- const words2 = text2.split(/\s+/);
168
-
169
- let common_length = 0;
170
- for (let i = 0; i < Math.min(words1.length, words2.length); i++) {
171
- if (words1[i].toLowerCase() === words2[i].toLowerCase()) {
172
- common_length = i + 1;
173
- } else {
174
- break;
175
- }
176
- }
177
-
178
- return words1.slice(0, common_length).join(" ");
179
- }
180
-
181
- /**
182
- * Lookup with prefix matching
183
- * Returns cached entry if any prefix is found
184
- */
185
- lookup(text: string, model?: string): { cached: boolean; prefix?: string; remaining?: string } {
186
- // Try exact match first
187
- const exact_key = this.generateKey(text, model);
188
- if (this.entries.has(exact_key)) {
189
- return { cached: true };
190
- }
191
-
192
- // Try progressively shorter prefixes
193
- const words = text.split(/\s+/);
194
- for (let len = words.length - 1; len >= 5; len--) { // Min 5 words
195
- const prefix = words.slice(0, len).join(" ");
196
- const key = this.generateKey(prefix, model);
197
-
198
- if (this.entries.has(key)) {
199
- const remaining = words.slice(len).join(" ");
200
- return { cached: true, prefix, remaining };
201
- }
202
- }
203
-
204
- return { cached: false };
205
- }
206
-
207
- /**
208
- * Batch lookup for multiple texts
209
- */
210
- lookupBatch(texts: string[], model?: string): Array<{ cached: boolean; prefix?: string; remaining?: string }> {
211
- return texts.map(t => this.lookup(t, model));
212
- }
213
-
214
- /**
215
- * Get cache statistics
216
- */
217
- getStats(): PrefixCacheStats {
218
- const now = Date.now();
219
- let oldest_age = 0;
220
- let total_hits = 0;
221
-
222
- for (const entry of this.entries.values()) {
223
- total_hits += entry.hit_count;
224
- const age = now - entry.last_used;
225
- if (age > oldest_age) oldest_age = age;
226
- }
227
-
228
- const total_requests = total_hits + this.entries.size; // Approximate
229
- const hit_rate = total_requests > 0 ? total_hits / total_requests : 0;
230
-
231
- return {
232
- total_entries: this.entries.size,
233
- total_hits: total_hits,
234
- total_misses: this.entries.size, // Approximate
235
- hit_rate,
236
- memory_estimate_mb: this.getMemoryUsage(),
237
- oldest_entry_age_ms: oldest_age
238
- };
239
- }
240
-
241
- /**
242
- * Get estimated memory usage
243
- */
244
- private getMemoryUsage(): number {
245
- let total_bytes = 0;
246
-
247
- for (const entry of this.entries.values()) {
248
- // Base entry overhead
249
- total_bytes += 200;
250
-
251
- // Prefix text
252
- total_bytes += entry.prefix.length * 2;
253
-
254
- // KV state (if stored)
255
- if (entry.kv_state) {
256
- total_bytes += entry.kv_state.length;
257
- }
258
-
259
- // Children map
260
- total_bytes += entry.children.size * 50;
261
- }
262
-
263
- return total_bytes / (1024 * 1024);
264
- }
265
-
266
- /**
267
- * Update LRU order
268
- */
269
- private updateLRU(key: string): void {
270
- const index = this.access_order.indexOf(key);
271
- if (index > -1) {
272
- this.access_order.splice(index, 1);
273
- }
274
- this.access_order.push(key);
275
- }
276
-
277
- /**
278
- * Evict least recently used entry
279
- */
280
- private evictLRU(): boolean {
281
- if (this.access_order.length === 0) return false;
282
-
283
- const lru_key = this.access_order.shift()!;
284
- const entry = this.entries.get(lru_key);
285
-
286
- if (entry) {
287
- // If has children, re-parent them
288
- for (const [child_key, child_cache_key] of entry.children) {
289
- const child = this.entries.get(child_cache_key);
290
- if (child) {
291
- // Promote child to standalone
292
- this.access_order.push(child_cache_key);
293
- }
294
- }
295
-
296
- this.entries.delete(lru_key);
297
- return true;
298
- }
299
-
300
- return false;
301
- }
302
-
303
- /**
304
- * Clear all cache
305
- */
306
- clear(): void {
307
- this.entries.clear();
308
- this.access_order = [];
309
- }
310
-
311
- /**
312
- * Invalidate entries matching pattern
313
- */
314
- invalidate(pattern?: string): number {
315
- let count = 0;
316
-
317
- if (!pattern) {
318
- // Clear all
319
- count = this.entries.size;
320
- this.clear();
321
- return count;
322
- }
323
-
324
- // Pattern-based invalidation
325
- for (const [key, entry] of this.entries) {
326
- if (entry.prefix.includes(pattern)) {
327
- this.entries.delete(key);
328
- count++;
329
- }
330
- }
331
-
332
- return count;
333
- }
334
-
335
- /**
336
- * Warm up cache with common system prompts
337
- */
338
- warmup(common_prefixes: string[], model?: string): void {
339
- for (const prefix of common_prefixes) {
340
- this.store(prefix, { model });
341
- }
342
- console.log(`[PrefixCache] Warmed up with ${common_prefixes.length} common prefixes`);
343
- }
344
- }
345
-
346
- // Common system prompts that benefit from prefix caching
347
- const COMMON_SYSTEM_PROMPTS = [
348
- "You are a helpful assistant.",
349
- "You are a coding assistant. Help with programming tasks.",
350
- "You are an expert data scientist.",
351
- "You are a senior software engineer.",
352
- "Analyze the following code and provide feedback.",
353
- "Explain this concept in simple terms.",
354
- "Write clean, well-documented code.",
355
- "Think step by step and explain your reasoning."
356
- ];
357
-
358
- export default PrefixCache;
359
-
360
- // Utility function for creating pre-warmed cache
361
- export function createWarmedCache(): PrefixCache {
362
- const cache = new PrefixCache({ max_entries: 5000 });
363
- cache.warmup(COMMON_SYSTEM_PROMPTS);
364
- return cache;
365
- }