adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
package/rust/tmlpd.h
DELETED
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TMLPD Rust Bindings - FFI Interface
|
|
3
|
-
*
|
|
4
|
-
* High-performance Rust library for TMLPD operations.
|
|
5
|
-
* Enables zero-overhead integration with Rust projects.
|
|
6
|
-
*
|
|
7
|
-
* Build: cargo build --release
|
|
8
|
-
* Use: npm install tmlpd-pi (Rust bindings auto-included)
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
#include <stdint.h>
|
|
12
|
-
#include <stddef.h>
|
|
13
|
-
#include <stdbool.h>
|
|
14
|
-
|
|
15
|
-
#ifdef _WIN32
|
|
16
|
-
#define TMLPD_EXPORT __declspec(dllexport)
|
|
17
|
-
#else
|
|
18
|
-
#define TMLPD_EXPORT
|
|
19
|
-
#endif
|
|
20
|
-
|
|
21
|
-
// Error codes
|
|
22
|
-
typedef enum {
|
|
23
|
-
TMLPD_OK = 0,
|
|
24
|
-
TMLPD_ERR_INVALID_INPUT = 1,
|
|
25
|
-
TMLPD_ERR_PROVIDER_FAILED = 2,
|
|
26
|
-
TMLPD_ERR_TIMEOUT = 3,
|
|
27
|
-
TMLPD_ERR_NO_PROVIDERS = 4,
|
|
28
|
-
TMLPD_ERR_OUT_OF_MEMORY = 5
|
|
29
|
-
} tmlpd_error_t;
|
|
30
|
-
|
|
31
|
-
// Result structure
|
|
32
|
-
typedef struct {
|
|
33
|
-
tmlpd_error_t error;
|
|
34
|
-
char* content;
|
|
35
|
-
uint32_t tokens_used;
|
|
36
|
-
double cost_usd;
|
|
37
|
-
uint64_t duration_ms;
|
|
38
|
-
bool cached;
|
|
39
|
-
} tmlpd_result_t;
|
|
40
|
-
|
|
41
|
-
// Config structure
|
|
42
|
-
typedef struct {
|
|
43
|
-
uint32_t max_concurrent;
|
|
44
|
-
uint32_t cache_ttl_seconds;
|
|
45
|
-
double daily_budget_usd;
|
|
46
|
-
double retry_base_delay_ms;
|
|
47
|
-
double retry_jitter;
|
|
48
|
-
} tmlpd_config_t;
|
|
49
|
-
|
|
50
|
-
// Initialize TMLPD with config
|
|
51
|
-
TMLPD_EXPORT tmlpd_error_t tmlpd_init(tmlpd_config_t* config);
|
|
52
|
-
|
|
53
|
-
// Execute single prompt
|
|
54
|
-
TMLPD_EXPORT tmlpd_result_t* tmlpd_execute(
|
|
55
|
-
const char* prompt,
|
|
56
|
-
const char* model,
|
|
57
|
-
double timeout_ms
|
|
58
|
-
);
|
|
59
|
-
|
|
60
|
-
// Execute parallel across multiple models
|
|
61
|
-
TMLPD_EXPORT tmlpd_result_t** tmlpd_execute_parallel(
|
|
62
|
-
const char* prompt,
|
|
63
|
-
const char** models,
|
|
64
|
-
uint32_t model_count,
|
|
65
|
-
double timeout_ms,
|
|
66
|
-
uint32_t* result_count
|
|
67
|
-
);
|
|
68
|
-
|
|
69
|
-
// Token counting (no API call needed)
|
|
70
|
-
TMLPD_EXPORT uint32_t tmlpd_count_tokens(
|
|
71
|
-
const char* text,
|
|
72
|
-
const char* model
|
|
73
|
-
);
|
|
74
|
-
|
|
75
|
-
// Estimate cost before execution
|
|
76
|
-
TMLPD_EXPORT double tmlpd_estimate_cost(
|
|
77
|
-
uint32_t prompt_tokens,
|
|
78
|
-
uint32_t completion_tokens,
|
|
79
|
-
const char* model
|
|
80
|
-
);
|
|
81
|
-
|
|
82
|
-
// ISON compression
|
|
83
|
-
TMLPD_EXPORT char* tmlpd_ison_encode(const char* text);
|
|
84
|
-
TMLPD_EXPORT char* tmlpd_ison_decode(const char* encoded);
|
|
85
|
-
|
|
86
|
-
// Memory operations
|
|
87
|
-
TMLPD_EXPORT char* tmlpd_store_episode(
|
|
88
|
-
const char* task_desc,
|
|
89
|
-
const char* result,
|
|
90
|
-
const char* model,
|
|
91
|
-
double cost
|
|
92
|
-
);
|
|
93
|
-
|
|
94
|
-
TMLPD_EXPORT char* tmlpd_query_similar(
|
|
95
|
-
const char* task_desc,
|
|
96
|
-
uint32_t limit
|
|
97
|
-
);
|
|
98
|
-
|
|
99
|
-
// Cleanup
|
|
100
|
-
TMLPD_EXPORT void tmlpd_free_result(tmlpd_result_t* result);
|
|
101
|
-
TMLPD_EXPORT void tmlpd_free_string(char* str);
|
|
102
|
-
TMLPD_EXPORT void tmlpd_shutdown(void);
|
|
103
|
-
|
|
104
|
-
// ============================================
|
|
105
|
-
// Implementation stubs (for demonstration)
|
|
106
|
-
// In production, these call actual Rust lib
|
|
107
|
-
// ============================================
|
|
108
|
-
|
|
109
|
-
#ifdef TMLPD_IMPLEMENTATION
|
|
110
|
-
|
|
111
|
-
#include <stdlib.h>
|
|
112
|
-
#include <string.h>
|
|
113
|
-
#include <time.h>
|
|
114
|
-
|
|
115
|
-
static tmlpd_config_t g_config = {0};
|
|
116
|
-
static bool g_initialized = false;
|
|
117
|
-
|
|
118
|
-
TMLPD_EXPORT tmlpd_error_t tmlpd_init(tmlpd_config_t* config) {
|
|
119
|
-
if (!config) return TMLPD_ERR_INVALID_INPUT;
|
|
120
|
-
memcpy(&g_config, config, sizeof(tmlpd_config_t));
|
|
121
|
-
g_initialized = true;
|
|
122
|
-
return TMLPD_OK;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
TMLPD_EXPORT tmlpd_result_t* tmlpd_execute(
|
|
126
|
-
const char* prompt,
|
|
127
|
-
const char* model,
|
|
128
|
-
double timeout_ms
|
|
129
|
-
) {
|
|
130
|
-
if (!g_initialized || !prompt || !model) return NULL;
|
|
131
|
-
|
|
132
|
-
tmlpd_result_t* result = (tmlpd_result_t*)malloc(sizeof(tmlpd_result_t));
|
|
133
|
-
if (!result) return NULL;
|
|
134
|
-
|
|
135
|
-
// Simulate execution
|
|
136
|
-
result->error = TMLPD_OK;
|
|
137
|
-
result->tokens_used = (uint32_t)(strlen(prompt) / 4);
|
|
138
|
-
result->cost_usd = result->tokens_used * 0.00001;
|
|
139
|
-
result->duration_ms = (uint64_t)(rand() % 1000 + 100);
|
|
140
|
-
result->cached = false;
|
|
141
|
-
|
|
142
|
-
// Allocate and fill content
|
|
143
|
-
size_t content_len = strlen(prompt) + 20;
|
|
144
|
-
result->content = (char*)malloc(content_len);
|
|
145
|
-
snprintf(result->content, content_len, "[TMLPD Rust] Processed: %s", prompt);
|
|
146
|
-
|
|
147
|
-
return result;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
TMLPD_EXPORT tmlpd_result_t** tmlpd_execute_parallel(
|
|
151
|
-
const char* prompt,
|
|
152
|
-
const char** models,
|
|
153
|
-
uint32_t model_count,
|
|
154
|
-
double timeout_ms,
|
|
155
|
-
uint32_t* result_count
|
|
156
|
-
) {
|
|
157
|
-
if (!result_count || model_count == 0) return NULL;
|
|
158
|
-
*result_count = model_count;
|
|
159
|
-
|
|
160
|
-
tmlpd_result_t** results = (tmlpd_result_t**)malloc(
|
|
161
|
-
sizeof(tmlpd_result_t*) * model_count
|
|
162
|
-
);
|
|
163
|
-
|
|
164
|
-
for (uint32_t i = 0; i < model_count; i++) {
|
|
165
|
-
results[i] = tmlpd_execute(prompt, models[i], timeout_ms);
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
return results;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
TMLPD_EXPORT uint32_t tmlpd_count_tokens(const char* text, const char* model) {
|
|
172
|
-
if (!text) return 0;
|
|
173
|
-
// Simple word-based approximation
|
|
174
|
-
uint32_t words = 0;
|
|
175
|
-
for (const char* p = text; *p; p++) {
|
|
176
|
-
if (*p == ' ') words++;
|
|
177
|
-
}
|
|
178
|
-
return (words + 1) * 13 / 10; // ~1.3 tokens per word
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
TMLPD_EXPORT double tmlpd_estimate_cost(
|
|
182
|
-
uint32_t prompt_tokens,
|
|
183
|
-
uint32_t completion_tokens,
|
|
184
|
-
const char* model
|
|
185
|
-
) {
|
|
186
|
-
// Default GPT-4 pricing
|
|
187
|
-
double input_rate = 0.0025 / 1000; // $2.50/1M
|
|
188
|
-
double output_rate = 0.01 / 1000; // $10/1M
|
|
189
|
-
|
|
190
|
-
if (strstr(model, "claude")) {
|
|
191
|
-
input_rate = 0.003 / 1000;
|
|
192
|
-
output_rate = 0.015 / 1000;
|
|
193
|
-
} else if (strstr(model, "gemini")) {
|
|
194
|
-
input_rate = 0.000075 / 1000;
|
|
195
|
-
output_rate = 0.0003 / 1000;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
return prompt_tokens * input_rate + completion_tokens * output_rate;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
TMLPD_EXPORT char* tmlpd_ison_encode(const char* text) {
|
|
202
|
-
if (!text) return NULL;
|
|
203
|
-
|
|
204
|
-
// Remove common words
|
|
205
|
-
size_t len = strlen(text) + 1;
|
|
206
|
-
char* result = (char*)malloc(len);
|
|
207
|
-
strcpy(result, text);
|
|
208
|
-
|
|
209
|
-
// Simple ISON: remove articles
|
|
210
|
-
const char* articles[] = {" the ", " a ", " an ", " The ", " A ", " An "};
|
|
211
|
-
for (int i = 0; i < 6; i++) {
|
|
212
|
-
char* pos;
|
|
213
|
-
while ((pos = strstr(result, articles[i])) != NULL) {
|
|
214
|
-
memmove(pos, pos + strlen(articles[i]),
|
|
215
|
-
strlen(pos + strlen(articles[i])) + 1);
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
return result;
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
TMLPD_EXPORT char* tmlpd_ison_decode(const char* encoded) {
|
|
223
|
-
// In production, reverse ISON encoding
|
|
224
|
-
if (!encoded) return NULL;
|
|
225
|
-
char* result = (char*)malloc(strlen(encoded) + 10);
|
|
226
|
-
sprintf(result, "The %s", encoded);
|
|
227
|
-
return result;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
TMLPD_EXPORT char* tmlpd_store_episode(
|
|
231
|
-
const char* task_desc,
|
|
232
|
-
const char* result,
|
|
233
|
-
const char* model,
|
|
234
|
-
double cost
|
|
235
|
-
) {
|
|
236
|
-
// In production, store in episodic memory
|
|
237
|
-
(void)task_desc; (void)result; (void)model; (void)cost;
|
|
238
|
-
char* id = (char*)malloc(16);
|
|
239
|
-
snprintf(id, 16, "ep_%ld", time(NULL));
|
|
240
|
-
return id;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
TMLPD_EXPORT char* tmlpd_query_similar(
|
|
244
|
-
const char* task_desc,
|
|
245
|
-
uint32_t limit
|
|
246
|
-
) {
|
|
247
|
-
(void)task_desc; (void)limit;
|
|
248
|
-
char* result = (char*)malloc(32);
|
|
249
|
-
strcpy(result, "[]"); // Empty array
|
|
250
|
-
return result;
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
TMLPD_EXPORT void tmlpd_free_result(tmlpd_result_t* result) {
|
|
254
|
-
if (result) {
|
|
255
|
-
if (result->content) free(result->content);
|
|
256
|
-
free(result);
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
TMLPD_EXPORT void tmlpd_free_string(char* str) {
|
|
261
|
-
if (str) free(str);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
TMLPD_EXPORT void tmlpd_shutdown(void) {
|
|
265
|
-
g_initialized = false;
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
#endif // TMLPD_IMPLEMENTATION
|
package/src/cache/prefixCache.ts
DELETED
|
@@ -1,365 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TMLPD Prefix Cache - RadixAttention Style
|
|
3
|
-
*
|
|
4
|
-
* Inspired by SGLang's RadixAttention (arXiv:2312.07104)
|
|
5
|
-
* Caches KV states for common prefixes (system prompts, etc.)
|
|
6
|
-
* 5-10x speedup for repeated prompt patterns
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
export interface CacheEntry {
|
|
10
|
-
key: string; // Hash of the prefix
|
|
11
|
-
prefix: string; // Original prefix text
|
|
12
|
-
kv_state?: Buffer; // Cached KV state (if using actual KV cache)
|
|
13
|
-
response_hash?: string; // Hash of cached response
|
|
14
|
-
hit_count: number; // Times this prefix was used
|
|
15
|
-
last_used: number; // Timestamp
|
|
16
|
-
token_count: number; // Tokens in this prefix
|
|
17
|
-
children: Map<string, string>; // child_key -> child_cache_key
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export interface PrefixCacheStats {
|
|
21
|
-
total_entries: number;
|
|
22
|
-
total_hits: number;
|
|
23
|
-
total_misses: number;
|
|
24
|
-
hit_rate: number;
|
|
25
|
-
memory_estimate_mb: number;
|
|
26
|
-
oldest_entry_age_ms: number;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
export class PrefixCache {
|
|
30
|
-
private entries: Map<string, CacheEntry> = new Map();
|
|
31
|
-
private access_order: string[] = []; // LRU tracking
|
|
32
|
-
private max_entries: number;
|
|
33
|
-
private max_memory_mb: number;
|
|
34
|
-
|
|
35
|
-
constructor(options?: {
|
|
36
|
-
max_entries?: number;
|
|
37
|
-
max_memory_mb?: number;
|
|
38
|
-
}) {
|
|
39
|
-
this.max_entries = options?.max_entries || 10000;
|
|
40
|
-
this.max_memory_mb = options?.max_memory_mb || 512;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* Generate cache key from text prefix
|
|
45
|
-
*/
|
|
46
|
-
private generateKey(text: string, model?: string): string {
|
|
47
|
-
// Simple hash for now - in production use SHA-256
|
|
48
|
-
const normalized = text.toLowerCase().trim().substring(0, 500);
|
|
49
|
-
const str = `${model || "default"}:${normalized}`;
|
|
50
|
-
|
|
51
|
-
let hash = 0;
|
|
52
|
-
for (let i = 0; i < str.length; i++) {
|
|
53
|
-
const char = str.charCodeAt(i);
|
|
54
|
-
hash = ((hash << 5) - hash) + char;
|
|
55
|
-
hash = hash & hash; // Convert to 32bit integer
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
return `pc_${Math.abs(hash).toString(16)}`;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
/**
|
|
62
|
-
* Check if prefix is cached
|
|
63
|
-
*/
|
|
64
|
-
has(prefix: string, model?: string): boolean {
|
|
65
|
-
const key = this.generateKey(prefix, model);
|
|
66
|
-
return this.entries.has(key);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
/**
|
|
70
|
-
* Get cached entry
|
|
71
|
-
*/
|
|
72
|
-
get(prefix: string, model?: string): CacheEntry | undefined {
|
|
73
|
-
const key = this.generateKey(prefix, model);
|
|
74
|
-
const entry = this.entries.get(key);
|
|
75
|
-
|
|
76
|
-
if (entry) {
|
|
77
|
-
// Update LRU
|
|
78
|
-
this.updateLRU(key);
|
|
79
|
-
entry.hit_count++;
|
|
80
|
-
entry.last_used = Date.now();
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return entry;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
/**
|
|
87
|
-
* Store a new prefix with its KV state
|
|
88
|
-
*/
|
|
89
|
-
store(
|
|
90
|
-
prefix: string,
|
|
91
|
-
options?: {
|
|
92
|
-
kv_state?: Buffer;
|
|
93
|
-
response_hash?: string;
|
|
94
|
-
model?: string;
|
|
95
|
-
children?: Map<string, string>;
|
|
96
|
-
}
|
|
97
|
-
): string {
|
|
98
|
-
const key = this.generateKey(prefix, options?.model);
|
|
99
|
-
|
|
100
|
-
// Check if already exists
|
|
101
|
-
if (this.entries.has(key)) {
|
|
102
|
-
const existing = this.entries.get(key)!;
|
|
103
|
-
existing.hit_count++;
|
|
104
|
-
existing.last_used = Date.now();
|
|
105
|
-
return key;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Estimate memory
|
|
109
|
-
const token_count = Math.ceil(prefix.split(/\s+/).length * 1.3);
|
|
110
|
-
const memory_bytes = token_count * 16 * 128 * 2; // Rough KV estimate
|
|
111
|
-
const memory_mb = memory_bytes / (1024 * 1024);
|
|
112
|
-
|
|
113
|
-
const entry: CacheEntry = {
|
|
114
|
-
key,
|
|
115
|
-
prefix: prefix.substring(0, 1000), // Store truncated
|
|
116
|
-
kv_state: options?.kv_state,
|
|
117
|
-
response_hash: options?.response_hash,
|
|
118
|
-
hit_count: 1,
|
|
119
|
-
last_used: Date.now(),
|
|
120
|
-
token_count,
|
|
121
|
-
children: options?.children || new Map()
|
|
122
|
-
};
|
|
123
|
-
|
|
124
|
-
// Evict if necessary
|
|
125
|
-
while (this.entries.size >= this.max_entries || this.getMemoryUsage() + memory_mb > this.max_memory_mb) {
|
|
126
|
-
this.evictLRU();
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
this.entries.set(key, entry);
|
|
130
|
-
this.access_order.push(key);
|
|
131
|
-
|
|
132
|
-
return key;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
/**
|
|
136
|
-
* Extend cached prefix with completion
|
|
137
|
-
*/
|
|
138
|
-
extend(
|
|
139
|
-
prefix: string,
|
|
140
|
-
completion: string,
|
|
141
|
-
options?: { model?: string }
|
|
142
|
-
): string {
|
|
143
|
-
const prefix_key = this.generateKey(prefix, options?.model);
|
|
144
|
-
const parent = this.entries.get(prefix_key);
|
|
145
|
-
|
|
146
|
-
if (!parent) {
|
|
147
|
-
// No parent - just store completion as new entry
|
|
148
|
-
return this.store(completion, { model: options?.model });
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Create child entry for the extended sequence
|
|
152
|
-
const extended = prefix + completion;
|
|
153
|
-
const child_key = this.store(extended, { model: options?.model });
|
|
154
|
-
|
|
155
|
-
// Link child to parent
|
|
156
|
-
const completion_key = this.generateKey(completion);
|
|
157
|
-
parent.children.set(completion_key, child_key);
|
|
158
|
-
|
|
159
|
-
return child_key;
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
/**
|
|
163
|
-
* Find common prefix between two texts
|
|
164
|
-
*/
|
|
165
|
-
findCommonPrefix(text1: string, text2: string): string {
|
|
166
|
-
const words1 = text1.split(/\s+/);
|
|
167
|
-
const words2 = text2.split(/\s+/);
|
|
168
|
-
|
|
169
|
-
let common_length = 0;
|
|
170
|
-
for (let i = 0; i < Math.min(words1.length, words2.length); i++) {
|
|
171
|
-
if (words1[i].toLowerCase() === words2[i].toLowerCase()) {
|
|
172
|
-
common_length = i + 1;
|
|
173
|
-
} else {
|
|
174
|
-
break;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
return words1.slice(0, common_length).join(" ");
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
/**
|
|
182
|
-
* Lookup with prefix matching
|
|
183
|
-
* Returns cached entry if any prefix is found
|
|
184
|
-
*/
|
|
185
|
-
lookup(text: string, model?: string): { cached: boolean; prefix?: string; remaining?: string } {
|
|
186
|
-
// Try exact match first
|
|
187
|
-
const exact_key = this.generateKey(text, model);
|
|
188
|
-
if (this.entries.has(exact_key)) {
|
|
189
|
-
return { cached: true };
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Try progressively shorter prefixes
|
|
193
|
-
const words = text.split(/\s+/);
|
|
194
|
-
for (let len = words.length - 1; len >= 5; len--) { // Min 5 words
|
|
195
|
-
const prefix = words.slice(0, len).join(" ");
|
|
196
|
-
const key = this.generateKey(prefix, model);
|
|
197
|
-
|
|
198
|
-
if (this.entries.has(key)) {
|
|
199
|
-
const remaining = words.slice(len).join(" ");
|
|
200
|
-
return { cached: true, prefix, remaining };
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
return { cached: false };
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
/**
|
|
208
|
-
* Batch lookup for multiple texts
|
|
209
|
-
*/
|
|
210
|
-
lookupBatch(texts: string[], model?: string): Array<{ cached: boolean; prefix?: string; remaining?: string }> {
|
|
211
|
-
return texts.map(t => this.lookup(t, model));
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
/**
|
|
215
|
-
* Get cache statistics
|
|
216
|
-
*/
|
|
217
|
-
getStats(): PrefixCacheStats {
|
|
218
|
-
const now = Date.now();
|
|
219
|
-
let oldest_age = 0;
|
|
220
|
-
let total_hits = 0;
|
|
221
|
-
|
|
222
|
-
for (const entry of this.entries.values()) {
|
|
223
|
-
total_hits += entry.hit_count;
|
|
224
|
-
const age = now - entry.last_used;
|
|
225
|
-
if (age > oldest_age) oldest_age = age;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
const total_requests = total_hits + this.entries.size; // Approximate
|
|
229
|
-
const hit_rate = total_requests > 0 ? total_hits / total_requests : 0;
|
|
230
|
-
|
|
231
|
-
return {
|
|
232
|
-
total_entries: this.entries.size,
|
|
233
|
-
total_hits: total_hits,
|
|
234
|
-
total_misses: this.entries.size, // Approximate
|
|
235
|
-
hit_rate,
|
|
236
|
-
memory_estimate_mb: this.getMemoryUsage(),
|
|
237
|
-
oldest_entry_age_ms: oldest_age
|
|
238
|
-
};
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/**
|
|
242
|
-
* Get estimated memory usage
|
|
243
|
-
*/
|
|
244
|
-
private getMemoryUsage(): number {
|
|
245
|
-
let total_bytes = 0;
|
|
246
|
-
|
|
247
|
-
for (const entry of this.entries.values()) {
|
|
248
|
-
// Base entry overhead
|
|
249
|
-
total_bytes += 200;
|
|
250
|
-
|
|
251
|
-
// Prefix text
|
|
252
|
-
total_bytes += entry.prefix.length * 2;
|
|
253
|
-
|
|
254
|
-
// KV state (if stored)
|
|
255
|
-
if (entry.kv_state) {
|
|
256
|
-
total_bytes += entry.kv_state.length;
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
// Children map
|
|
260
|
-
total_bytes += entry.children.size * 50;
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
return total_bytes / (1024 * 1024);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
/**
|
|
267
|
-
* Update LRU order
|
|
268
|
-
*/
|
|
269
|
-
private updateLRU(key: string): void {
|
|
270
|
-
const index = this.access_order.indexOf(key);
|
|
271
|
-
if (index > -1) {
|
|
272
|
-
this.access_order.splice(index, 1);
|
|
273
|
-
}
|
|
274
|
-
this.access_order.push(key);
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
/**
|
|
278
|
-
* Evict least recently used entry
|
|
279
|
-
*/
|
|
280
|
-
private evictLRU(): boolean {
|
|
281
|
-
if (this.access_order.length === 0) return false;
|
|
282
|
-
|
|
283
|
-
const lru_key = this.access_order.shift()!;
|
|
284
|
-
const entry = this.entries.get(lru_key);
|
|
285
|
-
|
|
286
|
-
if (entry) {
|
|
287
|
-
// If has children, re-parent them
|
|
288
|
-
for (const [child_key, child_cache_key] of entry.children) {
|
|
289
|
-
const child = this.entries.get(child_cache_key);
|
|
290
|
-
if (child) {
|
|
291
|
-
// Promote child to standalone
|
|
292
|
-
this.access_order.push(child_cache_key);
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
this.entries.delete(lru_key);
|
|
297
|
-
return true;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
return false;
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
/**
|
|
304
|
-
* Clear all cache
|
|
305
|
-
*/
|
|
306
|
-
clear(): void {
|
|
307
|
-
this.entries.clear();
|
|
308
|
-
this.access_order = [];
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
/**
|
|
312
|
-
* Invalidate entries matching pattern
|
|
313
|
-
*/
|
|
314
|
-
invalidate(pattern?: string): number {
|
|
315
|
-
let count = 0;
|
|
316
|
-
|
|
317
|
-
if (!pattern) {
|
|
318
|
-
// Clear all
|
|
319
|
-
count = this.entries.size;
|
|
320
|
-
this.clear();
|
|
321
|
-
return count;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
// Pattern-based invalidation
|
|
325
|
-
for (const [key, entry] of this.entries) {
|
|
326
|
-
if (entry.prefix.includes(pattern)) {
|
|
327
|
-
this.entries.delete(key);
|
|
328
|
-
count++;
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
return count;
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
/**
|
|
336
|
-
* Warm up cache with common system prompts
|
|
337
|
-
*/
|
|
338
|
-
warmup(common_prefixes: string[], model?: string): void {
|
|
339
|
-
for (const prefix of common_prefixes) {
|
|
340
|
-
this.store(prefix, { model });
|
|
341
|
-
}
|
|
342
|
-
console.log(`[PrefixCache] Warmed up with ${common_prefixes.length} common prefixes`);
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
// Common system prompts that benefit from prefix caching
|
|
347
|
-
const COMMON_SYSTEM_PROMPTS = [
|
|
348
|
-
"You are a helpful assistant.",
|
|
349
|
-
"You are a coding assistant. Help with programming tasks.",
|
|
350
|
-
"You are an expert data scientist.",
|
|
351
|
-
"You are a senior software engineer.",
|
|
352
|
-
"Analyze the following code and provide feedback.",
|
|
353
|
-
"Explain this concept in simple terms.",
|
|
354
|
-
"Write clean, well-documented code.",
|
|
355
|
-
"Think step by step and explain your reasoning."
|
|
356
|
-
];
|
|
357
|
-
|
|
358
|
-
export default PrefixCache;
|
|
359
|
-
|
|
360
|
-
// Utility function for creating pre-warmed cache
|
|
361
|
-
export function createWarmedCache(): PrefixCache {
|
|
362
|
-
const cache = new PrefixCache({ max_entries: 5000 });
|
|
363
|
-
cache.warmup(COMMON_SYSTEM_PROMPTS);
|
|
364
|
-
return cache;
|
|
365
|
-
}
|