adaptive-memory-multi-model-router 2.14.49 → 2.14.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (605)
  1. package/.dockerignore +82 -0
  2. package/.env.example +303 -0
  3. package/.github/DISCUSSIONS_WELCOME.md +27 -0
  4. package/.github/DISCUSSION_TEMPLATE.yml +5 -0
  5. package/.github/FUNDING.yml +2 -0
  6. package/.github/ISSUE_TEMPLATE/bug_report.md +94 -0
  7. package/.github/ISSUE_TEMPLATE/config.yml +17 -0
  8. package/.github/ISSUE_TEMPLATE/feature_request.md +71 -0
  9. package/.github/PULL_REQUEST_TEMPLATE.md +71 -0
  10. package/.github/dependabot.yml +9 -0
  11. package/.github/workflows/ci.yml +263 -0
  12. package/.github/workflows/codeql.yml +38 -0
  13. package/.github/workflows/npm-publish.yml +20 -0
  14. package/.github/workflows/pages.yml +37 -0
  15. package/.github/workflows/stale.yml +54 -0
  16. package/.publish-tick +1 -0
  17. package/.well-known/ai-plugin.json +16 -0
  18. package/AGENT_COUNCIL_FINDINGS.md +142 -0
  19. package/ARCHITECTURE.md +346 -0
  20. package/AUDIT_REPORT.md +28 -0
  21. package/CODE_OF_CONDUCT.md +128 -0
  22. package/CONTRIBUTING.md +50 -0
  23. package/CONTRIBUTORS.md +20 -0
  24. package/Dockerfile +53 -0
  25. package/Dockerfile.proxy +33 -0
  26. package/HEALTH_REPORT.md +118 -0
  27. package/IMPROVEMENT_PLAN.md +107 -0
  28. package/LANDING.md +43 -0
  29. package/LAUNCH-PAIN-DRIVEN.md +339 -0
  30. package/LAUNCH.md +337 -0
  31. package/LAUNCH_CHECKLIST.md +141 -0
  32. package/LAUNCH_SNAPSHOT.md +260 -0
  33. package/MANIFESTO.md +41 -0
  34. package/POPULARITY_BOOSTERS.md +285 -0
  35. package/PR_STATUS_REPORT.md +148 -0
  36. package/README.md +25 -14
  37. package/REDESIGN.md +95 -0
  38. package/RUNKIT.md +83 -0
  39. package/SECURITY.md +29 -0
  40. package/SUBMISSIONS.md +43 -0
  41. package/_schema.html +53 -0
  42. package/ai-plugin.json +16 -0
  43. package/articles/AI_AGENT_LLM_ROUTING.md +150 -0
  44. package/articles/CHINESE_DIRECTORIES.md +100 -0
  45. package/articles/CHINESE_SUBMISSIONS_READY.md +322 -0
  46. package/articles/COMPETITOR_ALERTS.md +31 -0
  47. package/articles/COMPLETE_POSTING_DIRECTORY.md +147 -0
  48. package/articles/CONTENT_STRUCTURE.md +292 -0
  49. package/articles/DEVTO_COST_GUIDE.md +473 -0
  50. package/articles/DEVTO_FINAL.md +416 -0
  51. package/articles/DEVTO_MULTI_PROVIDER.md +542 -0
  52. package/articles/DEVTO_READY.md +255 -0
  53. package/articles/DEVTO_V2_ANNOUNCEMENT.md +160 -0
  54. package/articles/DEVTO_VIRAL_GROWTH.md +280 -0
  55. package/articles/FRESH_devto.md +460 -0
  56. package/articles/FRESH_devto_2026_05.md +73 -0
  57. package/articles/FRESH_hackernews.md +14 -0
  58. package/articles/FRESH_reddit_ml.md +90 -0
  59. package/articles/FRESH_reddit_node.md +198 -0
  60. package/articles/FRESH_reddit_sideproject.md +72 -0
  61. package/articles/FRESH_reddit_webdev.md +130 -0
  62. package/articles/FROM_ZERO_TO_10K.md +107 -0
  63. package/articles/HN_10X_BETTER.md +430 -0
  64. package/articles/HN_ACCOUNT_GUIDE.md +21 -0
  65. package/articles/HN_CHINESE_STYLE.md +308 -0
  66. package/articles/HN_FINAL.md +148 -0
  67. package/articles/HN_POSTED_VERSION.md +56 -0
  68. package/articles/HN_POST_READY.md +137 -0
  69. package/articles/HN_RESEARCH.md +364 -0
  70. package/articles/HN_SHOW_routerarena.md +17 -0
  71. package/articles/HN_TIMING_GUIDE.md +52 -0
  72. package/articles/INDIEHACKERS_POST.md +52 -0
  73. package/articles/INDIEHACKERS_READY.md +120 -0
  74. package/articles/LLM_BENCHMARK_DEEP_DIVE.md +153 -0
  75. package/articles/MASTER_POSTING_DIRECTORY.md +189 -0
  76. package/articles/NEWSLETTER_SEND_NOW.md +259 -0
  77. package/articles/NEWSLETTER_SUBMISSIONS.md +112 -0
  78. package/articles/PAIN-DRIVEN-devto-v2.md +308 -0
  79. package/articles/PAIN-DRIVEN-devto-v3.md +268 -0
  80. package/articles/PAIN-DRIVEN-devto.md +242 -0
  81. package/articles/PAIN-DRIVEN-hackernews-v2.md +138 -0
  82. package/articles/PAIN-DRIVEN-hackernews-v3.md +151 -0
  83. package/articles/PAIN-DRIVEN-hackernews.md +131 -0
  84. package/articles/PAIN-DRIVEN-reddit-v2.md +301 -0
  85. package/articles/PAIN-DRIVEN-reddit-v3.md +236 -0
  86. package/articles/PAIN-DRIVEN-reddit.md +218 -0
  87. package/articles/PAIN-DRIVEN-twitter-v2.md +110 -0
  88. package/articles/PAIN-DRIVEN-twitter-v3.md +121 -0
  89. package/articles/PAIN-DRIVEN-twitter.md +120 -0
  90. package/articles/PORTKEY_VS_A3M.md +147 -0
  91. package/articles/POSTING_KIT_2026_05.md +67 -0
  92. package/articles/PRESS_KIT_routerarena.md +77 -0
  93. package/articles/PRODUCTHUNT_LISTING.md +48 -0
  94. package/articles/PRODUCTHUNT_READY.md +106 -0
  95. package/articles/PR_PLAN_vault.md +125 -0
  96. package/articles/REDDIT_FINAL.md +232 -0
  97. package/articles/REDDIT_POST.md +67 -0
  98. package/articles/REDDIT_SUBMISSION_READY.md +348 -0
  99. package/articles/ROUTERARENA_9677.md +78 -0
  100. package/articles/ROUTERARENA_LEADER.md +45 -0
  101. package/articles/SHOW_HN_FINAL.md +29 -0
  102. package/articles/TWEETS_10K_DOWNLOADS.md +47 -0
  103. package/articles/TWEETS_BENCHMARK_FIRST.md +46 -0
  104. package/articles/TWEETS_MCP_PLAY.md +51 -0
  105. package/articles/TWEETS_SEQUENTIAL_BROKEN.md +49 -0
  106. package/articles/TWEETS_WHY_BUILD.md +54 -0
  107. package/articles/TWEETS_routerarena_leader.md +53 -0
  108. package/articles/TWEET_STORM_READY.md +165 -0
  109. package/articles/TWITTER_FINAL.md +167 -0
  110. package/articles/WHY_10X_BETTER.md +261 -0
  111. package/articles/WHY_CHINESE_STYLE_BETTER.md +323 -0
  112. package/articles/ai-discoverability-llm-routing.md +210 -0
  113. package/articles/devto-llm-routing.md +138 -0
  114. package/articles/hackernews-show-hn.md +54 -0
  115. package/articles/hashnode-llm-cost-optimization.md +125 -0
  116. package/articles/hn_show_2026_05.md +11 -0
  117. package/articles/medium-building-llm-router.md +205 -0
  118. package/articles/reddit-ml.md +76 -0
  119. package/articles/twitter-thread-cost-savings.md +50 -0
  120. package/articles/youtube-tutorial-script.md +262 -0
  121. package/assets/a3m_3blue1brown.mp4 +0 -0
  122. package/assets/banner.svg +109 -0
  123. package/assets/chart-cost-v2.svg +91 -0
  124. package/assets/chart-cost-v3.svg +143 -0
  125. package/assets/chart-features-v2.svg +132 -0
  126. package/assets/chart-features-v3.svg +211 -0
  127. package/assets/chart-growth-v2.svg +122 -0
  128. package/assets/chart-growth-v3.svg +189 -0
  129. package/assets/cost-comparison.svg +134 -0
  130. package/assets/cost-simple.svg +64 -0
  131. package/assets/demo-hn.gif +0 -0
  132. package/assets/feature-matrix.svg +136 -0
  133. package/assets/growth-chart-animated.svg +76 -0
  134. package/assets/growth-chart.svg +82 -0
  135. package/assets/growth-simple.svg +69 -0
  136. package/assets/hero-diagram.svg +81 -0
  137. package/assets/logo-new.svg +21 -0
  138. package/assets/logo.svg +68 -0
  139. package/assets/provider-comparison.svg +121 -0
  140. package/assets/social-preview-new.svg +100 -0
  141. package/assets/social-preview.svg +194 -0
  142. package/assets/social-v2.svg +130 -0
  143. package/assets/social-v3.svg +212 -0
  144. package/benchmark-provider-results.json +245 -0
  145. package/benchmark-results.json +54 -0
  146. package/council-votes/architecture-vote.md +121 -0
  147. package/council-votes/coverage-vote.md +93 -0
  148. package/data/adaptive-benchmark.json +92 -0
  149. package/data/benchmark-results.json +47 -0
  150. package/data/labeled-benchmark.json +88 -0
  151. package/demo/3blue1brown_video.py +285 -0
  152. package/demo/3blue1brown_video_v2.py +310 -0
  153. package/demo/IMPROVED_PROMPTS.md +229 -0
  154. package/demo/VEO3_PROMPTS.md +269 -0
  155. package/demo/VIDEO_PRODUCTION_GUIDE.md +333 -0
  156. package/demo/a3m_3blue1brown.mp4 +0 -0
  157. package/demo/asciinema-demo.sh +195 -0
  158. package/demo/demo-hn.tape +74 -0
  159. package/demo/demo-script.md +53 -0
  160. package/demo/demo-script.sh +62 -0
  161. package/demo/demo.svg +75 -0
  162. package/demo/frame1_ai_data_center.png +0 -0
  163. package/demo/frame1_sunset_video.mp4 +0 -0
  164. package/demo/frame2_cost_comparison.png +0 -0
  165. package/demo/frame2_cost_comparison_fallback.png +0 -0
  166. package/demo/frame3_parallel_execution.png +0 -0
  167. package/demo/frame3_parallel_execution_fallback.png +0 -0
  168. package/demo/frame4_providers.png +0 -0
  169. package/demo/frame4_providers_fallback.png +0 -0
  170. package/demo/frame5_endcard.png +0 -0
  171. package/demo/frame5_endcard_fallback.png +0 -0
  172. package/demo/new_frame1_hook.png +0 -0
  173. package/demo/new_frame2_proof.png +0 -0
  174. package/demo/new_frame3_wow.png +0 -0
  175. package/demo/new_frame4_social.png +0 -0
  176. package/demo/new_frame5_cta.png +0 -0
  177. package/demo/package.json +13 -0
  178. package/demo/product-video-final.mp4 +0 -0
  179. package/demo/product-video-hype-v1.mp4 +0 -0
  180. package/demo/product-video-v1.mp4 +0 -0
  181. package/demo/public/index.html +762 -0
  182. package/demo/recording.cast +55 -0
  183. package/demo/server.js +405 -0
  184. package/demo-new.tape +71 -0
  185. package/demo-real.sh +198 -0
  186. package/demo-simple.tape +205 -0
  187. package/demo.html +520 -0
  188. package/demo.sh +85 -0
  189. package/demo.tape +259 -0
  190. package/dist/analytics/costAnalytics.d.ts.map +1 -0
  191. package/dist/analytics/costAnalytics.js.map +1 -0
  192. package/dist/benchmark/comprehensive.js.map +1 -0
  193. package/dist/benchmark/reproducible.d.ts.map +1 -0
  194. package/dist/benchmark/reproducible.js.map +1 -0
  195. package/dist/cache/prefixCache.d.ts.map +1 -0
  196. package/dist/cache/prefixCache.js.map +1 -0
  197. package/dist/cache/responseCache.d.ts.map +1 -0
  198. package/dist/cache/responseCache.js.map +1 -0
  199. package/dist/cache/semanticCache.d.ts.map +1 -0
  200. package/dist/cache/semanticCache.js.map +1 -0
  201. package/dist/cli/setupWizard.d.ts.map +1 -0
  202. package/dist/cli/setupWizard.js.map +1 -0
  203. package/dist/cost/budgetEnforcer.d.ts.map +1 -0
  204. package/dist/cost/budgetEnforcer.js.map +1 -0
  205. package/dist/cost/costTracker.d.ts.map +1 -0
  206. package/dist/cost/costTracker.js.map +1 -0
  207. package/dist/ensemble/multiRoundDialog.js.map +1 -0
  208. package/dist/ensemble/shapleyValue.js.map +1 -0
  209. package/dist/integrations/langchainAdapter.d.ts.map +1 -0
  210. package/dist/integrations/langchainAdapter.js.map +1 -0
  211. package/dist/integrations/oauth.d.ts.map +1 -0
  212. package/dist/integrations/oauth.js.map +1 -0
  213. package/dist/integrations/scienceAdapter.js.map +1 -0
  214. package/dist/memory/autoFetch.d.ts.map +1 -0
  215. package/dist/memory/autoFetch.js.map +1 -0
  216. package/dist/memory/episodicMemory.d.ts.map +1 -0
  217. package/dist/memory/episodicMemory.js.map +1 -0
  218. package/dist/memory/hybridMemory.js.map +1 -0
  219. package/dist/memory/memoryTree.d.ts.map +1 -0
  220. package/dist/memory/memoryTree.js.map +1 -0
  221. package/dist/memory/obsidianVault.d.ts.map +1 -0
  222. package/dist/memory/obsidianVault.js.map +1 -0
  223. package/dist/memory/reasoningBank.js.map +1 -0
  224. package/dist/observability/changeWatch.d.ts.map +1 -0
  225. package/dist/observability/changeWatch.js.map +1 -0
  226. package/dist/observability/fatigueDetector.d.ts.map +1 -0
  227. package/dist/observability/fatigueDetector.js.map +1 -0
  228. package/dist/observability/index.d.ts.map +1 -0
  229. package/dist/observability/index.js.map +1 -0
  230. package/dist/observability/metrics.d.ts.map +1 -0
  231. package/dist/observability/metrics.js.map +1 -0
  232. package/dist/observability/middleware.d.ts.map +1 -0
  233. package/dist/observability/middleware.js.map +1 -0
  234. package/dist/observability/tracer.d.ts.map +1 -0
  235. package/dist/observability/tracer.js.map +1 -0
  236. package/dist/observability/types.d.ts.map +1 -0
  237. package/dist/observability/types.js.map +1 -0
  238. package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  239. package/dist/orchestration/haloOrchestrator.js.map +1 -0
  240. package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  241. package/dist/orchestration/mctsWorkflow.js.map +1 -0
  242. package/dist/providers/localProvider.d.ts.map +1 -0
  243. package/dist/providers/localProvider.js.map +1 -0
  244. package/dist/providers/providerConfig.d.ts.map +1 -0
  245. package/dist/providers/providerConfig.js.map +1 -0
  246. package/dist/providers/registry.d.ts.map +1 -0
  247. package/dist/providers/registry.js.map +1 -0
  248. package/dist/routing/advancedRouter.d.ts.map +1 -0
  249. package/dist/routing/advancedRouter.js +1 -1
  250. package/dist/routing/advancedRouter.js.map +1 -0
  251. package/dist/routing/crossModelValidation.d.ts.map +1 -0
  252. package/dist/routing/crossModelValidation.js.map +1 -0
  253. package/dist/routing/providerHealth.d.ts.map +1 -0
  254. package/dist/routing/providerHealth.js.map +1 -0
  255. package/dist/routing/providerRetry.d.ts.map +1 -0
  256. package/dist/routing/providerRetry.js.map +1 -0
  257. package/dist/scripts/banner.js +29 -0
  258. package/dist/security/guardrails.d.ts.map +1 -0
  259. package/dist/security/guardrails.js.map +1 -0
  260. package/dist/server/dashboard.d.ts.map +1 -0
  261. package/dist/server/dashboard.js.map +1 -0
  262. package/dist/server/modelMapper.d.ts.map +1 -0
  263. package/dist/server/modelMapper.js.map +1 -0
  264. package/dist/server/proxyServer.d.ts.map +1 -0
  265. package/dist/server/proxyServer.js.map +1 -0
  266. package/dist/skills/__tests__/skill_manager.test.d.ts +2 -0
  267. package/dist/skills/__tests__/skill_manager.test.d.ts.map +1 -0
  268. package/dist/skills/__tests__/skill_manager.test.js +268 -0
  269. package/dist/skills/__tests__/skill_manager.test.js.map +1 -0
  270. package/dist/tools/tmlpdTools.d.ts.map +1 -0
  271. package/dist/tools/tmlpdTools.js.map +1 -0
  272. package/dist/tui/dashboard.d.ts.map +1 -0
  273. package/dist/tui/dashboard.js.map +1 -0
  274. package/dist/tui/index.d.ts.map +1 -0
  275. package/dist/tui/index.js.map +1 -0
  276. package/dist/utils/batchProcessor.d.ts.map +1 -0
  277. package/dist/utils/batchProcessor.js.map +1 -0
  278. package/dist/utils/compression.d.ts.map +1 -0
  279. package/dist/utils/compression.js.map +1 -0
  280. package/dist/utils/costUtils.d.ts.map +1 -0
  281. package/dist/utils/costUtils.js.map +1 -0
  282. package/dist/utils/reliability.d.ts.map +1 -0
  283. package/dist/utils/reliability.js.map +1 -0
  284. package/dist/utils/sorting.d.ts.map +1 -0
  285. package/dist/utils/sorting.js.map +1 -0
  286. package/dist/utils/speculativeDecoding.d.ts.map +1 -0
  287. package/dist/utils/speculativeDecoding.js.map +1 -0
  288. package/dist/utils/tokenUtils.d.ts.map +1 -0
  289. package/dist/utils/tokenUtils.js.map +1 -0
  290. package/docs/.nojekyll +0 -0
  291. package/docs/ANALYSIS_PRINCIPLES.md +162 -0
  292. package/docs/API.md +855 -0
  293. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  294. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  295. package/docs/BENCHMARK.md +170 -0
  296. package/docs/CHINESE_PROVIDER_RELIABILITY.md +37 -0
  297. package/docs/CITATIONS.md +74 -0
  298. package/docs/CLAIMS_AND_EVIDENCE.md +58 -0
  299. package/docs/CONFIGURATION.md +476 -0
  300. package/docs/COUNCIL_DECISION.json +816 -0
  301. package/docs/COUNCIL_SUMMARY.md +319 -0
  302. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  303. package/docs/ENGINEERING_SPEC.md +55 -0
  304. package/docs/FACTORY_RESET.md +34 -0
  305. package/docs/GEO.md +66 -0
  306. package/docs/GEO_OPTIMIZATION.md +30 -0
  307. package/docs/GEO_ROOT_CAUSE.md +136 -0
  308. package/docs/GEO_STATUS.md +85 -0
  309. package/docs/GEO_TEST_RESULTS.md +176 -0
  310. package/docs/HN_CHECKLIST.md +38 -0
  311. package/docs/HN_FOUNDER_COMMENT.md +17 -0
  312. package/docs/HN_SUBMISSION_FINAL.md +180 -0
  313. package/docs/HN_SUBMISSION_V3.md +56 -0
  314. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  315. package/docs/INTEGRATIONS.md +420 -0
  316. package/docs/LANGCHAIN_INTEGRATION.md +147 -0
  317. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  318. package/docs/MIDDLEWARE_CHAIN.md +35 -0
  319. package/docs/PROMO_CHECKLIST.md +200 -0
  320. package/docs/QUICKSTART.md +271 -0
  321. package/docs/QUICK_START.md +43 -0
  322. package/docs/QUICK_START_VISIBILITY.md +782 -0
  323. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  324. package/docs/RELEASE_CHECKLIST.md +32 -0
  325. package/docs/REPRODUCIBILITY.md +63 -0
  326. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  327. package/docs/ROUTING_RUBRIC.md +197 -0
  328. package/docs/SEO_AUDIT.md +186 -0
  329. package/docs/SOCIAL_LISTENING.md +219 -0
  330. package/docs/TMLPD_QNA.md +751 -0
  331. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  332. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  333. package/docs/UPDATE_TOPICS.md +15 -0
  334. package/docs/USE_CASES.md +59 -0
  335. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  336. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  337. package/docs/VERCEL_AI_SDK.md +209 -0
  338. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  339. package/docs/_config.yml +49 -0
  340. package/docs/ai-plugin.json +16 -0
  341. package/docs/api.html +513 -0
  342. package/docs/architecture-diagram.md +40 -0
  343. package/docs/benchmark-chart.png +0 -0
  344. package/docs/benchmark.html +387 -0
  345. package/docs/blog/routerarena-9677.html +92 -0
  346. package/docs/blog/routerarena-number-one.html +73 -0
  347. package/docs/cli-cheatsheet.md +339 -0
  348. package/docs/compare.md +109 -0
  349. package/docs/comparison-litellm.md +88 -0
  350. package/docs/comparison.md +108 -0
  351. package/docs/cost-chart-ascii.md +42 -0
  352. package/docs/cost-comparison-chart.svg +88 -0
  353. package/docs/curl-examples.md +247 -0
  354. package/docs/demo-auto.html +264 -0
  355. package/docs/demo.html +416 -0
  356. package/docs/geo/GENERATIVE_ENGINE_OPTIMIZATION.md +232 -0
  357. package/docs/index.html +507 -0
  358. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  359. package/docs/launch-content/README.md +457 -0
  360. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  361. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  362. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  363. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  364. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  365. package/docs/launch-content/generate_charts.py +313 -0
  366. package/docs/launch-content/hn_show_post.md +139 -0
  367. package/docs/launch-content/partner_outreach_templates.md +745 -0
  368. package/docs/launch-content/reddit_posts.md +467 -0
  369. package/docs/launch-content/twitter_thread.txt +460 -0
  370. package/{llms.txt.bak → docs/llms.txt} +6 -6
  371. package/docs/npm-downloads-chart.svg +43 -0
  372. package/docs/openapi.json +139 -0
  373. package/docs/openapi.yaml +1318 -0
  374. package/docs/quick-start.html +366 -0
  375. package/docs/robots.txt +52 -0
  376. package/docs/sitemap.xml +57 -0
  377. package/docs/styles.css +682 -0
  378. package/docs/well-known/ai-plugin.json +16 -0
  379. package/docs/wellknown/ai-plugin.json +16 -0
  380. package/docs-site/assets/og-banner.svg +194 -0
  381. package/docs-site/index.html +632 -0
  382. package/eval/README.md +46 -0
  383. package/eval/baselines/main.json +12 -0
  384. package/eval/benchmark_dataset.jsonl +16 -0
  385. package/eval/check_golden_routes.js +64 -0
  386. package/eval/datasets/catalog.json +33 -0
  387. package/eval/datasets/slices/cn_provider_reliability_v1.jsonl +3 -0
  388. package/eval/datasets/slices/cost_pressure_v1.jsonl +3 -0
  389. package/eval/datasets/slices/safety_guardrails_v1.jsonl +3 -0
  390. package/eval/evals.json +199 -0
  391. package/eval/fault_injection_thresholds.json +3 -0
  392. package/eval/generate_report.js +128 -0
  393. package/eval/golden_routes.json +114 -0
  394. package/eval/lib/experiment_registry.js +24 -0
  395. package/eval/run_eval.js +197 -0
  396. package/eval/run_fault_injection.js +201 -0
  397. package/eval/run_shadow_eval.js +85 -0
  398. package/eval/thresholds.json +9 -0
  399. package/examples/QUICKSTART.md +183 -0
  400. package/examples/README.md +61 -0
  401. package/examples/a3m-sdk.js +124 -0
  402. package/examples/basic-route.js +54 -0
  403. package/examples/chat-loop.js +202 -0
  404. package/examples/classify-then-route.js +102 -0
  405. package/examples/cost-compare.js +120 -0
  406. package/examples/ensemble.js +160 -0
  407. package/examples/whatsapp-telegram-bridge-demo.js +302 -0
  408. package/examples/whatsapp-telegram-bridge.js +269 -0
  409. package/hf-space/README.md +23 -0
  410. package/hf-space/app.py +240 -0
  411. package/hf-space/requirements.txt +1 -0
  412. package/huggingface_space/README.md +35 -0
  413. package/huggingface_space/app.py +126 -0
  414. package/huggingface_space/create_space.py +208 -0
  415. package/huggingface_space/requirements.txt +1 -0
  416. package/index.html +1 -1
  417. package/mcp-server/README.md +188 -0
  418. package/mcp-server/package.json +29 -0
  419. package/mcp-server/src/index.ts +744 -0
  420. package/mcp-server/tsconfig.json +19 -0
  421. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  422. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  423. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  424. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  425. package/package.json +76 -272
  426. package/playground/README.md +51 -0
  427. package/playground/codesandbox.json +12 -0
  428. package/playground/index.js +39 -0
  429. package/proxy/README.md +227 -0
  430. package/proxy/package-lock.json +831 -0
  431. package/proxy/package.json +17 -0
  432. package/proxy/rate-limit.js +145 -0
  433. package/proxy/rate-limit.test.js +311 -0
  434. package/proxy/server.js +970 -0
  435. package/python/README.md +102 -0
  436. package/python/a3m/__init__.py +6 -0
  437. package/python/a3m/client.py +190 -0
  438. package/python/a3m/models.py +40 -0
  439. package/python/a3m/sync_client.py +61 -0
  440. package/python/examples.py +53 -0
  441. package/python/integrations.py +330 -0
  442. package/python/pyproject.toml +23 -0
  443. package/python/setup.py +28 -0
  444. package/python/tmlpd.py +369 -0
  445. package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  446. package/qna/TMLPD_QNA.md +751 -0
  447. package/research/FINDING_001_safety.md +28 -0
  448. package/research/FINDING_002_error_diversity.md +32 -0
  449. package/research/FINDING_003_confidence_weighted_voting.md +32 -0
  450. package/research/FINDING_004_cross_model_semantic_detection.md +37 -0
  451. package/research/FINDING_005_knowledge_gap_orthogonality.md +34 -0
  452. package/research/HALLUCINATION_RESEARCH.md +27 -0
  453. package/research/ensemble-voting.md +324 -0
  454. package/research/loss-functions.md +545 -0
  455. package/research-log.md +49 -0
  456. package/scripts/banner.js +29 -0
  457. package/scripts/benchmark-local-routerarena.ts +176 -0
  458. package/scripts/benchmark.js +145 -0
  459. package/scripts/benchmark.sh +61 -0
  460. package/scripts/compare-providers.sh +230 -0
  461. package/scripts/content-planner.js +25 -0
  462. package/scripts/create-labeled-benchmark.ts +105 -0
  463. package/scripts/cross_post.py +443 -0
  464. package/scripts/local-router-benchmark.ts +154 -0
  465. package/scripts/post-all.sh +41 -0
  466. package/scripts/publish_fcc.py +106 -0
  467. package/scripts/push-to-gitee.sh +25 -0
  468. package/scripts/routerarena_ensemble.js +144 -0
  469. package/scripts/routing-benchmark-v2.js +373 -0
  470. package/scripts/routing-benchmark-v3.js +118 -0
  471. package/scripts/routing-benchmark.js +462 -0
  472. package/scripts/run-labeled-benchmark.mjs +104 -0
  473. package/scripts/run-mmlu-benchmark.js +176 -0
  474. package/scripts/run-provider-benchmark.js +244 -0
  475. package/scripts/update-npm-badges.js +158 -0
  476. package/skill/SKILL.md +238 -0
  477. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  478. package/src/ensemble.ts +2 -0
  479. package/src/routing/advancedRouter.ts +1 -1
  480. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  481. package/submissions/benchmarks/ALL_PLATFORMS_SUBMISSION.md +94 -0
  482. package/submissions/benchmarks/LLMROUTERBENCH_SUBMISSION.md +121 -0
  483. package/submissions/benchmarks/MMRBENCH_SUBMISSION.md +94 -0
  484. package/submissions/benchmarks/ROUTERARENA_UPDATE.md +83 -0
  485. package/submissions/benchmarks/ROUTERBENCH_SUBMISSION.md +225 -0
  486. package/test-council/1-structure-tests.test.js +353 -0
  487. package/test-council/1-structure-tests.test.ts +353 -0
  488. package/test-council/2-edge-case-tests.test.ts +361 -0
  489. package/test-council/3-performance-tests.test.ts +652 -0
  490. package/test-council/4-integration-tests.test.ts +391 -0
  491. package/test-council/5-agent-council-eval.test.ts +413 -0
  492. package/test-council/AGENT_COUNCIL_ARCHITECTURE.md +349 -0
  493. package/test-council/TEST_COUNCIL_REPORT.md +201 -0
  494. package/test-council/agents/edge-case-agent.ts +363 -0
  495. package/test-council/agents/performance-agent.ts +426 -0
  496. package/test-council/agents/structure-agent.ts +227 -0
  497. package/test-council/council.md +183 -0
  498. package/tests/__mocks__/tokenUtils.ts +8 -0
  499. package/tests/memory/episodicMemory.test.ts +227 -0
  500. package/tests/package-lock.json +1785 -0
  501. package/tests/package.json +19 -0
  502. package/tests/routing/ensembleVoting.test.ts +236 -0
  503. package/tests/routing/providerRetry.test.ts +360 -0
  504. package/tests/routing/queryTypePresets.test.ts +208 -0
  505. package/tests/security/guardrailEngine.test.ts +700 -0
  506. package/tests/tsconfig.json +21 -0
  507. package/tests/vitest.config.ts +18 -0
  508. package/tmlpd-pi-extension/README.md +66 -0
  509. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  510. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  511. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  512. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  513. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  514. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  515. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  516. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  517. package/tmlpd-pi-extension/dist/cli.js +59 -0
  518. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  519. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  520. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  521. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  522. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  523. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  524. package/tmlpd-pi-extension/dist/index.js +239 -0
  525. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  526. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  527. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  528. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  529. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  530. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  531. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  532. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  533. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  534. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  535. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  536. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  537. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  538. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  539. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  540. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  541. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  542. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  543. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  544. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  545. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  546. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  547. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  548. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  549. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  550. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  551. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  552. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  553. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  554. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  555. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  556. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  557. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  558. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  559. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  560. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  561. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  562. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  563. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  564. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  565. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  566. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  567. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  568. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  569. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  570. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  571. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  572. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  573. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  574. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  575. package/tmlpd-pi-extension/package-lock.json +79 -0
  576. package/tmlpd-pi-extension/package.json +172 -0
  577. package/tmlpd-pi-extension/python/examples.py +53 -0
  578. package/tmlpd-pi-extension/python/integrations.py +330 -0
  579. package/tmlpd-pi-extension/python/setup.py +28 -0
  580. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  581. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  582. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  583. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  584. package/tmlpd-pi-extension/src/cache/responseCache.ts +147 -0
  585. package/tmlpd-pi-extension/src/cost/costTracker.ts +302 -0
  586. package/tmlpd-pi-extension/src/index.ts +232 -0
  587. package/tmlpd-pi-extension/src/memory/episodicMemory.ts +257 -0
  588. package/tmlpd-pi-extension/src/orchestration/haloOrchestrator.ts +266 -0
  589. package/tmlpd-pi-extension/src/orchestration/mctsWorkflow.ts +262 -0
  590. package/tmlpd-pi-extension/src/providers/localProvider.ts +406 -0
  591. package/tmlpd-pi-extension/src/providers/registry.ts +164 -0
  592. package/tmlpd-pi-extension/src/routing/ensembleVoting.ts +159 -0
  593. package/tmlpd-pi-extension/src/routing/queryTypePresets.ts +136 -0
  594. package/tmlpd-pi-extension/src/tools/tmlpdTools.ts +433 -0
  595. package/tmlpd-pi-extension/src/utils/batchProcessor.ts +232 -0
  596. package/tmlpd-pi-extension/src/utils/compression.ts +325 -0
  597. package/tmlpd-pi-extension/src/utils/reliability.ts +221 -0
  598. package/tmlpd-pi-extension/src/utils/tokenUtils.ts +145 -0
  599. package/tmlpd-pi-extension/tsconfig.json +18 -0
  600. package/tsconfig.build.json +29 -0
  601. package/tsconfig.json +18 -0
  602. package/README.md.bak +0 -1185
  603. package/src/routing/advancedRouter.ts.bak +0 -650
  604. package/test.js.bak +0 -376
  605. package/{llms-full.txt.bak → docs/llms-full.txt} +0 -0
@@ -0,0 +1,238 @@
1
+ ---
2
+ name: tmlpd
3
+ description: "Research-backed Multi-LLM Router with parallel execution, streaming, caching, token compression (ISON), local provider support (Ollama/vLLM/LM Studio), batch processing. Based on arXiv research: RouteLLM routing, RadixAttention prefix caching, Medusa/EAGLE speculative decoding. Python bindings for LangChain/LlamaIndex/AutoGen/CrewAI. 120+ keywords for LLM/ML discoverability. Use for multi-model comparison, cost optimization, batch processing, local privacy, context compression, adaptive routing."
4
+ ---
5
+
6
+ # TMLPD PI Extension
7
+
8
+ **Research-backed Multi-LLM Router** with advanced optimization features.
9
+
10
+ ## Direct Imports (TypeScript)
11
+
12
+ ```typescript
13
+ import {
14
+ createTMLPD, // Core instance
15
+ HALOOrchestrator, // Hierarchical orchestration
16
+ EpisodicMemoryStore, // Learn from past tasks
17
+ // Advanced Routing (RouteLLM-style)
18
+ routeQuery, // Learned routing decision
19
+ routeBatch, // Batch routing
20
+ extractQueryFeatures, // Feature extraction
21
+ MODEL_PROFILES, // Model cost/quality profiles
22
+ // Prefix Cache (RadixAttention-style)
23
+ PrefixCache, // 5-10x speedup for shared prompts
24
+ createWarmedCache, // Pre-warmed cache
25
+ // Speculative Decoding (Medusa/EAGLE)
26
+ SpeculativeDecoder, // 2-3x faster generation
27
+ estimateSpeedupPotential,
28
+ // Compression
29
+ isonEncode, // 20-40% token reduction
30
+ truncateMessages, // Context window management
31
+ // Local providers
32
+ createOllamaProvider, // Ollama
33
+ createVLLMProvider, // vLLM
34
+ // Batch processing
35
+ BatchProcessor, // Priority queuing
36
+ TMLPD_PI_TOOLS // 13 PI tool definitions
37
+ } from "tmlpd-pi";
38
+ ```
39
+
40
+ ## Direct Imports (Python)
41
+
42
+ ```python
43
+ from tmlpd import (
44
+ TMLPDLite, # Lite client (sync, no deps)
45
+ TMLPDClient, # Async production client
46
+ TaskType, # CODING, FAST, PREMIUM, etc.
47
+ quick_process # One-liner function
48
+ )
49
+ ```
50
+
51
+ ## 13 PI Tools
52
+
53
+ | Tool | Input | Output |
54
+ |------|-------|--------|
55
+ | `tmlpd_execute` | `{prompt, models?}` | `{content, model, cost}` |
56
+ | `tmlpd_execute_single` | `{prompt, model?}` | `{content, model}` |
57
+ | `tmlpd_cost_summary` | `{}` | `{total_cost, by_provider}` |
58
+ | `tmlpd_cache_stats` | `{}` | `{hits, misses, hit_rate}` |
59
+ | `tmlpd_provider_status` | `{}` | `{ready_providers}` |
60
+ | `tmlpd_invalidate_cache` | `{model?}` | `{invalidated}` |
61
+ | `tmlpd_get_budget` | `{}` | `{daily, monthly}` |
62
+ | `tmlpd_halo_execute` | `{task, max_concurrent?}` | `{success, results}` |
63
+ | `tmlpd_episodic_query` | `{task, limit?}` | `EpisodicEntry[]` |
64
+ | `tmlpd_count_tokens` | `{text, model?}` | `{tokens}` |
65
+ | `tmlpd_compress_context` | `{messages, strategy?}` | `{compressed, ratio}` |
66
+ | `tmlpd_local_generate` | `{prompt, runtime, model?}` | `{content, cost:0}` |
67
+ | `tmlpd_batch_execute` | `{prompts, concurrency?}` | `BatchResult[]` |
68
+
69
+ ## Research-Backed Features (arXiv)
70
+
71
+ ### RouteLLM-Style Learned Routing (arXiv:2406.18665)
72
+
73
+ ```typescript
74
+ // Automatic cost-quality tradeoff routing
75
+ const decision = routeQuery('Write a Python async function');
76
+ // Returns: { primary_model, fallback_models, confidence, reasoning }
77
+
78
+ const features = extractQueryFeatures(prompt);
79
+ // Extracts: complexity, has_code, has_math, is_multilingual, etc.
80
+
81
+ // MODEL_PROFILES contains cost/latency/quality for each provider
82
+ console.log(MODEL_PROFILES['openai/gpt-4o'].quality_score); // 0.95
83
+ ```
84
+
85
+ | Model | Quality | Latency | Best For |
86
+ |-------|---------|---------|----------|
87
+ | gpt-4o | 0.95 | 2000ms | reasoning |
88
+ | gpt-4o-mini | 0.85 | 500ms | fast |
89
+ | claude-3.5-sonnet | 0.96 | 2500ms | creative |
90
+ | gemini-2.0-flash | 0.88 | 800ms | multilingual |
91
+ | groq/llama-3.3-70b | 0.82 | 400ms | fast/budget |
92
+
93
+ ### RadixAttention-Style Prefix Caching (arXiv:2312.07104)
94
+
95
+ ```typescript
96
+ // 5-10x speedup for shared system prompts
97
+ const cache = new PrefixCache({ max_entries: 10000 });
98
+ cache.warmup([
99
+ "You are a helpful assistant.",
100
+ "You are a coding assistant.",
101
+ "Analyze the following code..."
102
+ ]);
103
+
104
+ // Automatic prefix matching
105
+ const result = cache.lookup("You are a helpful assistant. Please explain...");
106
+ // Returns cached if prefix matches
107
+
108
+ const stats = cache.getStats();
109
+ // { total_entries, hit_rate, memory_estimate_mb }
110
+ ```
111
+
112
+ ### Medusa/EAGLE Speculative Decoding (arXiv:2401.10774)
113
+
114
+ ```typescript
115
+ // 2-3x faster generation with same quality
116
+ const decoder = new SpeculativeDecoder();
117
+ const result = await decoder.decode(
118
+ prompt,
119
+ fastModelFn, // Draft model
120
+ slowModelFn, // Target model
121
+ 5 // Max draft tokens
122
+ );
123
+ // { accepted, rejected, speedup, final_text }
124
+
125
+ const speedup = estimateSpeedupPotential(100, 200, 50, 200);
126
+ // Returns estimated speedup (capped at 3x)
127
+ ```
128
+
129
+ ## Token Utilities
130
+
131
+ ```typescript
132
+ // Count tokens (no API call)
133
+ const tokens = countTokens("Your prompt", "claude-3.5-sonnet");
134
+
135
+ // Estimate cost before execution
136
+ const cost = estimateCost(500, 200, "gpt-4o"); // $0.0095
137
+
138
+ // Find cheapest models for task
139
+ const cheap = findCheapestModels("fast", 3);
140
+ ```
141
+
142
+ ## ISON Compression (20-40% token reduction)
143
+
144
+ ```typescript
145
+ // Remove articles, normalize whitespace
146
+ const encoded = isonEncode("The quick brown fox jumps over the lazy dog");
147
+ // "quick brown fox jumps lazy dog"
148
+
149
+ // Truncate long conversations
150
+ const truncated = truncateMessages(messages, 4000, "smart");
151
+ ```
152
+
153
+ ## Local LLM Support
154
+
155
+ ```typescript
156
+ // Zero cost, privacy-preserving
157
+ const ollama = createOllamaProvider("llama-3.3-70b");
158
+ const vllm = createVLLMProvider("http://localhost:8000");
159
+
160
+ // Parallel across local + cloud
161
+ const results = await manager.executeParallel("Prompt", {
162
+ models: ["ollama/llama-3.3-70b", "openai/gpt-4o"]
163
+ });
164
+ ```
165
+
166
+ ## Batch Processing
167
+
168
+ ```typescript
169
+ const batch = new BatchProcessor({ concurrency: 5 });
170
+ batch.add({ prompt: "Task 1", priority: "high" });
171
+ batch.add({ prompt: "Task 2", priority: "normal" });
172
+ batch.onProgress((progress, result) => {
173
+ console.log(`Completed: ${progress.completed}/${progress.total}`);
174
+ });
175
+ await batch.execute(executor);
176
+ ```
177
+
178
+ ## Python Task Routing
179
+
180
+ ```python
181
+ from tmlpd import TMLPDLite, TaskType
182
+
183
+ lite = TMLPDLite()
184
+ task = lite.classify_task("Write Python async function")
185
+ # TaskType.CODING
186
+
187
+ models = lite.get_optimal_models(task, 3)
188
+ # ["codex", "claude-minimax", "claude"]
189
+ ```
190
+
191
+ | TaskType | Keywords | Models |
192
+ |----------|----------|--------|
193
+ | CODING | python, javascript, code | codex, claude-minimax |
194
+ | FRONTEND | react, vue, component | codex, claude-minimax |
195
+ | CHINESE | 中文, 汉语 | claude-glm, claude-minimax |
196
+ | FAST | quick, simple | gemini, claude-haiku |
197
+
198
+ ## Framework Integrations
199
+
200
+ ```python
201
+ # LangChain
202
+ class TMLPDLLM(BaseLLM):
203
+ def _call(self, prompt): return lite.process(prompt)["content"]
204
+
205
+ # LlamaIndex
206
+ class TMLPDLLM(LLM):
207
+ def complete(self, prompt): return lite.process(prompt)["content"]
208
+
209
+ # AutoGen
210
+ class TMLPDAgent(AssistantAgent):
211
+ def generate_reply(self, messages):
212
+ return lite.process(messages[-1]["content"])["content"]
213
+ ```
214
+
215
+ ## 120+ Keywords for Discoverability
216
+
217
+ ```
218
+ routellm, prefix-caching, radix-attention, speculative-decoding, medusa, eagle,
219
+ flashattention, pagedattention, kv-cache-quantization, llmlingua, streamingllm,
220
+ tensor-parallelism, continuous-batching, multi-model-orchestration,
221
+ multi-agent-debate, self-consistency, adaptive-router, intelligent-router,
222
+ context-aware-router, task-aware-router, memory-augmented-llm,
223
+ episodic-memory-router, semantic-memory-router, arxiv, research-backed,
224
+ icml, neurips, iclr, token-compression, context-compression
225
+ ```
226
+
227
+ ## npm
228
+
229
+ **Package:** https://npmjs.com/package/tmlpd-pi
230
+ **Version:** 1.2.0 | **Files:** 94 | **Size:** 543KB unpacked
231
+
232
+ ## Reference
233
+
234
+ - RouteLLM: arXiv:2406.18665
235
+ - RadixAttention: arXiv:2312.07104
236
+ - Medusa: arXiv:2401.10774
237
+ - FlashAttention: arXiv:2205.14135
238
+ - PagedAttention: SOSP 2023
@@ -0,0 +1,147 @@
1
+ /**
2
+ * TMLPD Response Cache
3
+ *
4
+ * Caches LLM responses to avoid redundant API calls.
5
+ * Uses content hash for cache key and supports TTL.
6
+ */
7
+
8
+ import * as crypto from "crypto";
9
+
10
/**
 * One cached LLM response together with its bookkeeping metadata.
 */
export interface CacheEntry {
  /** Response text that was cached. */
  content: string;
  /** Model identifier stored with the response. */
  model: string;
  /** Provider name stored with the response. */
  provider: string;
  /** Token count recorded for the response. */
  tokens: number;
  /** Cost recorded for the response. */
  cost: number;
  /** Time the entry was stored, in epoch milliseconds. */
  cached_at: number;
  /** Time after which the entry is considered stale, in epoch milliseconds. */
  expires_at: number;
}
19
+
20
/**
 * Tunable settings for the response cache.
 */
export interface CacheConfig {
  /** When false, lookups always miss and stores are ignored. */
  enabled: boolean;
  /** Lifetime of an entry, in seconds. */
  ttl_seconds: number;
  /** Entry count at which storing a new key triggers an eviction. */
  max_entries: number;
  /** Optional directory path carried in the configuration. */
  cache_dir?: string;
}
26
+
27
/**
 * In-memory cache of LLM responses, keyed by a hash of prompt + model.
 *
 * Entries expire after `ttl_seconds`, and once the cache holds
 * `max_entries` items the entry with the smallest `cached_at` is evicted
 * to make room for a new key. Hit/miss counters are kept for `getStats()`.
 */
export class ResponseCache {
  private cache: Map<string, CacheEntry> = new Map();
  private config: CacheConfig;
  private hits = 0;
  private misses = 0;

  /**
   * @param config Partial settings; omitted fields default to
   *   enabled = true, ttl_seconds = 3600 (one hour), max_entries = 1000.
   */
  constructor(config: Partial<CacheConfig> = {}) {
    this.config = {
      enabled: config.enabled ?? true,
      ttl_seconds: config.ttl_seconds ?? 3600, // 1 hour default
      max_entries: config.max_entries ?? 1000,
      cache_dir: config.cache_dir,
    };
  }

  /**
   * Generate the cache key for a prompt/model pair.
   *
   * @returns The first 32 hex characters of SHA-256 over `prompt + "|" + model`.
   */
  generateKey(prompt: string, model: string): string {
    const hash = crypto.createHash("sha256");
    hash.update(prompt + "|" + model);
    return hash.digest("hex").substring(0, 32);
  }

  /**
   * Get the cached response if one exists and has not expired.
   *
   * A missing or expired entry counts as a miss (an expired entry is also
   * removed). When the cache is disabled this returns null without
   * touching the counters.
   */
  get(prompt: string, model: string): CacheEntry | null {
    if (!this.config.enabled) return null;

    const key = this.generateKey(prompt, model);
    const entry = this.cache.get(key);

    if (!entry) {
      this.misses++;
      return null;
    }

    // Check expiration
    if (Date.now() > entry.expires_at) {
      this.cache.delete(key);
      this.misses++;
      return null;
    }

    this.hits++;
    return entry;
  }

  /**
   * Store a response in the cache. Does nothing when the cache is disabled.
   *
   * Missing fields of `response` default to: content "", model = the
   * `model` argument, provider "unknown", tokens 0, cost 0.
   */
  set(prompt: string, model: string, response: Partial<CacheEntry>): void {
    if (!this.config.enabled) return;

    const key = this.generateKey(prompt, model);
    const now = Date.now();

    // Only a brand-new key grows the map. Overwriting an existing key must
    // not push an unrelated entry out of a full cache.
    if (!this.cache.has(key) && this.cache.size >= this.config.max_entries) {
      this.evictOldest();
    }

    this.cache.set(key, {
      content: response.content ?? "",
      model: response.model ?? model,
      provider: response.provider ?? "unknown",
      tokens: response.tokens ?? 0,
      cost: response.cost ?? 0,
      cached_at: now,
      expires_at: now + this.config.ttl_seconds * 1000,
    });
  }

  /**
   * Invalidate cached entries.
   *
   * @param model When given, removes every entry whose stored model name
   *   contains this string (substring match, so "gpt-4" also matches
   *   "gpt-4o"). When omitted, clears the whole cache.
   */
  invalidate(model?: string): void {
    if (model) {
      for (const [key, entry] of this.cache.entries()) {
        if (entry.model.includes(model)) {
          this.cache.delete(key);
        }
      }
    } else {
      this.cache.clear();
    }
  }

  /**
   * Get cache statistics.
   *
   * @returns Hit and miss counts, current entry count, and
   *   hits / (hits + misses), or 0 when no lookups have been counted.
   */
  getStats(): { hits: number; misses: number; size: number; hit_rate: number } {
    const total = this.hits + this.misses;
    return {
      hits: this.hits,
      misses: this.misses,
      size: this.cache.size,
      hit_rate: total > 0 ? this.hits / total : 0,
    };
  }

  /**
   * Evict the entry with the smallest `cached_at` timestamp.
   * On ties the first such entry in map iteration order is removed.
   */
  private evictOldest(): void {
    let oldestKey: string | null = null;
    let oldestTime = Infinity;

    for (const [key, entry] of this.cache.entries()) {
      if (entry.cached_at < oldestTime) {
        oldestTime = entry.cached_at;
        oldestKey = key;
      }
    }

    if (oldestKey) {
      this.cache.delete(oldestKey);
    }
  }
}
@@ -0,0 +1,302 @@
1
+ /**
2
+ * TMLPD Cost Tracker
3
+ *
4
+ * Tracks real-time spending across all providers.
5
+ * Supports per-model budgets, spending alerts, and cost analysis.
6
+ */
7
+
8
/**
 * Price table for known models: USD per one million tokens, split into
 * input and output rates. Keys are model identifiers.
 */
const MODEL_COSTS: Record<string, { input: number; output: number }> = {
  // --- Anthropic ---
  "claude-3-5-sonnet-20241022": { input: 3.0, output: 15.0 },
  "claude-3-opus-20240229": { input: 15.0, output: 75.0 },
  "claude-3-sonnet-20240229": { input: 3.0, output: 15.0 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },

  // --- OpenAI ---
  "gpt-4o": { input: 2.5, output: 10.0 },
  "gpt-4-turbo": { input: 10.0, output: 30.0 },
  "gpt-4": { input: 30.0, output: 60.0 },
  "gpt-3.5-turbo": { input: 0.5, output: 1.5 },

  // --- Google ---
  "gemini-1.5-pro": { input: 1.25, output: 5.0 },
  "gemini-1.5-flash": { input: 0.075, output: 0.3 },

  // --- Groq ---
  "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
  "llama-3.1-8b-instant": { input: 0.05, output: 0.08 },

  // --- Cerebras ---
  "llama-3.3-70b": { input: 0.1, output: 0.1 },

  // --- Mistral ---
  "mistral-large-latest": { input: 2.0, output: 6.0 },
  "mistral-small-latest": { input: 0.2, output: 0.6 },

  // --- xAI ---
  "grok-2": { input: 2.0, output: 8.0 },
  "grok-2-mini": { input: 0.2, output: 0.8 },

  // --- OpenRouter (pricing varies by model) ---
  "openai/gpt-4o": { input: 2.5, output: 10.0 },
  "anthropic/claude-3.5-sonnet": { input: 3.0, output: 15.0 },

  // --- ZAI (default estimate) ---
  "glm-5": { input: 0.1, output: 0.3 },
  "glm-4": { input: 0.1, output: 0.3 },
};
41
+
42
/**
 * Optional spending limits, in the same currency unit as recorded costs.
 */
export interface BudgetConfig {
  /** Spending limit per calendar day. */
  daily_limit?: number;
  /** Spending limit per calendar month. */
  monthly_limit?: number;
  /** Spending limit per model, keyed by model identifier. */
  per_model_limits?: Record<string, number>;
}
47
+
48
/**
 * Notification describing spending measured against a limit.
 */
export interface CostAlert {
  /** Which kind of limit the alert refers to. */
  type: "daily" | "monthly" | "model" | "budget";
  /** The limit value the spending is compared with. */
  threshold: number;
  /** The spending amount at the time of the alert. */
  current: number;
  /** Provider the alert relates to, if any. */
  provider?: string;
  /** Model the alert relates to, if any. */
  model?: string;
}
55
+
56
/**
 * Cost record for a single request.
 */
export interface CostSnapshot {
  /** Provider that served the request. */
  provider: string;
  /** Model identifier used for the request. */
  model: string;
  /** Number of input tokens. */
  input_tokens: number;
  /** Number of output tokens. */
  output_tokens: number;
  /** Cost attributed to the input tokens. */
  input_cost: number;
  /** Cost attributed to the output tokens. */
  output_cost: number;
  /** Combined cost of the request. */
  total_cost: number;
  /** Time the record was created, in epoch milliseconds. */
  timestamp: number;
}
66
+
67
/**
 * Aggregated view over recorded request costs.
 */
export interface CostSummary {
  /** Sum of all recorded costs. */
  total_cost: number;
  /** Cost totals keyed by provider name. */
  by_provider: Record<string, number>;
  /** Cost totals keyed by model identifier. */
  by_model: Record<string, number>;
  /** Cost totals keyed by day. */
  daily_costs: Record<string, number>;
  /** Cost totals keyed by month. */
  monthly_costs: Record<string, number>;
  /** Number of recorded requests. */
  request_count: number;
  /** Total input and output tokens across all recorded requests. */
  token_count: { input: number; output: number };
  /** Mean cost per recorded request. */
  average_cost_per_request: number;
}
77
+
78
/**
 * Records the cost of each LLM request and raises alerts as configured
 * budgets are approached.
 *
 * Every recorded request is kept in an in-memory history; summaries are
 * recomputed from that history on demand. Daily and monthly buckets are
 * keyed by the UTC date of each record ("YYYY-MM-DD" / "YYYY-MM").
 */
export class CostTracker {
  private history: CostSnapshot[] = [];
  private budgets: BudgetConfig;
  private alerts: CostAlert[] = [];
  private alerts_callback: ((alert: CostAlert) => void) | null = null;
  private daily_reset: number;
  private monthly_reset: number;
  /** Last emission time (epoch ms) per alert identity, used for the cooldown. */
  private alert_emitted_at: Map<string, number> = new Map();

  /**
   * @param budgets Optional spending limits; with none set, no alerts fire.
   */
  constructor(budgets: BudgetConfig = {}) {
    this.budgets = budgets;
    const now = new Date();
    // Next local midnight and first day of next month (local time).
    this.daily_reset = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1).getTime();
    this.monthly_reset = new Date(now.getFullYear(), now.getMonth() + 1, 1).getTime();
  }

  /**
   * Look up the price-table entry stored under exactly `name`, ignoring
   * inherited object properties such as "constructor".
   */
  private static lookupRates(name: string): { input: number; output: number } | undefined {
    return Object.prototype.hasOwnProperty.call(MODEL_COSTS, name) ? MODEL_COSTS[name] : undefined;
  }

  /**
   * Calculate the cost of a request from its token counts.
   *
   * Rates are resolved by trying the full model id first (so
   * provider-prefixed table keys such as "anthropic/claude-3.5-sonnet"
   * match), then the part after the last "/", and finally a default
   * estimate of 1.0 (input) / 5.0 (output) per million tokens.
   *
   * @returns Input, output and total cost, each rounded to 6 decimals.
   */
  calculateCost(model: string, input_tokens: number, output_tokens: number): { input: number; output: number; total: number } {
    const model_key = model.split("/").pop() || model;
    const rates =
      CostTracker.lookupRates(model) ??
      CostTracker.lookupRates(model_key) ??
      { input: 1.0, output: 5.0 }; // Default estimate

    const input_cost = (input_tokens / 1_000_000) * rates.input;
    const output_cost = (output_tokens / 1_000_000) * rates.output;

    return {
      input: Math.round(input_cost * 1000000) / 1000000, // 6 decimal precision
      output: Math.round(output_cost * 1000000) / 1000000,
      total: Math.round((input_cost + output_cost) * 1000000) / 1000000,
    };
  }

  /**
   * Record a request's cost, append it to the history, and check budgets.
   *
   * @returns The snapshot that was stored.
   */
  record(provider: string, model: string, input_tokens: number, output_tokens: number): CostSnapshot {
    const costs = this.calculateCost(model, input_tokens, output_tokens);
    const snapshot: CostSnapshot = {
      provider,
      model,
      input_tokens,
      output_tokens,
      input_cost: costs.input,
      output_cost: costs.output,
      total_cost: costs.total,
      timestamp: Date.now(),
    };

    this.history.push(snapshot);
    this.checkBudgets(snapshot);
    return snapshot;
  }

  /**
   * Compare current spend with each configured limit and emit an alert
   * once spend reaches 90% of that limit. The per-model check only looks
   * at the model of the snapshot just recorded.
   */
  private checkBudgets(snapshot: CostSnapshot): void {
    const summary = this.getSummary();
    const today = new Date().toISOString().split("T")[0];
    const month = today.substring(0, 7);

    // Check daily budget
    if (this.budgets.daily_limit) {
      const daily_cost = summary.daily_costs[today] || 0;
      if (daily_cost >= this.budgets.daily_limit * 0.9) { // Alert at 90%
        this.emitAlert({
          type: "daily",
          threshold: this.budgets.daily_limit,
          current: daily_cost,
        });
      }
    }

    // Check monthly budget
    if (this.budgets.monthly_limit) {
      const monthly_cost = summary.monthly_costs[month] || 0;
      if (monthly_cost >= this.budgets.monthly_limit * 0.9) {
        this.emitAlert({
          type: "monthly",
          threshold: this.budgets.monthly_limit,
          current: monthly_cost,
        });
      }
    }

    // Check per-model budgets
    if (this.budgets.per_model_limits) {
      const model_limit = this.budgets.per_model_limits[snapshot.model];
      if (model_limit) {
        const model_cost = summary.by_model[snapshot.model] || 0;
        if (model_cost >= model_limit * 0.9) {
          this.emitAlert({
            type: "model",
            threshold: model_limit,
            current: model_cost,
            model: snapshot.model,
          });
        }
      }
    }
  }

  /**
   * Store an alert and pass it to the registered callback, unless an alert
   * with the same identity was emitted within the last hour.
   *
   * Identity covers type, threshold, model and provider, so two models
   * that share the same limit do not suppress each other's alerts.
   */
  private emitAlert(alert: CostAlert): void {
    const identity = [alert.type, alert.threshold, alert.model ?? "", alert.provider ?? ""].join("|");
    const now = Date.now();
    const last_emitted = this.alert_emitted_at.get(identity);
    if (last_emitted !== undefined && now - last_emitted < 3600000) return; // 1 hour cooldown

    this.alert_emitted_at.set(identity, now);
    this.alerts.push(alert);
    if (this.alerts_callback) {
      this.alerts_callback(alert);
    }
  }

  /**
   * Register the alert callback, replacing any previously registered one.
   */
  onAlert(callback: (alert: CostAlert) => void): void {
    this.alerts_callback = callback;
  }

  /**
   * Build a cost summary by aggregating the full request history.
   *
   * Daily and monthly buckets use the UTC date of each record.
   */
  getSummary(): CostSummary {
    const nowMs = Date.now();

    // Advance the rollover markers once their time has passed.
    // NOTE(review): nothing in this class reads these markers to change a
    // result; the buckets below are derived from record timestamps alone.
    const nowDate = new Date(nowMs);
    if (nowMs >= this.daily_reset) {
      this.daily_reset = new Date(nowDate.getFullYear(), nowDate.getMonth(), nowDate.getDate() + 1).getTime();
    }
    if (nowMs >= this.monthly_reset) {
      this.monthly_reset = new Date(nowDate.getFullYear(), nowDate.getMonth() + 1, 1).getTime();
    }

    const by_provider: Record<string, number> = {};
    const by_model: Record<string, number> = {};
    const daily_costs: Record<string, number> = {};
    const monthly_costs: Record<string, number> = {};
    let total_cost = 0;
    let total_input_tokens = 0;
    let total_output_tokens = 0;

    for (const entry of this.history) {
      total_cost += entry.total_cost;
      total_input_tokens += entry.input_tokens;
      total_output_tokens += entry.output_tokens;

      by_provider[entry.provider] = (by_provider[entry.provider] || 0) + entry.total_cost;
      by_model[entry.model] = (by_model[entry.model] || 0) + entry.total_cost;

      const entry_date = new Date(entry.timestamp).toISOString().split("T")[0];
      const entry_month = entry_date.substring(0, 7);

      daily_costs[entry_date] = (daily_costs[entry_date] || 0) + entry.total_cost;
      monthly_costs[entry_month] = (monthly_costs[entry_month] || 0) + entry.total_cost;
    }

    return {
      total_cost: Math.round(total_cost * 1000000) / 1000000,
      by_provider,
      by_model,
      daily_costs,
      monthly_costs,
      request_count: this.history.length,
      token_count: { input: total_input_tokens, output: total_output_tokens },
      average_cost_per_request:
        this.history.length > 0
          ? Math.round((total_cost / this.history.length) * 1000000) / 1000000
          : 0,
    };
  }

  /**
   * Get the remaining budget for each configured limit.
   *
   * @returns `daily` / `monthly` are null when no such limit is set;
   *   `per_model` maps each limited model to its remaining amount.
   *   Remaining amounts never go below 0.
   */
  getRemainingBudget(): { daily: number | null; monthly: number | null; per_model: Record<string, number> } {
    const summary = this.getSummary();
    const today = new Date().toISOString().split("T")[0];
    const month = today.substring(0, 7);

    return {
      daily: this.budgets.daily_limit
        ? Math.max(0, this.budgets.daily_limit - (summary.daily_costs[today] || 0))
        : null,
      monthly: this.budgets.monthly_limit
        ? Math.max(0, this.budgets.monthly_limit - (summary.monthly_costs[month] || 0))
        : null,
      per_model: this.budgets.per_model_limits
        ? Object.fromEntries(
            Object.entries(this.budgets.per_model_limits).map(([model, limit]) => [
              model,
              Math.max(0, limit - (summary.by_model[model] || 0)),
            ])
          )
        : {},
    };
  }

  /**
   * Reset the cost history, stored alerts, and alert cooldowns.
   */
  reset(): void {
    this.history = [];
    this.alerts = [];
    this.alert_emitted_at.clear();
  }

  /**
   * Export cost data for analysis.
   *
   * @returns A shallow copy of the history array.
   */
  export(): CostSnapshot[] {
    return [...this.history];
  }
}