adaptive-memory-multi-model-router 2.14.45 → 2.14.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (605) hide show
  1. package/dist/index.d.ts +4 -0
  2. package/dist/index.js +8 -2
  3. package/dist/memory/hybridMemory.d.ts +71 -0
  4. package/dist/memory/hybridMemory.js +124 -0
  5. package/dist/memory/reasoningBank.d.ts +88 -0
  6. package/dist/memory/reasoningBank.js +303 -0
  7. package/{docs/llms.txt → llms.txt.bak} +6 -6
  8. package/package.json +13 -84
  9. package/src/index.ts +8 -0
  10. package/src/memory/hybridMemory.ts +155 -0
  11. package/src/memory/reasoningBank.ts +335 -0
  12. package/src/routing/advancedRouter.ts.bak +650 -0
  13. package/test.js.bak +376 -0
  14. package/.dockerignore +0 -82
  15. package/.env.example +0 -303
  16. package/.github/DISCUSSIONS_WELCOME.md +0 -27
  17. package/.github/DISCUSSION_TEMPLATE.yml +0 -5
  18. package/.github/FUNDING.yml +0 -2
  19. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -94
  20. package/.github/ISSUE_TEMPLATE/config.yml +0 -17
  21. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -71
  22. package/.github/PULL_REQUEST_TEMPLATE.md +0 -71
  23. package/.github/dependabot.yml +0 -9
  24. package/.github/workflows/auto-publish.yml +0 -51
  25. package/.github/workflows/ci.yml +0 -263
  26. package/.github/workflows/codeql.yml +0 -38
  27. package/.github/workflows/npm-publish.yml +0 -20
  28. package/.github/workflows/pages.yml +0 -37
  29. package/.github/workflows/stale.yml +0 -54
  30. package/.publish-tick +0 -1
  31. package/.well-known/ai-plugin.json +0 -16
  32. package/AGENT_COUNCIL_FINDINGS.md +0 -142
  33. package/ARCHITECTURE.md +0 -346
  34. package/AUDIT_REPORT.md +0 -28
  35. package/CODE_OF_CONDUCT.md +0 -128
  36. package/CONTRIBUTING.md +0 -50
  37. package/CONTRIBUTORS.md +0 -20
  38. package/Dockerfile +0 -53
  39. package/Dockerfile.proxy +0 -33
  40. package/HEALTH_REPORT.md +0 -118
  41. package/IMPROVEMENT_PLAN.md +0 -107
  42. package/LANDING.md +0 -43
  43. package/LAUNCH-PAIN-DRIVEN.md +0 -339
  44. package/LAUNCH.md +0 -337
  45. package/LAUNCH_CHECKLIST.md +0 -141
  46. package/LAUNCH_SNAPSHOT.md +0 -260
  47. package/MANIFESTO.md +0 -41
  48. package/POPULARITY_BOOSTERS.md +0 -285
  49. package/PR_STATUS_REPORT.md +0 -148
  50. package/REDESIGN.md +0 -95
  51. package/RUNKIT.md +0 -83
  52. package/SECURITY.md +0 -29
  53. package/SUBMISSIONS.md +0 -43
  54. package/_schema.html +0 -53
  55. package/ai-plugin.json +0 -16
  56. package/articles/AI_AGENT_LLM_ROUTING.md +0 -150
  57. package/articles/CHINESE_DIRECTORIES.md +0 -100
  58. package/articles/CHINESE_SUBMISSIONS_READY.md +0 -322
  59. package/articles/COMPETITOR_ALERTS.md +0 -31
  60. package/articles/COMPLETE_POSTING_DIRECTORY.md +0 -147
  61. package/articles/CONTENT_STRUCTURE.md +0 -292
  62. package/articles/DEVTO_COST_GUIDE.md +0 -473
  63. package/articles/DEVTO_FINAL.md +0 -416
  64. package/articles/DEVTO_MULTI_PROVIDER.md +0 -542
  65. package/articles/DEVTO_READY.md +0 -255
  66. package/articles/DEVTO_V2_ANNOUNCEMENT.md +0 -160
  67. package/articles/DEVTO_VIRAL_GROWTH.md +0 -280
  68. package/articles/FRESH_devto.md +0 -460
  69. package/articles/FRESH_devto_2026_05.md +0 -73
  70. package/articles/FRESH_hackernews.md +0 -14
  71. package/articles/FRESH_reddit_ml.md +0 -90
  72. package/articles/FRESH_reddit_node.md +0 -198
  73. package/articles/FRESH_reddit_sideproject.md +0 -72
  74. package/articles/FRESH_reddit_webdev.md +0 -130
  75. package/articles/FROM_ZERO_TO_10K.md +0 -107
  76. package/articles/HN_10X_BETTER.md +0 -430
  77. package/articles/HN_ACCOUNT_GUIDE.md +0 -21
  78. package/articles/HN_CHINESE_STYLE.md +0 -308
  79. package/articles/HN_FINAL.md +0 -148
  80. package/articles/HN_POSTED_VERSION.md +0 -56
  81. package/articles/HN_POST_READY.md +0 -137
  82. package/articles/HN_RESEARCH.md +0 -364
  83. package/articles/HN_SHOW_routerarena.md +0 -17
  84. package/articles/HN_TIMING_GUIDE.md +0 -52
  85. package/articles/INDIEHACKERS_POST.md +0 -52
  86. package/articles/INDIEHACKERS_READY.md +0 -120
  87. package/articles/LLM_BENCHMARK_DEEP_DIVE.md +0 -153
  88. package/articles/MASTER_POSTING_DIRECTORY.md +0 -189
  89. package/articles/NEWSLETTER_SEND_NOW.md +0 -259
  90. package/articles/NEWSLETTER_SUBMISSIONS.md +0 -112
  91. package/articles/PAIN-DRIVEN-devto-v2.md +0 -308
  92. package/articles/PAIN-DRIVEN-devto-v3.md +0 -268
  93. package/articles/PAIN-DRIVEN-devto.md +0 -242
  94. package/articles/PAIN-DRIVEN-hackernews-v2.md +0 -138
  95. package/articles/PAIN-DRIVEN-hackernews-v3.md +0 -151
  96. package/articles/PAIN-DRIVEN-hackernews.md +0 -131
  97. package/articles/PAIN-DRIVEN-reddit-v2.md +0 -301
  98. package/articles/PAIN-DRIVEN-reddit-v3.md +0 -236
  99. package/articles/PAIN-DRIVEN-reddit.md +0 -218
  100. package/articles/PAIN-DRIVEN-twitter-v2.md +0 -110
  101. package/articles/PAIN-DRIVEN-twitter-v3.md +0 -121
  102. package/articles/PAIN-DRIVEN-twitter.md +0 -120
  103. package/articles/PORTKEY_VS_A3M.md +0 -147
  104. package/articles/POSTING_KIT_2026_05.md +0 -67
  105. package/articles/PRESS_KIT_routerarena.md +0 -77
  106. package/articles/PRODUCTHUNT_LISTING.md +0 -48
  107. package/articles/PRODUCTHUNT_READY.md +0 -106
  108. package/articles/PR_PLAN_vault.md +0 -125
  109. package/articles/REDDIT_FINAL.md +0 -232
  110. package/articles/REDDIT_POST.md +0 -67
  111. package/articles/REDDIT_SUBMISSION_READY.md +0 -348
  112. package/articles/ROUTERARENA_LEADER.md +0 -45
  113. package/articles/SHOW_HN_FINAL.md +0 -29
  114. package/articles/TWEETS_10K_DOWNLOADS.md +0 -47
  115. package/articles/TWEETS_BENCHMARK_FIRST.md +0 -46
  116. package/articles/TWEETS_MCP_PLAY.md +0 -51
  117. package/articles/TWEETS_SEQUENTIAL_BROKEN.md +0 -49
  118. package/articles/TWEETS_WHY_BUILD.md +0 -54
  119. package/articles/TWEETS_routerarena_leader.md +0 -53
  120. package/articles/TWEET_STORM_READY.md +0 -165
  121. package/articles/TWITTER_FINAL.md +0 -167
  122. package/articles/WHY_10X_BETTER.md +0 -261
  123. package/articles/WHY_CHINESE_STYLE_BETTER.md +0 -323
  124. package/articles/ai-discoverability-llm-routing.md +0 -210
  125. package/articles/devto-llm-routing.md +0 -138
  126. package/articles/hackernews-show-hn.md +0 -54
  127. package/articles/hashnode-llm-cost-optimization.md +0 -125
  128. package/articles/hn_show_2026_05.md +0 -11
  129. package/articles/medium-building-llm-router.md +0 -205
  130. package/articles/reddit-ml.md +0 -76
  131. package/articles/twitter-thread-cost-savings.md +0 -50
  132. package/articles/youtube-tutorial-script.md +0 -262
  133. package/assets/a3m_3blue1brown.mp4 +0 -0
  134. package/assets/banner.svg +0 -109
  135. package/assets/chart-cost-v2.svg +0 -91
  136. package/assets/chart-cost-v3.svg +0 -143
  137. package/assets/chart-features-v2.svg +0 -132
  138. package/assets/chart-features-v3.svg +0 -211
  139. package/assets/chart-growth-v2.svg +0 -122
  140. package/assets/chart-growth-v3.svg +0 -189
  141. package/assets/cost-comparison.svg +0 -134
  142. package/assets/cost-simple.svg +0 -64
  143. package/assets/demo-hn.gif +0 -0
  144. package/assets/feature-matrix.svg +0 -136
  145. package/assets/growth-chart-animated.svg +0 -76
  146. package/assets/growth-chart.svg +0 -82
  147. package/assets/growth-simple.svg +0 -69
  148. package/assets/hero-diagram.svg +0 -81
  149. package/assets/logo-new.svg +0 -21
  150. package/assets/logo.svg +0 -68
  151. package/assets/provider-comparison.svg +0 -121
  152. package/assets/social-preview-new.svg +0 -100
  153. package/assets/social-preview.svg +0 -194
  154. package/assets/social-v2.svg +0 -130
  155. package/assets/social-v3.svg +0 -212
  156. package/benchmark-provider-results.json +0 -245
  157. package/benchmark-results.json +0 -54
  158. package/council-votes/architecture-vote.md +0 -121
  159. package/council-votes/coverage-vote.md +0 -93
  160. package/data/adaptive-benchmark.json +0 -92
  161. package/data/benchmark-results.json +0 -47
  162. package/data/labeled-benchmark.json +0 -88
  163. package/demo/3blue1brown_video.py +0 -285
  164. package/demo/3blue1brown_video_v2.py +0 -310
  165. package/demo/IMPROVED_PROMPTS.md +0 -229
  166. package/demo/VEO3_PROMPTS.md +0 -269
  167. package/demo/VIDEO_PRODUCTION_GUIDE.md +0 -333
  168. package/demo/a3m_3blue1brown.mp4 +0 -0
  169. package/demo/asciinema-demo.sh +0 -195
  170. package/demo/demo-hn.tape +0 -74
  171. package/demo/demo-script.md +0 -53
  172. package/demo/demo-script.sh +0 -62
  173. package/demo/demo.svg +0 -75
  174. package/demo/frame1_ai_data_center.png +0 -0
  175. package/demo/frame1_sunset_video.mp4 +0 -0
  176. package/demo/frame2_cost_comparison.png +0 -0
  177. package/demo/frame2_cost_comparison_fallback.png +0 -0
  178. package/demo/frame3_parallel_execution.png +0 -0
  179. package/demo/frame3_parallel_execution_fallback.png +0 -0
  180. package/demo/frame4_providers.png +0 -0
  181. package/demo/frame4_providers_fallback.png +0 -0
  182. package/demo/frame5_endcard.png +0 -0
  183. package/demo/frame5_endcard_fallback.png +0 -0
  184. package/demo/new_frame1_hook.png +0 -0
  185. package/demo/new_frame2_proof.png +0 -0
  186. package/demo/new_frame3_wow.png +0 -0
  187. package/demo/new_frame4_social.png +0 -0
  188. package/demo/new_frame5_cta.png +0 -0
  189. package/demo/package.json +0 -13
  190. package/demo/product-video-final.mp4 +0 -0
  191. package/demo/product-video-hype-v1.mp4 +0 -0
  192. package/demo/product-video-v1.mp4 +0 -0
  193. package/demo/public/index.html +0 -762
  194. package/demo/recording.cast +0 -55
  195. package/demo/server.js +0 -405
  196. package/demo-new.tape +0 -71
  197. package/demo-real.sh +0 -198
  198. package/demo-simple.tape +0 -205
  199. package/demo.html +0 -520
  200. package/demo.sh +0 -85
  201. package/demo.tape +0 -259
  202. package/dist/analytics/costAnalytics.d.ts.map +0 -1
  203. package/dist/analytics/costAnalytics.js.map +0 -1
  204. package/dist/benchmark/comprehensive.js.map +0 -1
  205. package/dist/benchmark/reproducible.d.ts.map +0 -1
  206. package/dist/benchmark/reproducible.js.map +0 -1
  207. package/dist/cache/prefixCache.d.ts.map +0 -1
  208. package/dist/cache/prefixCache.js.map +0 -1
  209. package/dist/cache/responseCache.d.ts.map +0 -1
  210. package/dist/cache/responseCache.js.map +0 -1
  211. package/dist/cache/semanticCache.d.ts.map +0 -1
  212. package/dist/cache/semanticCache.js.map +0 -1
  213. package/dist/cli/setupWizard.d.ts.map +0 -1
  214. package/dist/cli/setupWizard.js.map +0 -1
  215. package/dist/cost/budgetEnforcer.d.ts.map +0 -1
  216. package/dist/cost/budgetEnforcer.js.map +0 -1
  217. package/dist/cost/costTracker.d.ts.map +0 -1
  218. package/dist/cost/costTracker.js.map +0 -1
  219. package/dist/ensemble/multiRoundDialog.js.map +0 -1
  220. package/dist/ensemble/shapleyValue.js.map +0 -1
  221. package/dist/integrations/langchainAdapter.d.ts.map +0 -1
  222. package/dist/integrations/langchainAdapter.js.map +0 -1
  223. package/dist/integrations/oauth.d.ts.map +0 -1
  224. package/dist/integrations/oauth.js.map +0 -1
  225. package/dist/integrations/scienceAdapter.js.map +0 -1
  226. package/dist/memory/autoFetch.d.ts.map +0 -1
  227. package/dist/memory/autoFetch.js.map +0 -1
  228. package/dist/memory/episodicMemory.d.ts.map +0 -1
  229. package/dist/memory/episodicMemory.js.map +0 -1
  230. package/dist/memory/memoryTree.d.ts.map +0 -1
  231. package/dist/memory/memoryTree.js.map +0 -1
  232. package/dist/memory/obsidianVault.d.ts.map +0 -1
  233. package/dist/memory/obsidianVault.js.map +0 -1
  234. package/dist/observability/changeWatch.d.ts.map +0 -1
  235. package/dist/observability/changeWatch.js.map +0 -1
  236. package/dist/observability/fatigueDetector.d.ts.map +0 -1
  237. package/dist/observability/fatigueDetector.js.map +0 -1
  238. package/dist/observability/index.d.ts.map +0 -1
  239. package/dist/observability/index.js.map +0 -1
  240. package/dist/observability/metrics.d.ts.map +0 -1
  241. package/dist/observability/metrics.js.map +0 -1
  242. package/dist/observability/middleware.d.ts.map +0 -1
  243. package/dist/observability/middleware.js.map +0 -1
  244. package/dist/observability/tracer.d.ts.map +0 -1
  245. package/dist/observability/tracer.js.map +0 -1
  246. package/dist/observability/types.d.ts.map +0 -1
  247. package/dist/observability/types.js.map +0 -1
  248. package/dist/orchestration/haloOrchestrator.d.ts.map +0 -1
  249. package/dist/orchestration/haloOrchestrator.js.map +0 -1
  250. package/dist/orchestration/mctsWorkflow.d.ts.map +0 -1
  251. package/dist/orchestration/mctsWorkflow.js.map +0 -1
  252. package/dist/providers/localProvider.d.ts.map +0 -1
  253. package/dist/providers/localProvider.js.map +0 -1
  254. package/dist/providers/providerConfig.d.ts.map +0 -1
  255. package/dist/providers/providerConfig.js.map +0 -1
  256. package/dist/providers/registry.d.ts.map +0 -1
  257. package/dist/providers/registry.js.map +0 -1
  258. package/dist/routing/advancedRouter.d.ts.map +0 -1
  259. package/dist/routing/advancedRouter.js.map +0 -1
  260. package/dist/routing/crossModelValidation.d.ts.map +0 -1
  261. package/dist/routing/crossModelValidation.js.map +0 -1
  262. package/dist/routing/providerHealth.d.ts.map +0 -1
  263. package/dist/routing/providerHealth.js.map +0 -1
  264. package/dist/routing/providerRetry.d.ts.map +0 -1
  265. package/dist/routing/providerRetry.js.map +0 -1
  266. package/dist/scripts/banner.js +0 -29
  267. package/dist/security/guardrails.d.ts.map +0 -1
  268. package/dist/security/guardrails.js.map +0 -1
  269. package/dist/server/dashboard.d.ts.map +0 -1
  270. package/dist/server/dashboard.js.map +0 -1
  271. package/dist/server/modelMapper.d.ts.map +0 -1
  272. package/dist/server/modelMapper.js.map +0 -1
  273. package/dist/server/proxyServer.d.ts.map +0 -1
  274. package/dist/server/proxyServer.js.map +0 -1
  275. package/dist/skills/__tests__/skill_manager.test.d.ts +0 -2
  276. package/dist/skills/__tests__/skill_manager.test.d.ts.map +0 -1
  277. package/dist/skills/__tests__/skill_manager.test.js +0 -268
  278. package/dist/skills/__tests__/skill_manager.test.js.map +0 -1
  279. package/dist/tools/tmlpdTools.d.ts.map +0 -1
  280. package/dist/tools/tmlpdTools.js.map +0 -1
  281. package/dist/tui/dashboard.d.ts.map +0 -1
  282. package/dist/tui/dashboard.js.map +0 -1
  283. package/dist/tui/index.d.ts.map +0 -1
  284. package/dist/tui/index.js.map +0 -1
  285. package/dist/utils/batchProcessor.d.ts.map +0 -1
  286. package/dist/utils/batchProcessor.js.map +0 -1
  287. package/dist/utils/compression.d.ts.map +0 -1
  288. package/dist/utils/compression.js.map +0 -1
  289. package/dist/utils/costUtils.d.ts.map +0 -1
  290. package/dist/utils/costUtils.js.map +0 -1
  291. package/dist/utils/reliability.d.ts.map +0 -1
  292. package/dist/utils/reliability.js.map +0 -1
  293. package/dist/utils/sorting.d.ts.map +0 -1
  294. package/dist/utils/sorting.js.map +0 -1
  295. package/dist/utils/speculativeDecoding.d.ts.map +0 -1
  296. package/dist/utils/speculativeDecoding.js.map +0 -1
  297. package/dist/utils/tokenUtils.d.ts.map +0 -1
  298. package/dist/utils/tokenUtils.js.map +0 -1
  299. package/docs/.nojekyll +0 -0
  300. package/docs/ANALYSIS_PRINCIPLES.md +0 -162
  301. package/docs/API.md +0 -855
  302. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +0 -1391
  303. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +0 -1051
  304. package/docs/BENCHMARK.md +0 -170
  305. package/docs/CHINESE_PROVIDER_RELIABILITY.md +0 -37
  306. package/docs/CITATIONS.md +0 -74
  307. package/docs/CLAIMS_AND_EVIDENCE.md +0 -58
  308. package/docs/CONFIGURATION.md +0 -476
  309. package/docs/COUNCIL_DECISION.json +0 -816
  310. package/docs/COUNCIL_SUMMARY.md +0 -319
  311. package/docs/COUNCIL_V2.2_DECISION.md +0 -416
  312. package/docs/ENGINEERING_SPEC.md +0 -55
  313. package/docs/FACTORY_RESET.md +0 -34
  314. package/docs/GEO.md +0 -66
  315. package/docs/GEO_OPTIMIZATION.md +0 -30
  316. package/docs/GEO_ROOT_CAUSE.md +0 -136
  317. package/docs/GEO_STATUS.md +0 -85
  318. package/docs/GEO_TEST_RESULTS.md +0 -176
  319. package/docs/HN_CHECKLIST.md +0 -38
  320. package/docs/HN_FOUNDER_COMMENT.md +0 -17
  321. package/docs/HN_SUBMISSION_FINAL.md +0 -180
  322. package/docs/HN_SUBMISSION_V3.md +0 -56
  323. package/docs/IMPROVEMENT_ROADMAP.md +0 -515
  324. package/docs/INTEGRATIONS.md +0 -420
  325. package/docs/LANGCHAIN_INTEGRATION.md +0 -147
  326. package/docs/LLM_COUNCIL_DECISION.md +0 -508
  327. package/docs/MIDDLEWARE_CHAIN.md +0 -35
  328. package/docs/PROMO_CHECKLIST.md +0 -200
  329. package/docs/QUICKSTART.md +0 -271
  330. package/docs/QUICK_START.md +0 -43
  331. package/docs/QUICK_START_VISIBILITY.md +0 -782
  332. package/docs/REDDIT_GAP_ANALYSIS.md +0 -299
  333. package/docs/RELEASE_CHECKLIST.md +0 -32
  334. package/docs/REPRODUCIBILITY.md +0 -63
  335. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +0 -1180
  336. package/docs/ROUTING_RUBRIC.md +0 -197
  337. package/docs/SEO_AUDIT.md +0 -186
  338. package/docs/SOCIAL_LISTENING.md +0 -219
  339. package/docs/TMLPD_QNA.md +0 -751
  340. package/docs/TMLPD_V2.1_COMPLETE.md +0 -763
  341. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +0 -754
  342. package/docs/UPDATE_TOPICS.md +0 -15
  343. package/docs/USE_CASES.md +0 -59
  344. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +0 -446
  345. package/docs/V2_IMPLEMENTATION_GUIDE.md +0 -388
  346. package/docs/VERCEL_AI_SDK.md +0 -209
  347. package/docs/VISIBILITY_ADOPTION_PLAN.md +0 -1005
  348. package/docs/_config.yml +0 -49
  349. package/docs/ai-plugin.json +0 -16
  350. package/docs/api.html +0 -513
  351. package/docs/architecture-diagram.md +0 -40
  352. package/docs/benchmark-chart.png +0 -0
  353. package/docs/benchmark.html +0 -387
  354. package/docs/blog/routerarena-number-one.html +0 -73
  355. package/docs/cli-cheatsheet.md +0 -339
  356. package/docs/compare.md +0 -109
  357. package/docs/comparison-litellm.md +0 -88
  358. package/docs/comparison.md +0 -108
  359. package/docs/cost-chart-ascii.md +0 -42
  360. package/docs/cost-comparison-chart.svg +0 -88
  361. package/docs/curl-examples.md +0 -247
  362. package/docs/demo-auto.html +0 -264
  363. package/docs/demo.html +0 -416
  364. package/docs/geo/GENERATIVE_ENGINE_OPTIMIZATION.md +0 -232
  365. package/docs/index.html +0 -507
  366. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +0 -421
  367. package/docs/launch-content/README.md +0 -457
  368. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  369. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  370. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  371. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  372. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  373. package/docs/launch-content/generate_charts.py +0 -313
  374. package/docs/launch-content/hn_show_post.md +0 -139
  375. package/docs/launch-content/partner_outreach_templates.md +0 -745
  376. package/docs/launch-content/reddit_posts.md +0 -467
  377. package/docs/launch-content/twitter_thread.txt +0 -460
  378. package/docs/npm-downloads-chart.svg +0 -43
  379. package/docs/openapi.json +0 -139
  380. package/docs/openapi.yaml +0 -1318
  381. package/docs/quick-start.html +0 -366
  382. package/docs/robots.txt +0 -52
  383. package/docs/sitemap.xml +0 -57
  384. package/docs/styles.css +0 -682
  385. package/docs/well-known/ai-plugin.json +0 -16
  386. package/docs/wellknown/ai-plugin.json +0 -16
  387. package/docs-site/assets/og-banner.svg +0 -194
  388. package/docs-site/index.html +0 -632
  389. package/eval/README.md +0 -46
  390. package/eval/baselines/main.json +0 -12
  391. package/eval/benchmark_dataset.jsonl +0 -16
  392. package/eval/check_golden_routes.js +0 -64
  393. package/eval/datasets/catalog.json +0 -33
  394. package/eval/datasets/slices/cn_provider_reliability_v1.jsonl +0 -3
  395. package/eval/datasets/slices/cost_pressure_v1.jsonl +0 -3
  396. package/eval/datasets/slices/safety_guardrails_v1.jsonl +0 -3
  397. package/eval/evals.json +0 -199
  398. package/eval/fault_injection_thresholds.json +0 -3
  399. package/eval/generate_report.js +0 -128
  400. package/eval/golden_routes.json +0 -114
  401. package/eval/lib/experiment_registry.js +0 -24
  402. package/eval/run_eval.js +0 -197
  403. package/eval/run_fault_injection.js +0 -201
  404. package/eval/run_shadow_eval.js +0 -85
  405. package/eval/thresholds.json +0 -9
  406. package/examples/QUICKSTART.md +0 -183
  407. package/examples/README.md +0 -61
  408. package/examples/a3m-sdk.js +0 -124
  409. package/examples/basic-route.js +0 -54
  410. package/examples/chat-loop.js +0 -202
  411. package/examples/classify-then-route.js +0 -102
  412. package/examples/cost-compare.js +0 -120
  413. package/examples/ensemble.js +0 -160
  414. package/examples/whatsapp-telegram-bridge-demo.js +0 -302
  415. package/examples/whatsapp-telegram-bridge.js +0 -269
  416. package/hf-space/README.md +0 -23
  417. package/hf-space/app.py +0 -240
  418. package/hf-space/requirements.txt +0 -1
  419. package/huggingface_space/README.md +0 -35
  420. package/huggingface_space/app.py +0 -126
  421. package/huggingface_space/create_space.py +0 -208
  422. package/huggingface_space/requirements.txt +0 -1
  423. package/mcp-server/README.md +0 -188
  424. package/mcp-server/package.json +0 -29
  425. package/mcp-server/src/index.ts +0 -744
  426. package/mcp-server/tsconfig.json +0 -19
  427. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +0 -313
  428. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +0 -277
  429. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +0 -1234
  430. package/openclaw-alexa-bridge/test_fixes.js +0 -77
  431. package/playground/README.md +0 -51
  432. package/playground/codesandbox.json +0 -12
  433. package/playground/index.js +0 -39
  434. package/proxy/README.md +0 -227
  435. package/proxy/package-lock.json +0 -831
  436. package/proxy/package.json +0 -17
  437. package/proxy/rate-limit.js +0 -145
  438. package/proxy/rate-limit.test.js +0 -311
  439. package/proxy/server.js +0 -970
  440. package/python/README.md +0 -102
  441. package/python/a3m/__init__.py +0 -6
  442. package/python/a3m/client.py +0 -190
  443. package/python/a3m/models.py +0 -40
  444. package/python/a3m/sync_client.py +0 -61
  445. package/python/examples.py +0 -53
  446. package/python/integrations.py +0 -330
  447. package/python/pyproject.toml +0 -23
  448. package/python/setup.py +0 -28
  449. package/python/tmlpd.py +0 -369
  450. package/qna/REDDIT_GAP_ANALYSIS.md +0 -299
  451. package/qna/TMLPD_QNA.md +0 -751
  452. package/research/FINDING_001_safety.md +0 -28
  453. package/research/FINDING_002_error_diversity.md +0 -32
  454. package/research/FINDING_003_confidence_weighted_voting.md +0 -32
  455. package/research/FINDING_004_cross_model_semantic_detection.md +0 -37
  456. package/research/FINDING_005_knowledge_gap_orthogonality.md +0 -34
  457. package/research/HALLUCINATION_RESEARCH.md +0 -27
  458. package/research/ensemble-voting.md +0 -324
  459. package/research/loss-functions.md +0 -545
  460. package/research-log.md +0 -49
  461. package/scripts/banner.js +0 -29
  462. package/scripts/benchmark-local-routerarena.ts +0 -176
  463. package/scripts/benchmark.js +0 -145
  464. package/scripts/benchmark.sh +0 -61
  465. package/scripts/compare-providers.sh +0 -230
  466. package/scripts/content-planner.js +0 -25
  467. package/scripts/create-labeled-benchmark.ts +0 -105
  468. package/scripts/cross_post.py +0 -443
  469. package/scripts/local-router-benchmark.ts +0 -154
  470. package/scripts/post-all.sh +0 -41
  471. package/scripts/publish_fcc.py +0 -106
  472. package/scripts/push-to-gitee.sh +0 -25
  473. package/scripts/routerarena_ensemble.js +0 -144
  474. package/scripts/routing-benchmark-v2.js +0 -373
  475. package/scripts/routing-benchmark-v3.js +0 -118
  476. package/scripts/routing-benchmark.js +0 -462
  477. package/scripts/run-labeled-benchmark.mjs +0 -104
  478. package/scripts/run-mmlu-benchmark.js +0 -176
  479. package/scripts/run-provider-benchmark.js +0 -244
  480. package/scripts/update-npm-badges.js +0 -158
  481. package/skill/SKILL.md +0 -238
  482. package/src/__tests__/integration/tmpld_integration.test.py +0 -540
  483. package/src/skills/__tests__/skill_manager.test.ts +0 -328
  484. package/submissions/benchmarks/ALL_PLATFORMS_SUBMISSION.md +0 -94
  485. package/submissions/benchmarks/LLMROUTERBENCH_SUBMISSION.md +0 -121
  486. package/submissions/benchmarks/MMRBENCH_SUBMISSION.md +0 -94
  487. package/submissions/benchmarks/ROUTERARENA_UPDATE.md +0 -83
  488. package/submissions/benchmarks/ROUTERBENCH_SUBMISSION.md +0 -225
  489. package/test-council/1-structure-tests.test.js +0 -353
  490. package/test-council/1-structure-tests.test.ts +0 -353
  491. package/test-council/2-edge-case-tests.test.ts +0 -361
  492. package/test-council/3-performance-tests.test.ts +0 -669
  493. package/test-council/4-integration-tests.test.ts +0 -391
  494. package/test-council/5-agent-council-eval.test.ts +0 -413
  495. package/test-council/AGENT_COUNCIL_ARCHITECTURE.md +0 -349
  496. package/test-council/TEST_COUNCIL_REPORT.md +0 -201
  497. package/test-council/agents/edge-case-agent.ts +0 -363
  498. package/test-council/agents/performance-agent.ts +0 -426
  499. package/test-council/agents/structure-agent.ts +0 -227
  500. package/test-council/council.md +0 -183
  501. package/tests/__mocks__/tokenUtils.ts +0 -8
  502. package/tests/memory/episodicMemory.test.ts +0 -227
  503. package/tests/package-lock.json +0 -1628
  504. package/tests/package.json +0 -18
  505. package/tests/routing/ensembleVoting.test.ts +0 -236
  506. package/tests/routing/providerRetry.test.ts +0 -360
  507. package/tests/routing/queryTypePresets.test.ts +0 -208
  508. package/tests/security/guardrailEngine.test.ts +0 -700
  509. package/tests/tsconfig.json +0 -21
  510. package/tests/vitest.config.ts +0 -18
  511. package/tmlpd-pi-extension/README.md +0 -66
  512. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +0 -114
  513. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +0 -1
  514. package/tmlpd-pi-extension/dist/cache/prefixCache.js +0 -285
  515. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +0 -1
  516. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +0 -58
  517. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +0 -1
  518. package/tmlpd-pi-extension/dist/cache/responseCache.js +0 -153
  519. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +0 -1
  520. package/tmlpd-pi-extension/dist/cli.js +0 -59
  521. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +0 -95
  522. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +0 -1
  523. package/tmlpd-pi-extension/dist/cost/costTracker.js +0 -240
  524. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +0 -1
  525. package/tmlpd-pi-extension/dist/index.d.ts +0 -723
  526. package/tmlpd-pi-extension/dist/index.d.ts.map +0 -1
  527. package/tmlpd-pi-extension/dist/index.js +0 -239
  528. package/tmlpd-pi-extension/dist/index.js.map +0 -1
  529. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +0 -82
  530. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +0 -1
  531. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +0 -145
  532. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +0 -1
  533. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +0 -102
  534. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +0 -1
  535. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +0 -207
  536. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +0 -1
  537. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +0 -85
  538. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +0 -1
  539. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +0 -210
  540. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +0 -1
  541. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +0 -102
  542. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +0 -1
  543. package/tmlpd-pi-extension/dist/providers/localProvider.js +0 -338
  544. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +0 -1
  545. package/tmlpd-pi-extension/dist/providers/registry.d.ts +0 -55
  546. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +0 -1
  547. package/tmlpd-pi-extension/dist/providers/registry.js +0 -138
  548. package/tmlpd-pi-extension/dist/providers/registry.js.map +0 -1
  549. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +0 -68
  550. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +0 -1
  551. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +0 -332
  552. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +0 -1
  553. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +0 -101
  554. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +0 -1
  555. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +0 -368
  556. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +0 -1
  557. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +0 -96
  558. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +0 -1
  559. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +0 -170
  560. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +0 -1
  561. package/tmlpd-pi-extension/dist/utils/compression.d.ts +0 -61
  562. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +0 -1
  563. package/tmlpd-pi-extension/dist/utils/compression.js +0 -281
  564. package/tmlpd-pi-extension/dist/utils/compression.js.map +0 -1
  565. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +0 -74
  566. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +0 -1
  567. package/tmlpd-pi-extension/dist/utils/reliability.js +0 -177
  568. package/tmlpd-pi-extension/dist/utils/reliability.js.map +0 -1
  569. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +0 -117
  570. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +0 -1
  571. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +0 -246
  572. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +0 -1
  573. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +0 -50
  574. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +0 -1
  575. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +0 -124
  576. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +0 -1
  577. package/tmlpd-pi-extension/examples/QUICKSTART.md +0 -183
  578. package/tmlpd-pi-extension/package-lock.json +0 -79
  579. package/tmlpd-pi-extension/package.json +0 -172
  580. package/tmlpd-pi-extension/python/examples.py +0 -53
  581. package/tmlpd-pi-extension/python/integrations.py +0 -330
  582. package/tmlpd-pi-extension/python/setup.py +0 -28
  583. package/tmlpd-pi-extension/python/tmlpd.py +0 -369
  584. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +0 -299
  585. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +0 -751
  586. package/tmlpd-pi-extension/skill/SKILL.md +0 -238
  587. package/tmlpd-pi-extension/src/cache/responseCache.ts +0 -147
  588. package/tmlpd-pi-extension/src/cost/costTracker.ts +0 -302
  589. package/tmlpd-pi-extension/src/index.ts +0 -232
  590. package/tmlpd-pi-extension/src/memory/episodicMemory.ts +0 -257
  591. package/tmlpd-pi-extension/src/orchestration/haloOrchestrator.ts +0 -266
  592. package/tmlpd-pi-extension/src/orchestration/mctsWorkflow.ts +0 -262
  593. package/tmlpd-pi-extension/src/providers/localProvider.ts +0 -406
  594. package/tmlpd-pi-extension/src/providers/registry.ts +0 -164
  595. package/tmlpd-pi-extension/src/routing/ensembleVoting.ts +0 -159
  596. package/tmlpd-pi-extension/src/routing/queryTypePresets.ts +0 -136
  597. package/tmlpd-pi-extension/src/tools/tmlpdTools.ts +0 -433
  598. package/tmlpd-pi-extension/src/utils/batchProcessor.ts +0 -232
  599. package/tmlpd-pi-extension/src/utils/compression.ts +0 -325
  600. package/tmlpd-pi-extension/src/utils/reliability.ts +0 -221
  601. package/tmlpd-pi-extension/src/utils/tokenUtils.ts +0 -145
  602. package/tmlpd-pi-extension/tsconfig.json +0 -18
  603. package/tsconfig.build.json +0 -29
  604. package/tsconfig.json +0 -18
  605. /package/{docs/llms-full.txt → llms-full.txt.bak} +0 -0
@@ -1,545 +0,0 @@
1
- # Loss Functions for LLM Routing Optimization
2
-
3
- **Date:** 2026-06-03
4
- **Author:** A3M Research
5
- **Target:** Improve RouterArena score from 70.32
6
-
7
- ---
8
-
9
- ## 1. Current A3M Cost Model Analysis
10
-
11
- ### 1.1 Existing Routing Logic
12
-
13
- A3M's routing uses a **weighted scoring formula**:
14
-
15
- ```typescript
16
- // From src/routing/advancedRouter.ts
17
-
18
- // Quality score (static, heuristic-based)
19
- quality_score: strengths.includes('premium') ? 0.95 :
20
- strengths.includes('reasoning') ? 0.90 :
21
- strengths.includes('fast') ? 0.82 : 0.80
22
-
23
- // Cost efficiency (linear penalty)
24
- costEfficiency(model, features) = (1 - avg_cost / 10) * 0.2-0.6
25
-
26
- // Final score
27
- total_score = quality_score * complexity_bias + cost_score * (1 - complexity_bias)
28
-
29
- // Online learning (EMA)
30
- quality_score = quality_score * (1 - alpha) + actual_rating * alpha
31
- ```
32
-
33
- ### 1.2 Current Score Calculation
34
-
35
- ```typescript
36
- // Lines 302-340 in advancedRouter.ts
37
- let score = model.quality_score * 0.6; // Base quality weight
38
-
39
- // Domain bonus (+0.2)
40
- if (features.domain && model.strengths.includes(domainBonus[domain]))
41
- score += 0.2;
42
-
43
- // Code bonus (+0.15)
44
- if (features.has_code && model.strengths.includes('coding'))
45
- score += 0.15;
46
-
47
- // Free tier preference (+0.2)
48
- if (features.complexity < 0.5 && model.strengths.includes('free'))
49
- score += 0.2;
50
- ```
51
-
52
- ### 1.3 Issues with Current Approach
53
-
54
- | Issue | Impact | Severity |
55
- |-------|--------|----------|
56
- | **No learned embeddings** | Keyword matching can't capture semantic similarity | High |
57
- | **No contrastive loss** | Can't distinguish "similar but different" queries | Medium |
58
- | **Static quality scores** | Provider quality varies by query type | High |
59
- | **Linear cost penalty** | Doesn't model diminishing returns | Medium |
60
- | **No latency in loss** | RouterArena penalizes slow routing | High |
61
- | **Single-objective** | No Pareto-optimal exploration | Medium |
62
-
63
- ---
64
-
65
- ## 2. Literature Review
66
-
67
- ### 2.1 RouteLLM (arXiv:2404.06035)
68
-
69
- **Key Insight:** Learned routing from pairwise preferences.
70
-
71
- **Architecture:**
72
- - BERT classifier on query embeddings
73
- - Trained on weak vs strong model comparisons
74
- - Binary preference: "Which model gives better answer?"
75
-
76
- **Loss Function:**
77
- ```
78
- L = CrossEntropy(softmax(W * [q; m_w; m_s]), preference_label)
79
- ```
80
-
81
- Where `q` = query embedding, `m_w` = weak model embedding, `m_s` = strong model embedding.
82
-
83
- **Results:**
84
- - 85% routing accuracy (exact tier match)
85
- - 70% cost savings vs all-premium
86
-
87
- **Relevance to A3M:** RouteLLM's pairwise training is what enables learned routing. A3M's rule-based approach scores 70.32 on RouterArena (a composite score, not directly comparable to RouteLLM's 85% routing accuracy), but could still benefit from hybrid training.
88
-
89
- ### 2.2 RouterArena Benchmark (arXiv:2510.00202)
90
-
91
- **Scoring Formula:**
92
- ```
93
- RouterArena_Score = 0.6 * Accuracy + 0.2 * Cost_Efficiency + 0.2 * Latency_Score
94
-
95
- where:
96
- Accuracy = % queries routed to correct tier (exact or ±1)
97
- Cost_Efficiency = 1 - (router_cost / baseline_cost)
98
- Latency_Score = 1 - (router_latency / max_latency)
99
- ```
100
-
101
- **Key Finding:** A3M scores 70.32 with heuristic routing. RouteLLM scores 48.07 with learned routing. **Heuristic can beat learned when cost matters.**
102
-
103
- **Relevance to A3M:** The scoring weights (60% accuracy, 20% cost, 20% latency) directly inform our loss function design.
104
-
105
- ### 2.3 LLMRouterBench
106
-
107
- **Dataset:** 400K+ query-model pairs across 9 domains
108
- **Task:** 4-tier classification (free → budget → mid → premium)
109
- **Baseline:** TF-IDF + Logistic Regression = 62.3%
110
- **State-of-art:** Learned embeddings + neural classifier = 78.1%
111
-
112
- **Loss Function Pattern:**
113
- ```
114
- L = CrossEntropy(router(query), true_tier)
115
- + λ * L2_regularization
116
- + λ * cost_penalty
117
- ```
118
-
119
- **Relevance to A3M:** Could incorporate tier classification loss into A3M's multi-signal classifier.
120
-
121
- ### 2.4 Contrastive Learning for Routing
122
-
123
- **Paper:** SimCSE, MoCo, CLIP-style approaches
124
-
125
- **Idea:** Embed queries and model capabilities in same space.
126
-
127
- **Loss:**
128
- ```
129
- L_contrastive = -log(exp(sim(q, m_pos)) / (exp(sim(q, m_pos)) + Σ exp(sim(q, m_neg))))
130
- ```
131
-
132
- Where `sim` = cosine similarity, `m_pos` = correct model, `m_neg` = incorrect models.
133
-
134
- **Relevance to A3M:** A3M's current approach uses keyword matching. Contrastive learning could improve query embedding quality without full BERT classifier.
135
-
136
- ### 2.5 Multi-Objective Optimization for Routing
137
-
138
- **Problem:** Quality, cost, latency are conflicting objectives.
139
-
140
- **Approaches:**
141
- 1. **Weighted Sum:** `L = w1*Q + w2*(-C) + w3*(-L)` — simple but requires tuning
142
- 2. **Pareto Front:** Find non-dominated solutions — expensive
143
- 3. **Scalarization:** `L = Π (Q^α * C^β * L^γ)` — smooth tradeoffs
144
-
145
- **Recommended for A3M:** Weighted sum with dynamic weights based on query type.
146
-
147
- ---
148
-
149
- ## 3. Recommended Loss Function for A3M
150
-
151
- ### 3.1 Proposed Architecture: Hybrid Routing Loss
152
-
153
- ```
154
- L_total = α * L_tier + β * L_cost + γ * L_latency + δ * L_contrastive
155
- ```
156
-
157
- Where:
158
- - `L_tier` = Cross-entropy for tier classification
159
- - `L_cost` = Cost-aware margin loss
160
- - `L_latency` = Latency regression loss
161
- - `L_contrastive` = Contrastive query-model alignment
162
-
163
- ### 3.2 Component Details
164
-
165
- #### Tier Classification Loss (L_tier)
166
-
167
- ```python
168
- def tier_loss(logits, true_tier):
169
- """
170
- logits: [batch_size, 4] - raw scores for free/budget/mid/premium
171
- true_tier: [batch_size] - ground truth tier (0-3)
172
-
173
- Standard cross-entropy with class weights
174
- """
175
- weights = torch.tensor([1.0, 1.5, 2.0, 3.0]) # Premium is rarest
176
- return F.cross_entropy(logits, true_tier, weight=weights)
177
- ```
178
-
179
- #### Cost-Aware Margin Loss (L_cost)
180
-
181
- ```python
182
- def cost_margin_loss(scores, chosen_cost, best_cost, margin=0.1):
183
- """
184
- scores: routing scores for each model
185
- chosen_cost: cost of selected model
186
- best_cost: cost of optimal model
187
-
188
- Penalize choosing expensive models when cheaper options exist
189
- """
190
- cost_ratio = chosen_cost / (best_cost + 1e-6)
191
-
192
- # If cost ratio > 1.5, penalize heavily
193
- if cost_ratio > 1.5:
194
- return margin * (cost_ratio - 1.5) ** 2
195
- return 0.0
196
- ```
197
-
198
- #### Latency Regression Loss (L_latency)
199
-
200
- ```python
201
- def latency_loss(predicted_latency, actual_latency):
202
- """
203
- Penalize the error between predicted and actual latency
204
-
205
- Using log-scale to handle wide latency range (50ms - 10s)
206
- """
207
- return F.mse_loss(
208
- torch.log1p(predicted_latency),
209
- torch.log1p(actual_latency)
210
- )
211
- ```
212
-
213
- #### Contrastive Alignment Loss (L_contrastive)
214
-
215
- ```python
216
- def contrastive_loss(query_emb, model_emb, labels, temperature=0.1):
217
- """
218
- query_emb: [batch_size, dim] - query embeddings
219
- model_emb: [num_models, dim] - model capability embeddings
220
- labels: [batch_size] - ground truth model index
221
-
222
- InfoNCE loss: queries should be close to their correct model embeddings
223
- """
224
- # Normalize embeddings
225
- query_emb = F.normalize(query_emb, dim=-1)
226
- model_emb = F.normalize(model_emb, dim=-1)
227
-
228
- # Compute similarities
229
- sim = torch.matmul(query_emb, model_emb.T) / temperature
230
-
231
- # Positive pairs (correct model)
232
- loss = F.cross_entropy(sim, labels)
233
-
234
- return loss
235
- ```
236
-
237
- ### 3.3 Combined Loss Implementation
238
-
239
- ```python
240
- class RoutingLoss(nn.Module):
241
- def __init__(self, weights=(0.5, 0.2, 0.1, 0.2)):
242
- super().__init__()
243
- self.w_tier = weights[0]
244
- self.w_cost = weights[1]
245
- self.w_latency = weights[2]
246
- self.w_contrastive = weights[3]
247
-
248
- # Learnable temperature for contrastive loss
249
- self.temperature = nn.Parameter(torch.ones(1))
250
-
251
- def forward(self,
252
- tier_logits, tier_targets, # Tier classification
253
- chosen_costs, optimal_costs, # Cost efficiency
254
- pred_latencies, actual_latencies, # Latency
255
- query_emb, model_emb, emb_labels, # Contrastive
256
- cost_weight=0.3): # Dynamic weight
257
-
258
- # Normalize weights by cost_weight (high cost sensitivity → high β)
259
- if cost_weight > 0.5:
260
- self.w_cost = cost_weight
261
- self.w_tier = 1 - cost_weight
262
-
263
- L_tier = tier_loss(tier_logits, tier_targets)
264
- L_cost = cost_margin_loss(chosen_costs, optimal_costs)
265
- L_lat = latency_loss(pred_latencies, actual_latencies)
266
- L_contra = contrastive_loss(query_emb, model_emb, emb_labels, self.temperature)
267
-
268
- return (self.w_tier * L_tier +
269
- self.w_cost * L_cost +
270
- self.w_latency * L_lat +
271
- self.w_contrastive * L_contra)
272
- ```
273
-
274
- ---
275
-
276
- ## 4. Implementation Approach for A3M
277
-
278
- ### 4.1 Phase 1: Embedding-Based Query Representation
279
-
280
- **Problem:** A3M currently uses keyword matching (12 signals, 5 dimensions).
281
-
282
- **Solution:** Add lightweight embeddings (no GPU required).
283
-
284
- ```typescript
285
- // src/routing/queryEmbedder.ts
286
-
287
- import { pipeline } from '@xenova/transformers';
288
-
289
- let embedder: any = null;
290
-
291
- export async function getQueryEmbedding(query: string): Promise<Float32Array> {
292
- if (!embedder) {
293
- // Use sentence-transformers (onnx, CPU-friendly)
294
- embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
295
- }
296
- return await embedder(query, { pooling: 'mean', normalize: true });
297
- }
298
-
299
- // Cached for speed
300
- const embeddingCache = new LRUCache<string, Float32Array>(10000);
301
- ```
302
-
303
- **Why:** MiniLM-L6-v2 is 22MB, CPU-fast, captures semantic similarity.
304
-
305
- ### 4.2 Phase 2: Cost-Aware Scoring
306
-
307
- **Current:** Linear penalty `(1 - cost/10) * weight`
308
-
309
- **Proposed:** Log-scale penalty + diminishing returns
310
-
311
- ```typescript
312
- // src/routing/costAwareScoring.ts
313
-
314
- export function costAwareScore(
315
- quality: number,
316
- cost_per_1k: number,
317
- complexity: number
318
- ): number {
319
- // Log-scale cost penalty (more realistic)
320
- const logCostPenalty = Math.log1p(cost_per_1k) / Math.log1p(10);
321
-
322
- // Complexity determines cost sensitivity
323
- // Simple queries: cost matters more (bias toward cheap)
324
- // Complex queries: quality matters more (bias toward better)
325
- const costSensitivity = 1 - complexity;
326
-
327
- // Quality should saturate (90% vs 95% is small difference)
328
- const qualitySigmoid = 1 / (1 + Math.exp(-10 * (quality - 0.8)));
329
-
330
- return (
331
- 0.6 * qualitySigmoid +
332
- 0.3 * (1 - logCostPenalty) * costSensitivity +
333
- 0.1 * (1 - costSensitivity) // Latency proxy
334
- );
335
- }
336
- ```
337
-
338
- ### 4.3 Phase 3: Contrastive Fine-Tuning (Optional)
339
-
340
- **For maximum RouterArena score improvement:**
341
-
342
- ```python
343
- # scripts/fine_tune_routing.py
344
-
345
- from sentence_transformers import SentenceTransformer, InputExample, losses
346
- from torch import nn
347
-
348
- # 1. Create training data from A3M's existing benchmark
349
- # Query → (chosen_model, cost, quality_rating) → (positive, negative) pairs
350
-
351
- def create_contrastive_examples(benchmark_data):
352
- examples = []
353
- for query in benchmark_data:
354
- for candidate in query.candidates:
355
- if candidate.chosen:
356
- pos = candidate.model_id
357
- else:
358
- neg = candidate.model_id
359
-
360
- examples.append(InputExample(
361
- texts=[query.text, pos, neg],
362
- label=1.0 if candidate.chosen else 0.0
363
- ))
364
- return examples
365
-
366
- # 2. Fine-tune embeddings
367
- model = SentenceTransformer('Xenova/all-MiniLM-L6-v2')
368
- train_loss = losses.ContrastiveLoss(model)
369
-
370
- model.fit(
371
- train_objectives=[(train_examples, train_loss)],
372
- epochs=5,
373
- warmup_steps=100
374
- )
375
-
376
- # 3. Export for A3M
377
- model.save('models/routing-embeddings')
378
- ```
379
-
380
- ### 4.4 Phase 4: Online Learning Enhancement
381
-
382
- **Current:** EMA on `quality_score` (α=0.2)
383
-
384
- **Proposed:** Contextual bandit updates
385
-
386
- ```typescript
387
- // src/routing/contextualBandit.ts
388
-
389
- interface RoutingFeedback {
390
- query: string;
391
- chosen_model: string;
392
- reward: number; // Computed from quality/cost/latency
393
-
394
- // Components
395
- quality_rating: number; // User feedback or cross-validation
396
- actual_cost: number;
397
- actual_latency: number;
398
- response_correct: boolean;
399
- }
400
-
401
- export function updateWithFeedback(feedback: RoutingFeedback): void {
402
- // Compute multi-objective reward
403
- const reward = computeReward(
404
- feedback.quality_rating,
405
- feedback.actual_cost,
406
- feedback.actual_latency,
407
- feedback.response_correct
408
- );
409
-
410
- // Thompson sampling for model selection
411
- const models = getAvailableModels();
412
-
413
- for (const model of models) {
414
- // Update posterior: Beta distribution per (query_type, model)
415
- const key = getQueryType(feedback.query) + ':' + model;
416
- const posterior = modelPosteriors[key];
417
-
418
- // Add reward observation
419
- if (reward > 0.5) {
420
- posterior.alpha += 1; // Success
421
- } else {
422
- posterior.beta += 1; // Failure
423
- }
424
- }
425
- }
426
-
427
- function computeReward(quality, cost, latency, correct): number {
428
- // Normalize to [0, 1]
429
- const q_norm = quality / 5.0; // 1-5 → 0-1
430
- const c_norm = Math.max(0, 1 - Math.log1p(cost) / 5); // Cost penalty
431
- const l_norm = Math.max(0, 1 - Math.log1p(latency) / 10000); // Latency penalty
432
- const r_norm = correct ? 1.0 : 0.0; // Correctness
433
-
434
- // Weighted sum (RouterArena-style)
435
- return 0.4 * q_norm + 0.2 * c_norm + 0.1 * l_norm + 0.3 * r_norm;
436
- }
437
- ```
438
-
439
- ---
440
-
441
- ## 5. Expected Improvement
442
-
443
- ### 5.1 RouterArena Score Projection
444
-
445
- | Change | Current Score | Expected New Score | Source |
446
- |--------|---------------|-------------------|--------|
447
- | Embedding-based routing | 70.32 | 73-75 | Semantic similarity improvement |
448
- | Cost-aware loss | 70.32 | 72-74 | Better cost-quality tradeoff |
449
- | Contrastive fine-tuning | 70.32 | 75-78 | Learned query-model alignment |
450
- | All combined | 70.32 | **77-80** | End-to-end improvement |
451
-
452
- ### 5.2 Breakdown by RouterArena Component
453
-
454
- | Component | Weight | Current | With Loss Functions | Improvement |
455
- |-----------|--------|---------|-------------------|-------------|
456
- | Accuracy (±1 tier) | 60% | ~85% | ~90% | +5 pts |
457
- | Cost Efficiency | 20% | ~60% | ~75% | +15 pts |
458
- | Latency | 20% | ~70% | ~75% | +5 pts |
459
- | **Total** | 100% | **70.32** | **~76-78** | **+6-8 pts** |
460
-
461
- ### 5.3 Conservative Estimate
462
-
463
- Even without full ML training, adding:
464
- - **Log-scale cost penalty** → +2 RouterArena points
465
- - **Cached MiniLM query embeddings** → +1 point (better semantic matching)
466
- - **Contextual bandit updates** → +2 points (faster online learning)
467
-
468
- **Conservative target: 73-74 RouterArena score**
469
-
470
- ---
471
-
472
- ## 6. Implementation Priority
473
-
474
- | Priority | Change | Complexity | Impact | Est. Time |
475
- |----------|--------|------------|--------|-----------|
476
- | P0 | Log-scale cost penalty | Low | Medium | 1 day |
477
- | P1 | Embedding cache (MiniLM) | Medium | High | 2 days |
478
- | P2 | Contextual bandit updates | Medium | High | 3 days |
479
- | P3 | Contrastive fine-tuning | High | Very High | 1 week |
480
-
481
- ---
482
-
483
- ## 7. References
484
-
485
- 1. **RouteLLM** - LMSYS/Anyscale, arXiv:2404.06035
486
- - Learned routing from pairwise preferences
487
- - BERT classifier with cross-entropy loss
488
-
489
- 2. **RouterArena** - Berkeley, arXiv:2510.00202
490
- - 8,400 queries, 19 routers evaluated
491
- - Composite scoring: accuracy (60%), cost (20%), latency (20%)
492
-
493
- 3. **LLMRouterBench** - ACL 2024
494
- - 400K+ instances, 9 domains
495
- - TF-IDF baseline: 62.3%, Neural: 78.1%
496
-
497
- 4. **Self-Consistency** - Wang et al., ICLR 2023
498
- - Multiple reasoning paths improve GSM8K by +17.9 points
499
- - Relevant to A3M's ensemble voting
500
-
501
- 5. **Deep Ensembles** - Lakshminarayanan et al., NeurIPS 2017
502
- - Confidence-weighted ensembles reduce error by 10-30%
503
- - Foundation for A3M's voting mechanism
504
-
505
- ---
506
-
507
- ## Appendix: Quick Wins
508
-
509
- ### Quick Win 1: Immediate Cost Penalty Fix
510
-
511
- In `advancedRouter.ts`, replace:
512
-
513
- ```typescript
514
- // CURRENT (linear)
515
- const avg_cost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
516
- return (1 - Math.min(avg_cost / 10, 1)) * 0.6;
517
- ```
518
-
519
- With:
520
-
521
- ```typescript
522
- // PROPOSED (log-scale)
523
- const avg_cost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
524
- return Math.max(0, 1 - Math.log1p(avg_cost) / Math.log1p(10)) * 0.6;
525
- ```
526
-
527
- **Effect:** Makes router less aggressive about ultra-cheap models, better cost-quality tradeoff.
528
-
529
- ### Quick Win 2: Latency in Routing Score
530
-
531
- Add latency penalty to scoring:
532
-
533
- ```typescript
534
- const latencyPenalty = Math.max(0, 1 - model.latency_ms / 10000);
535
- const qualityScore = scoreModelFit(profile, features);
536
- const costScore = costEfficiency(profile, features);
537
-
538
- return 0.5 * qualityScore + 0.3 * costScore + 0.2 * latencyPenalty;
539
- ```
540
-
541
- **Effect:** RouterArena scores improve on latency component (+2-3 points).
542
-
543
- ---
544
-
545
- *Generated: 2026-06-03 | For A3M Router v2.2+*
package/research-log.md DELETED
@@ -1,49 +0,0 @@
1
- # A3M Router Research Log
2
-
3
- ## 2026-06-03 - Test Coverage Analysis
4
-
5
- ### Research State
6
- ```
7
- Project: A3M Router Test Coverage Analysis
8
- Date: 2026-06-03
9
- Agents: 3 (Architecture, Performance, Test Coverage)
10
- Goal: Identify top 3 improvements via council vote
11
- ```
12
-
13
- ### Scope Explored
14
- - `test/` - 7 legacy JS test files (including budgetEnforcer, observability, providerHealth, providerRetry, semanticCache)
15
- - `tests/` - Vitest test suite (routing/ensembleVoting, routing/providerRetry, routing/queryTypePresets, memory/episodicMemory)
16
- - `test-council/` - 5 test files (structure, edge-case, performance, integration, agent-council-eval)
17
-
18
- ### Key Source Files Analyzed
19
- - `src/ensemble.ts` - EnsembleOrchestrator (no tests)
20
- - `src/sdk.ts` - A3MRouter SDK (structure only)
21
- - `src/cost/budgetEnforcer.ts` - Budget enforcement (legacy test)
22
- - `src/analytics/costAnalytics.ts` - Cost analytics (no tests)
23
- - `src/security/guardrails.ts` - GuardrailEngine (NO TESTS - CRITICAL)
24
- - `src/observability/middleware.ts` - Express middleware (not tested)
25
- - `src/routing/crossModelValidation.ts` - Cross-model validation (not tested)
26
- - `src/observability/fatigueDetector.ts` - Fatigue detection (not tested)
27
-
28
- ### Coverage Summary
29
- | Module | Coverage | Status |
30
- |--------|----------|--------|
31
- | Routing | Partial | ensembleVoting, providerRetry, queryTypePresets |
32
- | Memory | Good | episodicMemory well tested |
33
- | Observability | Partial | Tracer, MetricsCollector tested; middleware not |
34
- | Security | NONE | GuardrailEngine untested |
35
- | Cost | Partial | budgetEnforcer legacy test; costAnalytics untested |
36
- | SDK | Structure only | No behavioral tests |
37
-
38
- ### Critical Gaps Identified
39
- 1. **GuardrailEngine** - Zero tests for security-critical code
40
- 2. **EnsembleOrchestrator** - Core P0 feature lacks integration tests
41
- 3. **CostAnalytics** - No tests for savings calculation accuracy
42
- 4. **SDK Class** - Only type checking, no behavioral tests
43
- 5. **Middleware** - Not tested
44
-
45
- ### Output
46
- Created: `council-votes/coverage-vote.md`
47
- Vote: Finding #1 (GuardrailEngine) as highest priority
48
-
49
- ---
package/scripts/banner.js DELETED
@@ -1,29 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * A3M Router — Terminal ASCII Art Banner
4
- *
5
- * Printed on CLI startup to reinforce A3M branding.
6
- * Usage:
7
- * node scripts/banner.js
8
- * // or import './banner' in CLI entry point
9
- */
10
-
11
- const A3M_BANNER = `
12
- ╔══════════════════════════════════════════════════════════╗
13
- ║ ╔═╗╔═╗╔╗╔╔═╗ ║
14
- ║ ╠═╣║ ║║║║║ ║ ║
15
- ║ ╩ ╩╚═╝╝╚╝╚═╝ ║
16
- ║ ║
17
- ║ Parallel Multi-LLM Execution Engine ║
18
- ║ ║
19
- ║ 47+ Providers · Ensemble Voting · 62% Cost Savings ║
20
- ║ ║
21
- ║ ${'\x1b[2m'}https://github.com/Das-rebel/a3m-router${'\x1b[0m'}${' '.repeat(19)}║
22
- ╚══════════════════════════════════════════════════════════╝
23
- `;
24
-
25
- module.exports = A3M_BANNER;
26
-
27
- if (require.main === module) {
28
- process.stdout.write(A3M_BANNER);
29
- }