adaptive-memory-multi-model-router 2.14.46 → 2.14.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (598)
  1. package/{docs/llms.txt → llms.txt.bak} +6 -6
  2. package/package.json +13 -84
  3. package/src/routing/advancedRouter.ts.bak +650 -0
  4. package/test.js.bak +376 -0
  5. package/.dockerignore +0 -82
  6. package/.env.example +0 -303
  7. package/.github/DISCUSSIONS_WELCOME.md +0 -27
  8. package/.github/DISCUSSION_TEMPLATE.yml +0 -5
  9. package/.github/FUNDING.yml +0 -2
  10. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -94
  11. package/.github/ISSUE_TEMPLATE/config.yml +0 -17
  12. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -71
  13. package/.github/PULL_REQUEST_TEMPLATE.md +0 -71
  14. package/.github/dependabot.yml +0 -9
  15. package/.github/workflows/auto-publish.yml +0 -51
  16. package/.github/workflows/ci.yml +0 -263
  17. package/.github/workflows/codeql.yml +0 -38
  18. package/.github/workflows/npm-publish.yml +0 -20
  19. package/.github/workflows/pages.yml +0 -37
  20. package/.github/workflows/stale.yml +0 -54
  21. package/.publish-tick +0 -1
  22. package/.well-known/ai-plugin.json +0 -16
  23. package/AGENT_COUNCIL_FINDINGS.md +0 -142
  24. package/ARCHITECTURE.md +0 -346
  25. package/AUDIT_REPORT.md +0 -28
  26. package/CODE_OF_CONDUCT.md +0 -128
  27. package/CONTRIBUTING.md +0 -50
  28. package/CONTRIBUTORS.md +0 -20
  29. package/Dockerfile +0 -53
  30. package/Dockerfile.proxy +0 -33
  31. package/HEALTH_REPORT.md +0 -118
  32. package/IMPROVEMENT_PLAN.md +0 -107
  33. package/LANDING.md +0 -43
  34. package/LAUNCH-PAIN-DRIVEN.md +0 -339
  35. package/LAUNCH.md +0 -337
  36. package/LAUNCH_CHECKLIST.md +0 -141
  37. package/LAUNCH_SNAPSHOT.md +0 -260
  38. package/MANIFESTO.md +0 -41
  39. package/POPULARITY_BOOSTERS.md +0 -285
  40. package/PR_STATUS_REPORT.md +0 -148
  41. package/REDESIGN.md +0 -95
  42. package/RUNKIT.md +0 -83
  43. package/SECURITY.md +0 -29
  44. package/SUBMISSIONS.md +0 -43
  45. package/_schema.html +0 -53
  46. package/ai-plugin.json +0 -16
  47. package/articles/AI_AGENT_LLM_ROUTING.md +0 -150
  48. package/articles/CHINESE_DIRECTORIES.md +0 -100
  49. package/articles/CHINESE_SUBMISSIONS_READY.md +0 -322
  50. package/articles/COMPETITOR_ALERTS.md +0 -31
  51. package/articles/COMPLETE_POSTING_DIRECTORY.md +0 -147
  52. package/articles/CONTENT_STRUCTURE.md +0 -292
  53. package/articles/DEVTO_COST_GUIDE.md +0 -473
  54. package/articles/DEVTO_FINAL.md +0 -416
  55. package/articles/DEVTO_MULTI_PROVIDER.md +0 -542
  56. package/articles/DEVTO_READY.md +0 -255
  57. package/articles/DEVTO_V2_ANNOUNCEMENT.md +0 -160
  58. package/articles/DEVTO_VIRAL_GROWTH.md +0 -280
  59. package/articles/FRESH_devto.md +0 -460
  60. package/articles/FRESH_devto_2026_05.md +0 -73
  61. package/articles/FRESH_hackernews.md +0 -14
  62. package/articles/FRESH_reddit_ml.md +0 -90
  63. package/articles/FRESH_reddit_node.md +0 -198
  64. package/articles/FRESH_reddit_sideproject.md +0 -72
  65. package/articles/FRESH_reddit_webdev.md +0 -130
  66. package/articles/FROM_ZERO_TO_10K.md +0 -107
  67. package/articles/HN_10X_BETTER.md +0 -430
  68. package/articles/HN_ACCOUNT_GUIDE.md +0 -21
  69. package/articles/HN_CHINESE_STYLE.md +0 -308
  70. package/articles/HN_FINAL.md +0 -148
  71. package/articles/HN_POSTED_VERSION.md +0 -56
  72. package/articles/HN_POST_READY.md +0 -137
  73. package/articles/HN_RESEARCH.md +0 -364
  74. package/articles/HN_SHOW_routerarena.md +0 -17
  75. package/articles/HN_TIMING_GUIDE.md +0 -52
  76. package/articles/INDIEHACKERS_POST.md +0 -52
  77. package/articles/INDIEHACKERS_READY.md +0 -120
  78. package/articles/LLM_BENCHMARK_DEEP_DIVE.md +0 -153
  79. package/articles/MASTER_POSTING_DIRECTORY.md +0 -189
  80. package/articles/NEWSLETTER_SEND_NOW.md +0 -259
  81. package/articles/NEWSLETTER_SUBMISSIONS.md +0 -112
  82. package/articles/PAIN-DRIVEN-devto-v2.md +0 -308
  83. package/articles/PAIN-DRIVEN-devto-v3.md +0 -268
  84. package/articles/PAIN-DRIVEN-devto.md +0 -242
  85. package/articles/PAIN-DRIVEN-hackernews-v2.md +0 -138
  86. package/articles/PAIN-DRIVEN-hackernews-v3.md +0 -151
  87. package/articles/PAIN-DRIVEN-hackernews.md +0 -131
  88. package/articles/PAIN-DRIVEN-reddit-v2.md +0 -301
  89. package/articles/PAIN-DRIVEN-reddit-v3.md +0 -236
  90. package/articles/PAIN-DRIVEN-reddit.md +0 -218
  91. package/articles/PAIN-DRIVEN-twitter-v2.md +0 -110
  92. package/articles/PAIN-DRIVEN-twitter-v3.md +0 -121
  93. package/articles/PAIN-DRIVEN-twitter.md +0 -120
  94. package/articles/PORTKEY_VS_A3M.md +0 -147
  95. package/articles/POSTING_KIT_2026_05.md +0 -67
  96. package/articles/PRESS_KIT_routerarena.md +0 -77
  97. package/articles/PRODUCTHUNT_LISTING.md +0 -48
  98. package/articles/PRODUCTHUNT_READY.md +0 -106
  99. package/articles/PR_PLAN_vault.md +0 -125
  100. package/articles/REDDIT_FINAL.md +0 -232
  101. package/articles/REDDIT_POST.md +0 -67
  102. package/articles/REDDIT_SUBMISSION_READY.md +0 -348
  103. package/articles/ROUTERARENA_LEADER.md +0 -45
  104. package/articles/SHOW_HN_FINAL.md +0 -29
  105. package/articles/TWEETS_10K_DOWNLOADS.md +0 -47
  106. package/articles/TWEETS_BENCHMARK_FIRST.md +0 -46
  107. package/articles/TWEETS_MCP_PLAY.md +0 -51
  108. package/articles/TWEETS_SEQUENTIAL_BROKEN.md +0 -49
  109. package/articles/TWEETS_WHY_BUILD.md +0 -54
  110. package/articles/TWEETS_routerarena_leader.md +0 -53
  111. package/articles/TWEET_STORM_READY.md +0 -165
  112. package/articles/TWITTER_FINAL.md +0 -167
  113. package/articles/WHY_10X_BETTER.md +0 -261
  114. package/articles/WHY_CHINESE_STYLE_BETTER.md +0 -323
  115. package/articles/ai-discoverability-llm-routing.md +0 -210
  116. package/articles/devto-llm-routing.md +0 -138
  117. package/articles/hackernews-show-hn.md +0 -54
  118. package/articles/hashnode-llm-cost-optimization.md +0 -125
  119. package/articles/hn_show_2026_05.md +0 -11
  120. package/articles/medium-building-llm-router.md +0 -205
  121. package/articles/reddit-ml.md +0 -76
  122. package/articles/twitter-thread-cost-savings.md +0 -50
  123. package/articles/youtube-tutorial-script.md +0 -262
  124. package/assets/a3m_3blue1brown.mp4 +0 -0
  125. package/assets/banner.svg +0 -109
  126. package/assets/chart-cost-v2.svg +0 -91
  127. package/assets/chart-cost-v3.svg +0 -143
  128. package/assets/chart-features-v2.svg +0 -132
  129. package/assets/chart-features-v3.svg +0 -211
  130. package/assets/chart-growth-v2.svg +0 -122
  131. package/assets/chart-growth-v3.svg +0 -189
  132. package/assets/cost-comparison.svg +0 -134
  133. package/assets/cost-simple.svg +0 -64
  134. package/assets/demo-hn.gif +0 -0
  135. package/assets/feature-matrix.svg +0 -136
  136. package/assets/growth-chart-animated.svg +0 -76
  137. package/assets/growth-chart.svg +0 -82
  138. package/assets/growth-simple.svg +0 -69
  139. package/assets/hero-diagram.svg +0 -81
  140. package/assets/logo-new.svg +0 -21
  141. package/assets/logo.svg +0 -68
  142. package/assets/provider-comparison.svg +0 -121
  143. package/assets/social-preview-new.svg +0 -100
  144. package/assets/social-preview.svg +0 -194
  145. package/assets/social-v2.svg +0 -130
  146. package/assets/social-v3.svg +0 -212
  147. package/benchmark-provider-results.json +0 -245
  148. package/benchmark-results.json +0 -54
  149. package/council-votes/architecture-vote.md +0 -121
  150. package/council-votes/coverage-vote.md +0 -93
  151. package/data/adaptive-benchmark.json +0 -92
  152. package/data/benchmark-results.json +0 -47
  153. package/data/labeled-benchmark.json +0 -88
  154. package/demo/3blue1brown_video.py +0 -285
  155. package/demo/3blue1brown_video_v2.py +0 -310
  156. package/demo/IMPROVED_PROMPTS.md +0 -229
  157. package/demo/VEO3_PROMPTS.md +0 -269
  158. package/demo/VIDEO_PRODUCTION_GUIDE.md +0 -333
  159. package/demo/a3m_3blue1brown.mp4 +0 -0
  160. package/demo/asciinema-demo.sh +0 -195
  161. package/demo/demo-hn.tape +0 -74
  162. package/demo/demo-script.md +0 -53
  163. package/demo/demo-script.sh +0 -62
  164. package/demo/demo.svg +0 -75
  165. package/demo/frame1_ai_data_center.png +0 -0
  166. package/demo/frame1_sunset_video.mp4 +0 -0
  167. package/demo/frame2_cost_comparison.png +0 -0
  168. package/demo/frame2_cost_comparison_fallback.png +0 -0
  169. package/demo/frame3_parallel_execution.png +0 -0
  170. package/demo/frame3_parallel_execution_fallback.png +0 -0
  171. package/demo/frame4_providers.png +0 -0
  172. package/demo/frame4_providers_fallback.png +0 -0
  173. package/demo/frame5_endcard.png +0 -0
  174. package/demo/frame5_endcard_fallback.png +0 -0
  175. package/demo/new_frame1_hook.png +0 -0
  176. package/demo/new_frame2_proof.png +0 -0
  177. package/demo/new_frame3_wow.png +0 -0
  178. package/demo/new_frame4_social.png +0 -0
  179. package/demo/new_frame5_cta.png +0 -0
  180. package/demo/package.json +0 -13
  181. package/demo/product-video-final.mp4 +0 -0
  182. package/demo/product-video-hype-v1.mp4 +0 -0
  183. package/demo/product-video-v1.mp4 +0 -0
  184. package/demo/public/index.html +0 -762
  185. package/demo/recording.cast +0 -55
  186. package/demo/server.js +0 -405
  187. package/demo-new.tape +0 -71
  188. package/demo-real.sh +0 -198
  189. package/demo-simple.tape +0 -205
  190. package/demo.html +0 -520
  191. package/demo.sh +0 -85
  192. package/demo.tape +0 -259
  193. package/dist/analytics/costAnalytics.d.ts.map +0 -1
  194. package/dist/analytics/costAnalytics.js.map +0 -1
  195. package/dist/benchmark/comprehensive.js.map +0 -1
  196. package/dist/benchmark/reproducible.d.ts.map +0 -1
  197. package/dist/benchmark/reproducible.js.map +0 -1
  198. package/dist/cache/prefixCache.d.ts.map +0 -1
  199. package/dist/cache/prefixCache.js.map +0 -1
  200. package/dist/cache/responseCache.d.ts.map +0 -1
  201. package/dist/cache/responseCache.js.map +0 -1
  202. package/dist/cache/semanticCache.d.ts.map +0 -1
  203. package/dist/cache/semanticCache.js.map +0 -1
  204. package/dist/cli/setupWizard.d.ts.map +0 -1
  205. package/dist/cli/setupWizard.js.map +0 -1
  206. package/dist/cost/budgetEnforcer.d.ts.map +0 -1
  207. package/dist/cost/budgetEnforcer.js.map +0 -1
  208. package/dist/cost/costTracker.d.ts.map +0 -1
  209. package/dist/cost/costTracker.js.map +0 -1
  210. package/dist/ensemble/multiRoundDialog.js.map +0 -1
  211. package/dist/ensemble/shapleyValue.js.map +0 -1
  212. package/dist/integrations/langchainAdapter.d.ts.map +0 -1
  213. package/dist/integrations/langchainAdapter.js.map +0 -1
  214. package/dist/integrations/oauth.d.ts.map +0 -1
  215. package/dist/integrations/oauth.js.map +0 -1
  216. package/dist/integrations/scienceAdapter.js.map +0 -1
  217. package/dist/memory/autoFetch.d.ts.map +0 -1
  218. package/dist/memory/autoFetch.js.map +0 -1
  219. package/dist/memory/episodicMemory.d.ts.map +0 -1
  220. package/dist/memory/episodicMemory.js.map +0 -1
  221. package/dist/memory/hybridMemory.js.map +0 -1
  222. package/dist/memory/memoryTree.d.ts.map +0 -1
  223. package/dist/memory/memoryTree.js.map +0 -1
  224. package/dist/memory/obsidianVault.d.ts.map +0 -1
  225. package/dist/memory/obsidianVault.js.map +0 -1
  226. package/dist/memory/reasoningBank.js.map +0 -1
  227. package/dist/observability/changeWatch.d.ts.map +0 -1
  228. package/dist/observability/changeWatch.js.map +0 -1
  229. package/dist/observability/fatigueDetector.d.ts.map +0 -1
  230. package/dist/observability/fatigueDetector.js.map +0 -1
  231. package/dist/observability/index.d.ts.map +0 -1
  232. package/dist/observability/index.js.map +0 -1
  233. package/dist/observability/metrics.d.ts.map +0 -1
  234. package/dist/observability/metrics.js.map +0 -1
  235. package/dist/observability/middleware.d.ts.map +0 -1
  236. package/dist/observability/middleware.js.map +0 -1
  237. package/dist/observability/tracer.d.ts.map +0 -1
  238. package/dist/observability/tracer.js.map +0 -1
  239. package/dist/observability/types.d.ts.map +0 -1
  240. package/dist/observability/types.js.map +0 -1
  241. package/dist/orchestration/haloOrchestrator.d.ts.map +0 -1
  242. package/dist/orchestration/haloOrchestrator.js.map +0 -1
  243. package/dist/orchestration/mctsWorkflow.d.ts.map +0 -1
  244. package/dist/orchestration/mctsWorkflow.js.map +0 -1
  245. package/dist/providers/localProvider.d.ts.map +0 -1
  246. package/dist/providers/localProvider.js.map +0 -1
  247. package/dist/providers/providerConfig.d.ts.map +0 -1
  248. package/dist/providers/providerConfig.js.map +0 -1
  249. package/dist/providers/registry.d.ts.map +0 -1
  250. package/dist/providers/registry.js.map +0 -1
  251. package/dist/routing/advancedRouter.d.ts.map +0 -1
  252. package/dist/routing/advancedRouter.js.map +0 -1
  253. package/dist/routing/crossModelValidation.d.ts.map +0 -1
  254. package/dist/routing/crossModelValidation.js.map +0 -1
  255. package/dist/routing/providerHealth.d.ts.map +0 -1
  256. package/dist/routing/providerHealth.js.map +0 -1
  257. package/dist/routing/providerRetry.d.ts.map +0 -1
  258. package/dist/routing/providerRetry.js.map +0 -1
  259. package/dist/scripts/banner.js +0 -29
  260. package/dist/security/guardrails.d.ts.map +0 -1
  261. package/dist/security/guardrails.js.map +0 -1
  262. package/dist/server/dashboard.d.ts.map +0 -1
  263. package/dist/server/dashboard.js.map +0 -1
  264. package/dist/server/modelMapper.d.ts.map +0 -1
  265. package/dist/server/modelMapper.js.map +0 -1
  266. package/dist/server/proxyServer.d.ts.map +0 -1
  267. package/dist/server/proxyServer.js.map +0 -1
  268. package/dist/skills/__tests__/skill_manager.test.d.ts +0 -2
  269. package/dist/skills/__tests__/skill_manager.test.d.ts.map +0 -1
  270. package/dist/skills/__tests__/skill_manager.test.js +0 -268
  271. package/dist/skills/__tests__/skill_manager.test.js.map +0 -1
  272. package/dist/tools/tmlpdTools.d.ts.map +0 -1
  273. package/dist/tools/tmlpdTools.js.map +0 -1
  274. package/dist/tui/dashboard.d.ts.map +0 -1
  275. package/dist/tui/dashboard.js.map +0 -1
  276. package/dist/tui/index.d.ts.map +0 -1
  277. package/dist/tui/index.js.map +0 -1
  278. package/dist/utils/batchProcessor.d.ts.map +0 -1
  279. package/dist/utils/batchProcessor.js.map +0 -1
  280. package/dist/utils/compression.d.ts.map +0 -1
  281. package/dist/utils/compression.js.map +0 -1
  282. package/dist/utils/costUtils.d.ts.map +0 -1
  283. package/dist/utils/costUtils.js.map +0 -1
  284. package/dist/utils/reliability.d.ts.map +0 -1
  285. package/dist/utils/reliability.js.map +0 -1
  286. package/dist/utils/sorting.d.ts.map +0 -1
  287. package/dist/utils/sorting.js.map +0 -1
  288. package/dist/utils/speculativeDecoding.d.ts.map +0 -1
  289. package/dist/utils/speculativeDecoding.js.map +0 -1
  290. package/dist/utils/tokenUtils.d.ts.map +0 -1
  291. package/dist/utils/tokenUtils.js.map +0 -1
  292. package/docs/.nojekyll +0 -0
  293. package/docs/ANALYSIS_PRINCIPLES.md +0 -162
  294. package/docs/API.md +0 -855
  295. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +0 -1391
  296. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +0 -1051
  297. package/docs/BENCHMARK.md +0 -170
  298. package/docs/CHINESE_PROVIDER_RELIABILITY.md +0 -37
  299. package/docs/CITATIONS.md +0 -74
  300. package/docs/CLAIMS_AND_EVIDENCE.md +0 -58
  301. package/docs/CONFIGURATION.md +0 -476
  302. package/docs/COUNCIL_DECISION.json +0 -816
  303. package/docs/COUNCIL_SUMMARY.md +0 -319
  304. package/docs/COUNCIL_V2.2_DECISION.md +0 -416
  305. package/docs/ENGINEERING_SPEC.md +0 -55
  306. package/docs/FACTORY_RESET.md +0 -34
  307. package/docs/GEO.md +0 -66
  308. package/docs/GEO_OPTIMIZATION.md +0 -30
  309. package/docs/GEO_ROOT_CAUSE.md +0 -136
  310. package/docs/GEO_STATUS.md +0 -85
  311. package/docs/GEO_TEST_RESULTS.md +0 -176
  312. package/docs/HN_CHECKLIST.md +0 -38
  313. package/docs/HN_FOUNDER_COMMENT.md +0 -17
  314. package/docs/HN_SUBMISSION_FINAL.md +0 -180
  315. package/docs/HN_SUBMISSION_V3.md +0 -56
  316. package/docs/IMPROVEMENT_ROADMAP.md +0 -515
  317. package/docs/INTEGRATIONS.md +0 -420
  318. package/docs/LANGCHAIN_INTEGRATION.md +0 -147
  319. package/docs/LLM_COUNCIL_DECISION.md +0 -508
  320. package/docs/MIDDLEWARE_CHAIN.md +0 -35
  321. package/docs/PROMO_CHECKLIST.md +0 -200
  322. package/docs/QUICKSTART.md +0 -271
  323. package/docs/QUICK_START.md +0 -43
  324. package/docs/QUICK_START_VISIBILITY.md +0 -782
  325. package/docs/REDDIT_GAP_ANALYSIS.md +0 -299
  326. package/docs/RELEASE_CHECKLIST.md +0 -32
  327. package/docs/REPRODUCIBILITY.md +0 -63
  328. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +0 -1180
  329. package/docs/ROUTING_RUBRIC.md +0 -197
  330. package/docs/SEO_AUDIT.md +0 -186
  331. package/docs/SOCIAL_LISTENING.md +0 -219
  332. package/docs/TMLPD_QNA.md +0 -751
  333. package/docs/TMLPD_V2.1_COMPLETE.md +0 -763
  334. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +0 -754
  335. package/docs/UPDATE_TOPICS.md +0 -15
  336. package/docs/USE_CASES.md +0 -59
  337. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +0 -446
  338. package/docs/V2_IMPLEMENTATION_GUIDE.md +0 -388
  339. package/docs/VERCEL_AI_SDK.md +0 -209
  340. package/docs/VISIBILITY_ADOPTION_PLAN.md +0 -1005
  341. package/docs/_config.yml +0 -49
  342. package/docs/ai-plugin.json +0 -16
  343. package/docs/api.html +0 -513
  344. package/docs/architecture-diagram.md +0 -40
  345. package/docs/benchmark-chart.png +0 -0
  346. package/docs/benchmark.html +0 -387
  347. package/docs/blog/routerarena-number-one.html +0 -73
  348. package/docs/cli-cheatsheet.md +0 -339
  349. package/docs/compare.md +0 -109
  350. package/docs/comparison-litellm.md +0 -88
  351. package/docs/comparison.md +0 -108
  352. package/docs/cost-chart-ascii.md +0 -42
  353. package/docs/cost-comparison-chart.svg +0 -88
  354. package/docs/curl-examples.md +0 -247
  355. package/docs/demo-auto.html +0 -264
  356. package/docs/demo.html +0 -416
  357. package/docs/geo/GENERATIVE_ENGINE_OPTIMIZATION.md +0 -232
  358. package/docs/index.html +0 -507
  359. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +0 -421
  360. package/docs/launch-content/README.md +0 -457
  361. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  362. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  363. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  364. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  365. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  366. package/docs/launch-content/generate_charts.py +0 -313
  367. package/docs/launch-content/hn_show_post.md +0 -139
  368. package/docs/launch-content/partner_outreach_templates.md +0 -745
  369. package/docs/launch-content/reddit_posts.md +0 -467
  370. package/docs/launch-content/twitter_thread.txt +0 -460
  371. package/docs/npm-downloads-chart.svg +0 -43
  372. package/docs/openapi.json +0 -139
  373. package/docs/openapi.yaml +0 -1318
  374. package/docs/quick-start.html +0 -366
  375. package/docs/robots.txt +0 -52
  376. package/docs/sitemap.xml +0 -57
  377. package/docs/styles.css +0 -682
  378. package/docs/well-known/ai-plugin.json +0 -16
  379. package/docs/wellknown/ai-plugin.json +0 -16
  380. package/docs-site/assets/og-banner.svg +0 -194
  381. package/docs-site/index.html +0 -632
  382. package/eval/README.md +0 -46
  383. package/eval/baselines/main.json +0 -12
  384. package/eval/benchmark_dataset.jsonl +0 -16
  385. package/eval/check_golden_routes.js +0 -64
  386. package/eval/datasets/catalog.json +0 -33
  387. package/eval/datasets/slices/cn_provider_reliability_v1.jsonl +0 -3
  388. package/eval/datasets/slices/cost_pressure_v1.jsonl +0 -3
  389. package/eval/datasets/slices/safety_guardrails_v1.jsonl +0 -3
  390. package/eval/evals.json +0 -199
  391. package/eval/fault_injection_thresholds.json +0 -3
  392. package/eval/generate_report.js +0 -128
  393. package/eval/golden_routes.json +0 -114
  394. package/eval/lib/experiment_registry.js +0 -24
  395. package/eval/run_eval.js +0 -197
  396. package/eval/run_fault_injection.js +0 -201
  397. package/eval/run_shadow_eval.js +0 -85
  398. package/eval/thresholds.json +0 -9
  399. package/examples/QUICKSTART.md +0 -183
  400. package/examples/README.md +0 -61
  401. package/examples/a3m-sdk.js +0 -124
  402. package/examples/basic-route.js +0 -54
  403. package/examples/chat-loop.js +0 -202
  404. package/examples/classify-then-route.js +0 -102
  405. package/examples/cost-compare.js +0 -120
  406. package/examples/ensemble.js +0 -160
  407. package/examples/whatsapp-telegram-bridge-demo.js +0 -302
  408. package/examples/whatsapp-telegram-bridge.js +0 -269
  409. package/hf-space/README.md +0 -23
  410. package/hf-space/app.py +0 -240
  411. package/hf-space/requirements.txt +0 -1
  412. package/huggingface_space/README.md +0 -35
  413. package/huggingface_space/app.py +0 -126
  414. package/huggingface_space/create_space.py +0 -208
  415. package/huggingface_space/requirements.txt +0 -1
  416. package/mcp-server/README.md +0 -188
  417. package/mcp-server/package.json +0 -29
  418. package/mcp-server/src/index.ts +0 -744
  419. package/mcp-server/tsconfig.json +0 -19
  420. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +0 -313
  421. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +0 -277
  422. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +0 -1234
  423. package/openclaw-alexa-bridge/test_fixes.js +0 -77
  424. package/playground/README.md +0 -51
  425. package/playground/codesandbox.json +0 -12
  426. package/playground/index.js +0 -39
  427. package/proxy/README.md +0 -227
  428. package/proxy/package-lock.json +0 -831
  429. package/proxy/package.json +0 -17
  430. package/proxy/rate-limit.js +0 -145
  431. package/proxy/rate-limit.test.js +0 -311
  432. package/proxy/server.js +0 -970
  433. package/python/README.md +0 -102
  434. package/python/a3m/__init__.py +0 -6
  435. package/python/a3m/client.py +0 -190
  436. package/python/a3m/models.py +0 -40
  437. package/python/a3m/sync_client.py +0 -61
  438. package/python/examples.py +0 -53
  439. package/python/integrations.py +0 -330
  440. package/python/pyproject.toml +0 -23
  441. package/python/setup.py +0 -28
  442. package/python/tmlpd.py +0 -369
  443. package/qna/REDDIT_GAP_ANALYSIS.md +0 -299
  444. package/qna/TMLPD_QNA.md +0 -751
  445. package/research/FINDING_001_safety.md +0 -28
  446. package/research/FINDING_002_error_diversity.md +0 -32
  447. package/research/FINDING_003_confidence_weighted_voting.md +0 -32
  448. package/research/FINDING_004_cross_model_semantic_detection.md +0 -37
  449. package/research/FINDING_005_knowledge_gap_orthogonality.md +0 -34
  450. package/research/HALLUCINATION_RESEARCH.md +0 -27
  451. package/research/ensemble-voting.md +0 -324
  452. package/research/loss-functions.md +0 -545
  453. package/research-log.md +0 -49
  454. package/scripts/banner.js +0 -29
  455. package/scripts/benchmark-local-routerarena.ts +0 -176
  456. package/scripts/benchmark.js +0 -145
  457. package/scripts/benchmark.sh +0 -61
  458. package/scripts/compare-providers.sh +0 -230
  459. package/scripts/content-planner.js +0 -25
  460. package/scripts/create-labeled-benchmark.ts +0 -105
  461. package/scripts/cross_post.py +0 -443
  462. package/scripts/local-router-benchmark.ts +0 -154
  463. package/scripts/post-all.sh +0 -41
  464. package/scripts/publish_fcc.py +0 -106
  465. package/scripts/push-to-gitee.sh +0 -25
  466. package/scripts/routerarena_ensemble.js +0 -144
  467. package/scripts/routing-benchmark-v2.js +0 -373
  468. package/scripts/routing-benchmark-v3.js +0 -118
  469. package/scripts/routing-benchmark.js +0 -462
  470. package/scripts/run-labeled-benchmark.mjs +0 -104
  471. package/scripts/run-mmlu-benchmark.js +0 -176
  472. package/scripts/run-provider-benchmark.js +0 -244
  473. package/scripts/update-npm-badges.js +0 -158
  474. package/skill/SKILL.md +0 -238
  475. package/src/__tests__/integration/tmpld_integration.test.py +0 -540
  476. package/src/skills/__tests__/skill_manager.test.ts +0 -328
  477. package/submissions/benchmarks/ALL_PLATFORMS_SUBMISSION.md +0 -94
  478. package/submissions/benchmarks/LLMROUTERBENCH_SUBMISSION.md +0 -121
  479. package/submissions/benchmarks/MMRBENCH_SUBMISSION.md +0 -94
  480. package/submissions/benchmarks/ROUTERARENA_UPDATE.md +0 -83
  481. package/submissions/benchmarks/ROUTERBENCH_SUBMISSION.md +0 -225
  482. package/test-council/1-structure-tests.test.js +0 -353
  483. package/test-council/1-structure-tests.test.ts +0 -353
  484. package/test-council/2-edge-case-tests.test.ts +0 -361
  485. package/test-council/3-performance-tests.test.ts +0 -669
  486. package/test-council/4-integration-tests.test.ts +0 -391
  487. package/test-council/5-agent-council-eval.test.ts +0 -413
  488. package/test-council/AGENT_COUNCIL_ARCHITECTURE.md +0 -349
  489. package/test-council/TEST_COUNCIL_REPORT.md +0 -201
  490. package/test-council/agents/edge-case-agent.ts +0 -363
  491. package/test-council/agents/performance-agent.ts +0 -426
  492. package/test-council/agents/structure-agent.ts +0 -227
  493. package/test-council/council.md +0 -183
  494. package/tests/__mocks__/tokenUtils.ts +0 -8
  495. package/tests/memory/episodicMemory.test.ts +0 -227
  496. package/tests/package-lock.json +0 -1628
  497. package/tests/package.json +0 -18
  498. package/tests/routing/ensembleVoting.test.ts +0 -236
  499. package/tests/routing/providerRetry.test.ts +0 -360
  500. package/tests/routing/queryTypePresets.test.ts +0 -208
  501. package/tests/security/guardrailEngine.test.ts +0 -700
  502. package/tests/tsconfig.json +0 -21
  503. package/tests/vitest.config.ts +0 -18
  504. package/tmlpd-pi-extension/README.md +0 -66
  505. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +0 -114
  506. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +0 -1
  507. package/tmlpd-pi-extension/dist/cache/prefixCache.js +0 -285
  508. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +0 -1
  509. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +0 -58
  510. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +0 -1
  511. package/tmlpd-pi-extension/dist/cache/responseCache.js +0 -153
  512. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +0 -1
  513. package/tmlpd-pi-extension/dist/cli.js +0 -59
  514. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +0 -95
  515. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +0 -1
  516. package/tmlpd-pi-extension/dist/cost/costTracker.js +0 -240
  517. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +0 -1
  518. package/tmlpd-pi-extension/dist/index.d.ts +0 -723
  519. package/tmlpd-pi-extension/dist/index.d.ts.map +0 -1
  520. package/tmlpd-pi-extension/dist/index.js +0 -239
  521. package/tmlpd-pi-extension/dist/index.js.map +0 -1
  522. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +0 -82
  523. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +0 -1
  524. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +0 -145
  525. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +0 -1
  526. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +0 -102
  527. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +0 -1
  528. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +0 -207
  529. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +0 -1
  530. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +0 -85
  531. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +0 -1
  532. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +0 -210
  533. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +0 -1
  534. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +0 -102
  535. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +0 -1
  536. package/tmlpd-pi-extension/dist/providers/localProvider.js +0 -338
  537. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +0 -1
  538. package/tmlpd-pi-extension/dist/providers/registry.d.ts +0 -55
  539. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +0 -1
  540. package/tmlpd-pi-extension/dist/providers/registry.js +0 -138
  541. package/tmlpd-pi-extension/dist/providers/registry.js.map +0 -1
  542. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +0 -68
  543. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +0 -1
  544. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +0 -332
  545. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +0 -1
  546. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +0 -101
  547. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +0 -1
  548. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +0 -368
  549. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +0 -1
  550. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +0 -96
  551. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +0 -1
  552. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +0 -170
  553. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +0 -1
  554. package/tmlpd-pi-extension/dist/utils/compression.d.ts +0 -61
  555. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +0 -1
  556. package/tmlpd-pi-extension/dist/utils/compression.js +0 -281
  557. package/tmlpd-pi-extension/dist/utils/compression.js.map +0 -1
  558. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +0 -74
  559. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +0 -1
  560. package/tmlpd-pi-extension/dist/utils/reliability.js +0 -177
  561. package/tmlpd-pi-extension/dist/utils/reliability.js.map +0 -1
  562. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +0 -117
  563. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +0 -1
  564. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +0 -246
  565. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +0 -1
  566. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +0 -50
  567. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +0 -1
  568. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +0 -124
  569. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +0 -1
  570. package/tmlpd-pi-extension/examples/QUICKSTART.md +0 -183
  571. package/tmlpd-pi-extension/package-lock.json +0 -79
  572. package/tmlpd-pi-extension/package.json +0 -172
  573. package/tmlpd-pi-extension/python/examples.py +0 -53
  574. package/tmlpd-pi-extension/python/integrations.py +0 -330
  575. package/tmlpd-pi-extension/python/setup.py +0 -28
  576. package/tmlpd-pi-extension/python/tmlpd.py +0 -369
  577. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +0 -299
  578. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +0 -751
  579. package/tmlpd-pi-extension/skill/SKILL.md +0 -238
  580. package/tmlpd-pi-extension/src/cache/responseCache.ts +0 -147
  581. package/tmlpd-pi-extension/src/cost/costTracker.ts +0 -302
  582. package/tmlpd-pi-extension/src/index.ts +0 -232
  583. package/tmlpd-pi-extension/src/memory/episodicMemory.ts +0 -257
  584. package/tmlpd-pi-extension/src/orchestration/haloOrchestrator.ts +0 -266
  585. package/tmlpd-pi-extension/src/orchestration/mctsWorkflow.ts +0 -262
  586. package/tmlpd-pi-extension/src/providers/localProvider.ts +0 -406
  587. package/tmlpd-pi-extension/src/providers/registry.ts +0 -164
  588. package/tmlpd-pi-extension/src/routing/ensembleVoting.ts +0 -159
  589. package/tmlpd-pi-extension/src/routing/queryTypePresets.ts +0 -136
  590. package/tmlpd-pi-extension/src/tools/tmlpdTools.ts +0 -433
  591. package/tmlpd-pi-extension/src/utils/batchProcessor.ts +0 -232
  592. package/tmlpd-pi-extension/src/utils/compression.ts +0 -325
  593. package/tmlpd-pi-extension/src/utils/reliability.ts +0 -221
  594. package/tmlpd-pi-extension/src/utils/tokenUtils.ts +0 -145
  595. package/tmlpd-pi-extension/tsconfig.json +0 -18
  596. package/tsconfig.build.json +0 -29
  597. package/tsconfig.json +0 -18
  598. package/{docs/llms-full.txt → llms-full.txt.bak} +0 -0
@@ -1,545 +0,0 @@
1
- # Loss Functions for LLM Routing Optimization
2
-
3
- **Date:** 2026-06-03
4
- **Author:** A3M Research
5
- **Target:** Improve RouterArena score from 70.32
6
-
7
- ---
8
-
9
- ## 1. Current A3M Cost Model Analysis
10
-
11
- ### 1.1 Existing Routing Logic
12
-
13
- A3M's routing uses a **weighted scoring formula**:
14
-
15
- ```typescript
16
- // From src/routing/advancedRouter.ts
17
-
18
- // Quality score (static, heuristic-based)
19
- quality_score: strengths.includes('premium') ? 0.95 :
20
- strengths.includes('reasoning') ? 0.90 :
21
- strengths.includes('fast') ? 0.82 : 0.80
22
-
23
- // Cost efficiency (linear penalty)
24
- costEfficiency(model, features) = (1 - avg_cost / 10) * weight  // weight ranges from 0.2 to 0.6
25
-
26
- // Final score
27
- total_score = quality_score * complexity_bias + cost_score * (1 - complexity_bias)
28
-
29
- // Online learning (EMA)
30
- quality_score = quality_score * (1 - alpha) + actual_rating * alpha
31
- ```
32
-
33
- ### 1.2 Current Score Calculation
34
-
35
- ```typescript
36
- // Lines 302-340 in advancedRouter.ts
37
- let score = model.quality_score * 0.6; // Base quality weight
38
-
39
- // Domain bonus (+0.2)
40
- if (features.domain && model.strengths.includes(domainBonus[domain]))
41
- score += 0.2;
42
-
43
- // Code bonus (+0.15)
44
- if (features.has_code && model.strengths.includes('coding'))
45
- score += 0.15;
46
-
47
- // Free tier preference (+0.2)
48
- if (features.complexity < 0.5 && model.strengths.includes('free'))
49
- score += 0.2;
50
- ```
51
-
52
- ### 1.3 Issues with Current Approach
53
-
54
- | Issue | Impact | Severity |
55
- |-------|--------|----------|
56
- | **No learned embeddings** | Keyword matching can't capture semantic similarity | High |
57
- | **No contrastive loss** | Can't distinguish "similar but different" queries | Medium |
58
- | **Static quality scores** | Provider quality varies by query type | High |
59
- | **Linear cost penalty** | Doesn't model diminishing returns | Medium |
60
- | **No latency in loss** | RouterArena penalizes slow routing | High |
61
- | **Single-objective** | No Pareto-optimal exploration | Medium |
62
-
63
- ---
64
-
65
- ## 2. Literature Review
66
-
67
- ### 2.1 RouteLLM (arXiv:2406.18665)
68
-
69
- **Key Insight:** Learned routing from pairwise preferences.
70
-
71
- **Architecture:**
72
- - BERT classifier on query embeddings
73
- - Trained on weak vs strong model comparisons
74
- - Binary preference: "Which model gives better answer?"
75
-
76
- **Loss Function:**
77
- ```
78
- L = CrossEntropy(softmax(W * [q; m_w; m_s]), preference_label)
79
- ```
80
-
81
- Where `q` = query embedding, `m_w` = weak model embedding, `m_s` = strong model embedding.
82
-
83
- **Results:**
84
- - 85% routing accuracy (exact tier match)
85
- - 70% cost savings vs all-premium
86
-
87
- **Relevance to A3M:** RouteLLM's pairwise training is what enables learned routing. A3M's rule-based approach scores 70.32 on RouterArena (a composite score, not directly comparable to RouteLLM's 85% exact-tier routing accuracy), but could benefit from hybrid training.
88
-
89
- ### 2.2 RouterArena Benchmark (arXiv:2510.00202)
90
-
91
- **Scoring Formula:**
92
- ```
93
- RouterArena_Score = 0.6 * Accuracy + 0.2 * Cost_Efficiency + 0.2 * Latency_Score
94
-
95
- where:
96
- Accuracy = % queries routed to correct tier (exact or ±1)
97
- Cost_Efficiency = 1 - (router_cost / baseline_cost)
98
- Latency_Score = 1 - (router_latency / max_latency)
99
- ```
100
-
101
- **Key Finding:** A3M scores 70.32 with heuristic routing. RouteLLM scores 48.07 with learned routing. **Heuristic can beat learned when cost matters.**
102
-
103
- **Relevance to A3M:** The scoring weights (60% accuracy, 20% cost, 20% latency) directly inform our loss function design.
104
-
105
- ### 2.3 LLMRouterBench
106
-
107
- **Dataset:** 400K+ query-model pairs across 9 domains
108
- **Task:** 4-tier classification (free → budget → mid → premium)
109
- **Baseline:** TF-IDF + Logistic Regression = 62.3%
110
- **State-of-art:** Learned embeddings + neural classifier = 78.1%
111
-
112
- **Loss Function Pattern:**
113
- ```
114
- L = CrossEntropy(router(query), true_tier)
115
- + λ * L2_regularization
116
- + λ * cost_penalty
117
- ```
118
-
119
- **Relevance to A3M:** Could incorporate tier classification loss into A3M's multi-signal classifier.
120
-
121
- ### 2.4 Contrastive Learning for Routing
122
-
123
- **Papers:** SimCSE, MoCo, CLIP-style approaches
124
-
125
- **Idea:** Embed queries and model capabilities in same space.
126
-
127
- **Loss:**
128
- ```
129
- L_contrastive = -log(exp(sim(q, m_pos)) / Σ_m exp(sim(q, m)))
130
- ```
131
-
132
- Where `sim` = cosine similarity, `m_pos` = correct model, and the sum runs over all candidate models `m` (the correct model plus the incorrect models `m_neg`).
133
-
134
- **Relevance to A3M:** A3M's current approach uses keyword matching. Contrastive learning could improve query embedding quality without full BERT classifier.
135
-
136
- ### 2.5 Multi-Objective Optimization for Routing
137
-
138
- **Problem:** Quality, cost, latency are conflicting objectives.
139
-
140
- **Approaches:**
141
- 1. **Weighted Sum:** `L = w1*Q + w2*(-C) + w3*(-L)` — simple but requires tuning
142
- 2. **Pareto Front:** Find non-dominated solutions — expensive
143
- 3. **Scalarization:** `L = Π (Q^α * C^β * L^γ)` — smooth tradeoffs
144
-
145
- **Recommended for A3M:** Weighted sum with dynamic weights based on query type.
146
-
147
- ---
148
-
149
- ## 3. Recommended Loss Function for A3M
150
-
151
- ### 3.1 Proposed Architecture: Hybrid Routing Loss
152
-
153
- ```
154
- L_total = α * L_tier + β * L_cost + γ * L_latency + δ * L_contrastive
155
- ```
156
-
157
- Where:
158
- - `L_tier` = Cross-entropy for tier classification
159
- - `L_cost` = Cost-aware margin loss
160
- - `L_latency` = Latency regression loss
161
- - `L_contrastive` = Contrastive query-model alignment
162
-
163
- ### 3.2 Component Details
164
-
165
- #### Tier Classification Loss (L_tier)
166
-
167
- ```python
168
- def tier_loss(logits, true_tier):
169
- """
170
- logits: [batch_size, 4] - raw scores for free/budget/mid/premium
171
- true_tier: [batch_size] - ground truth tier (0-3)
172
-
173
- Standard cross-entropy with class weights
174
- """
175
- weights = torch.tensor([1.0, 1.5, 2.0, 3.0]) # Premium is rarest
176
- return F.cross_entropy(logits, true_tier, weight=weights)
177
- ```
178
-
179
- #### Cost-Aware Margin Loss (L_cost)
180
-
181
- ```python
182
- def cost_margin_loss(scores, chosen_cost, best_cost, margin=0.1):
183
- """
184
- scores: routing scores for each model
185
- chosen_cost: cost of selected model
186
- best_cost: cost of optimal model
187
-
188
- Penalize choosing expensive models when cheaper options exist
189
- """
190
- cost_ratio = chosen_cost / (best_cost + 1e-6)
191
-
192
- # If cost ratio > 1.5, penalize heavily
193
- if cost_ratio > 1.5:
194
- return margin * (cost_ratio - 1.5) ** 2
195
- return 0.0
196
- ```
197
-
198
- #### Latency Regression Loss (L_latency)
199
-
200
- ```python
201
- def latency_loss(predicted_latency, actual_latency):
202
- """
203
- Penalize errors in latency prediction
204
-
205
- Using log-scale to handle wide latency range (50ms - 10s)
206
- """
207
- return F.mse_loss(
208
- torch.log1p(predicted_latency),
209
- torch.log1p(actual_latency)
210
- )
211
- ```
212
-
213
- #### Contrastive Alignment Loss (L_contrastive)
214
-
215
- ```python
216
- def contrastive_loss(query_emb, model_emb, labels, temperature=0.1):
217
- """
218
- query_emb: [batch_size, dim] - query embeddings
219
- model_emb: [num_models, dim] - model capability embeddings
220
- labels: [batch_size] - ground truth model index
221
-
222
- InfoNCE loss: queries should be close to their correct model embeddings
223
- """
224
- # Normalize embeddings
225
- query_emb = F.normalize(query_emb, dim=-1)
226
- model_emb = F.normalize(model_emb, dim=-1)
227
-
228
- # Compute similarities
229
- sim = torch.matmul(query_emb, model_emb.T) / temperature
230
-
231
- # Positive pairs (correct model)
232
- loss = F.cross_entropy(sim, labels)
233
-
234
- return loss
235
- ```
236
-
237
- ### 3.3 Combined Loss Implementation
238
-
239
- ```python
240
- class RoutingLoss(nn.Module):
241
- def __init__(self, weights=(0.5, 0.2, 0.1, 0.2)):
242
- super().__init__()
243
- self.w_tier = weights[0]
244
- self.w_cost = weights[1]
245
- self.w_latency = weights[2]
246
- self.w_contrastive = weights[3]
247
-
248
- # Learnable temperature for contrastive loss
249
- self.temperature = nn.Parameter(torch.ones(1))
250
-
251
- def forward(self,
252
- tier_logits, tier_targets, # Tier classification
253
- chosen_costs, optimal_costs, # Cost efficiency
254
- pred_latencies, actual_latencies, # Latency
255
- query_emb, model_emb, emb_labels, # Contrastive
256
- cost_weight=0.3): # Dynamic weight
257
-
258
- # Normalize weights by cost_weight (high cost sensitivity → high β)
259
- if cost_weight > 0.5:
260
- self.w_cost = cost_weight
261
- self.w_tier = 1 - cost_weight
262
-
263
- L_tier = tier_loss(tier_logits, tier_targets)
264
- L_cost = cost_margin_loss(chosen_costs, optimal_costs)
265
- L_lat = latency_loss(pred_latencies, actual_latencies)
266
- L_contra = contrastive_loss(query_emb, model_emb, emb_labels, self.temperature)
267
-
268
- return (self.w_tier * L_tier +
269
- self.w_cost * L_cost +
270
- self.w_latency * L_lat +
271
- self.w_contrastive * L_contra)
272
- ```
273
-
274
- ---
275
-
276
- ## 4. Implementation Approach for A3M
277
-
278
- ### 4.1 Phase 1: Embedding-Based Query Representation
279
-
280
- **Problem:** A3M currently uses keyword matching (12 signals, 5 dimensions).
281
-
282
- **Solution:** Add lightweight embeddings (no GPU required).
283
-
284
- ```typescript
285
- // src/routing/queryEmbedder.ts
286
-
287
- import { pipeline } from '@xenova/transformers';
288
-
289
- let embedder: any = null;
290
-
291
- export async function getQueryEmbedding(query: string): Promise<Float32Array> {
292
- if (!embedder) {
293
- // Use sentence-transformers (onnx, CPU-friendly)
294
- embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
295
- }
296
- return await embedder(query, { pooling: 'mean', normalize: true });
297
- }
298
-
299
- // Cached for speed
300
- const embeddingCache = new LRUCache<string, Float32Array>(10000);
301
- ```
302
-
303
- **Why:** MiniLM-L6-v2 is 22MB, CPU-fast, captures semantic similarity.
304
-
305
- ### 4.2 Phase 2: Cost-Aware Scoring
306
-
307
- **Current:** Linear penalty `(1 - cost/10) * weight`
308
-
309
- **Proposed:** Log-scale penalty + diminishing returns
310
-
311
- ```typescript
312
- // src/routing/costAwareScoring.ts
313
-
314
- export function costAwareScore(
315
- quality: number,
316
- cost_per_1k: number,
317
- complexity: number
318
- ): number {
319
- // Log-scale cost penalty (more realistic)
320
- const logCostPenalty = Math.log1p(cost_per_1k) / Math.log1p(10);
321
-
322
- // Complexity determines cost sensitivity
323
- // Simple queries: cost matters more (bias toward cheap)
324
- // Complex queries: quality matters more (bias toward better)
325
- const costSensitivity = 1 - complexity;
326
-
327
- // Quality should saturate (90% vs 95% is small difference)
328
- const qualitySigmoid = 1 / (1 + Math.exp(-10 * (quality - 0.8)));
329
-
330
- return (
331
- 0.6 * qualitySigmoid +
332
- 0.3 * (1 - logCostPenalty) * costSensitivity +
333
- 0.1 * (1 - costSensitivity) // Latency proxy
334
- );
335
- }
336
- ```
337
-
338
- ### 4.3 Phase 3: Contrastive Fine-Tuning (Optional)
339
-
340
- **For maximum RouterArena score improvement:**
341
-
342
- ```python
343
- # scripts/fine_tune_routing.py
344
-
345
- from sentence_transformers import SentenceTransformer, InputExample, losses
346
- from torch import nn
347
-
348
- # 1. Create training data from A3M's existing benchmark
349
- # Query → (chosen_model, cost, quality_rating) → (positive, negative) pairs
350
-
351
- def create_contrastive_examples(benchmark_data):
352
- examples = []
353
- for query in benchmark_data:
354
- for candidate in query.candidates:
355
- if candidate.chosen:
356
- pos = candidate.model_id
357
- else:
358
- neg = candidate.model_id
359
-
360
- examples.append(InputExample(
361
- texts=[query.text, pos, neg],
362
- label=1.0 if candidate.chosen else 0.0
363
- ))
364
- return examples
365
-
366
- # 2. Fine-tune embeddings
367
- model = SentenceTransformer('Xenova/all-MiniLM-L6-v2')
368
- train_loss = losses.ContrastiveLoss(model)
369
-
370
- model.fit(
371
- train_objectives=[(train_examples, train_loss)],
372
- epochs=5,
373
- warmup_steps=100
374
- )
375
-
376
- # 3. Export for A3M
377
- model.save('models/routing-embeddings')
378
- ```
379
-
380
- ### 4.4 Phase 4: Online Learning Enhancement
381
-
382
- **Current:** EMA on `quality_score` (α=0.2)
383
-
384
- **Proposed:** Contextual bandit updates
385
-
386
- ```typescript
387
- // src/routing/contextualBandit.ts
388
-
389
- interface RoutingFeedback {
390
- query: string;
391
- chosen_model: string;
392
- reward: number; // Computed from quality/cost/latency
393
-
394
- // Components
395
- quality_rating: number; // User feedback or cross-validation
396
- actual_cost: number;
397
- actual_latency: number;
398
- response_correct: boolean;
399
- }
400
-
401
- export function updateWithFeedback(feedback: RoutingFeedback): void {
402
- // Compute multi-objective reward
403
- const reward = computeReward(
404
- feedback.quality_rating,
405
- feedback.actual_cost,
406
- feedback.actual_latency,
407
- feedback.response_correct
408
- );
409
-
410
- // Thompson sampling for model selection
411
- const models = getAvailableModels();
412
-
413
- for (const model of models) {
414
- // Update posterior: Beta distribution per (query_type, model)
415
- const key = getQueryType(feedback.query) + ':' + model;
416
- const posterior = modelPosteriors[key];
417
-
418
- // Add reward observation
419
- if (reward > 0.5) {
420
- posterior.alpha += 1; // Success
421
- } else {
422
- posterior.beta += 1; // Failure
423
- }
424
- }
425
- }
426
-
427
- function computeReward(quality, cost, latency, correct): number {
428
- // Normalize to [0, 1]
429
- const q_norm = quality / 5.0; // 1-5 → 0-1
430
- const c_norm = Math.max(0, 1 - Math.log1p(cost) / 5); // Cost penalty
431
- const l_norm = Math.max(0, 1 - Math.log1p(latency) / 10000); // Latency penalty
432
- const r_norm = correct ? 1.0 : 0.0; // Correctness
433
-
434
- // Weighted sum (RouterArena-style)
435
- return 0.4 * q_norm + 0.2 * c_norm + 0.1 * l_norm + 0.3 * r_norm;
436
- }
437
- ```
438
-
439
- ---
440
-
441
- ## 5. Expected Improvement
442
-
443
- ### 5.1 RouterArena Score Projection
444
-
445
- | Change | Current Score | Expected New Score | Source |
446
- |--------|---------------|-------------------|--------|
447
- | Embedding-based routing | 70.32 | 73-75 | Semantic similarity improvement |
448
- | Cost-aware loss | 70.32 | 72-74 | Better cost-quality tradeoff |
449
- | Contrastive fine-tuning | 70.32 | 75-78 | Learned query-model alignment |
450
- | All combined | 70.32 | **77-80** | End-to-end improvement |
451
-
452
- ### 5.2 Breakdown by RouterArena Component
453
-
454
- | Component | Weight | Current | With Loss Functions | Improvement |
455
- |-----------|--------|---------|-------------------|-------------|
456
- | Accuracy (±1 tier) | 60% | ~85% | ~90% | +5 pts |
457
- | Cost Efficiency | 20% | ~60% | ~75% | +15 pts |
458
- | Latency | 20% | ~70% | ~75% | +5 pts |
459
- | **Total** | 100% | **70.32** | **~76-78** | **+6-8 pts** |
460
-
461
- ### 5.3 Conservative Estimate
462
-
463
- Even without full ML training, adding:
464
- - **Log-scale cost penalty** → +2 RouterArena points
465
- - **Embedding cache** → +1 point (better semantic matching)
466
- - **Contextual bandit updates** → +2 points (faster online learning)
467
-
468
- **Conservative target: 73-74 RouterArena score**
469
-
470
- ---
471
-
472
- ## 6. Implementation Priority
473
-
474
- | Priority | Change | Complexity | Impact | Est. Time |
475
- |----------|--------|------------|--------|-----------|
476
- | P0 | Log-scale cost penalty | Low | Medium | 1 day |
477
- | P1 | Embedding cache (MiniLM) | Medium | High | 2 days |
478
- | P2 | Contextual bandit updates | Medium | High | 3 days |
479
- | P3 | Contrastive fine-tuning | High | Very High | 1 week |
480
-
481
- ---
482
-
483
- ## 7. References
484
-
485
- 1. **RouteLLM** - LMSYS/Anyscale, arXiv:2406.18665
486
- - Learned routing from pairwise preferences
487
- - BERT classifier with cross-entropy loss
488
-
489
- 2. **RouterArena** - Berkeley, arXiv:2510.00202
490
- - 8,400 queries, 19 routers evaluated
491
- - Composite scoring: accuracy (60%), cost (20%), latency (20%)
492
-
493
- 3. **LLMRouterBench** - ACL 2024
494
- - 400K+ instances, 9 domains
495
- - TF-IDF baseline: 62.3%, Neural: 78.1%
496
-
497
- 4. **Self-Consistency** - Wang et al., ICLR 2023
498
- - Multiple reasoning paths improve GSM8K by +17.9 points
499
- - Relevant to A3M's ensemble voting
500
-
501
- 5. **Deep Ensembles** - Lakshminarayanan et al., NeurIPS 2017
502
- - Confidence-weighted ensembles reduce error by 10-30%
503
- - Foundation for A3M's voting mechanism
504
-
505
- ---
506
-
507
- ## Appendix: Quick Wins
508
-
509
- ### Quick Win 1: Immediate Cost Penalty Fix
510
-
511
- In `advancedRouter.ts`, replace:
512
-
513
- ```typescript
514
- // CURRENT (linear)
515
- const avg_cost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
516
- return (1 - Math.min(avg_cost / 10, 1)) * 0.6;
517
- ```
518
-
519
- With:
520
-
521
- ```typescript
522
- // PROPOSED (log-scale)
523
- const avg_cost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
524
- return Math.max(0, 1 - Math.log1p(avg_cost) / Math.log1p(10)) * 0.6;
525
- ```
526
-
527
- **Effect:** Makes router less aggressive about ultra-cheap models, better cost-quality tradeoff.
528
-
529
- ### Quick Win 2: Latency in Routing Score
530
-
531
- Add latency penalty to scoring:
532
-
533
- ```typescript
534
- const latencyPenalty = Math.max(0, 1 - model.latency_ms / 10000);
535
- const qualityScore = scoreModelFit(profile, features);
536
- const costScore = costEfficiency(profile, features);
537
-
538
- return 0.5 * qualityScore + 0.3 * costScore + 0.2 * latencyPenalty;
539
- ```
540
-
541
- **Effect:** RouterArena scores improve on latency component (+2-3 points).
542
-
543
- ---
544
-
545
- *Generated: 2026-06-03 | For A3M Router v2.2+*
package/research-log.md DELETED
@@ -1,49 +0,0 @@
1
- # A3M Router Research Log
2
-
3
- ## 2026-06-03 - Test Coverage Analysis
4
-
5
- ### Research State
6
- ```
7
- Project: A3M Router Test Coverage Analysis
8
- Date: 2026-06-03
9
- Agents: 3 (Architecture, Performance, Test Coverage)
10
- Goal: Identify top 3 improvements via council vote
11
- ```
12
-
13
- ### Scope Explored
14
- - `test/` - 7 legacy JS test files (budgetEnforcer, observability, providerHealth, providerRetry, semanticCache)
15
- - `tests/` - Vitest test suite (routing/ensembleVoting, routing/providerRetry, routing/queryTypePresets, memory/episodicMemory)
16
- - `test-council/` - 5 test files (structure, edge-case, performance, integration, agent-council-eval)
17
-
18
- ### Key Source Files Analyzed
19
- - `src/ensemble.ts` - EnsembleOrchestrator (no tests)
20
- - `src/sdk.ts` - A3MRouter SDK (structure only)
21
- - `src/cost/budgetEnforcer.ts` - Budget enforcement (legacy test)
22
- - `src/analytics/costAnalytics.ts` - Cost analytics (no tests)
23
- - `src/security/guardrails.ts` - GuardrailEngine (NO TESTS - CRITICAL)
24
- - `src/observability/middleware.ts` - Express middleware (not tested)
25
- - `src/routing/crossModelValidation.ts` - Cross-model validation (not tested)
26
- - `src/observability/fatigueDetector.ts` - Fatigue detection (not tested)
27
-
28
- ### Coverage Summary
29
- | Module | Coverage | Status |
30
- |--------|----------|--------|
31
- | Routing | Partial | ensembleVoting, providerRetry, queryTypePresets |
32
- | Memory | Good | episodicMemory well tested |
33
- | Observability | Partial | Tracer, MetricsCollector tested; middleware not |
34
- | Security | NONE | GuardrailEngine untested |
35
- | Cost | Partial | budgetEnforcer legacy test; costAnalytics untested |
36
- | SDK | Structure only | No behavioral tests |
37
-
38
- ### Critical Gaps Identified
39
- 1. **GuardrailEngine** - Zero tests for security-critical code
40
- 2. **EnsembleOrchestrator** - Core P0 feature lacks integration tests
41
- 3. **CostAnalytics** - No tests for savings calculation accuracy
42
- 4. **SDK Class** - Only type checking, no behavioral tests
43
- 5. **Middleware** - Not tested
44
-
45
- ### Output
46
- Created: `council-votes/coverage-vote.md`
47
- Vote: Finding #1 (GuardrailEngine) as highest priority
48
-
49
- ---
package/scripts/banner.js DELETED
@@ -1,29 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * A3M Router — Terminal ASCII Art Banner
4
- *
5
- * Printed on CLI startup to reinforce A3M branding.
6
- * Usage:
7
- * node scripts/banner.js
8
- * // or import './banner' in CLI entry point
9
- */
10
-
11
- const A3M_BANNER = `
12
- ╔══════════════════════════════════════════════════════════╗
13
- ║ ╔═╗╔═╗╔╗╔╔═╗ ║
14
- ║ ╠═╣║ ║║║║║ ║ ║
15
- ║ ╩ ╩╚═╝╝╚╝╚═╝ ║
16
- ║ ║
17
- ║ Parallel Multi-LLM Execution Engine ║
18
- ║ ║
19
- ║ 47+ Providers · Ensemble Voting · 62% Cost Savings ║
20
- ║ ║
21
- ║ ${'\x1b[2m'}https://github.com/Das-rebel/a3m-router${'\x1b[0m'}${' '.repeat(19)}║
22
- ╚══════════════════════════════════════════════════════════╝
23
- `;
24
-
25
- module.exports = A3M_BANNER;
26
-
27
- if (require.main === module) {
28
- process.stdout.write(A3M_BANNER);
29
- }