adaptive-memory-multi-model-router 2.14.49 → 2.14.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +82 -0
- package/.env.example +303 -0
- package/.github/DISCUSSIONS_WELCOME.md +27 -0
- package/.github/DISCUSSION_TEMPLATE.yml +5 -0
- package/.github/FUNDING.yml +2 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +94 -0
- package/.github/ISSUE_TEMPLATE/config.yml +17 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +71 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +71 -0
- package/.github/dependabot.yml +9 -0
- package/.github/workflows/ci.yml +263 -0
- package/.github/workflows/codeql.yml +38 -0
- package/.github/workflows/npm-publish.yml +20 -0
- package/.github/workflows/pages.yml +37 -0
- package/.github/workflows/stale.yml +54 -0
- package/.publish-tick +1 -0
- package/.well-known/ai-plugin.json +16 -0
- package/AGENT_COUNCIL_FINDINGS.md +142 -0
- package/ARCHITECTURE.md +346 -0
- package/AUDIT_REPORT.md +28 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/CONTRIBUTING.md +50 -0
- package/CONTRIBUTORS.md +20 -0
- package/Dockerfile +53 -0
- package/Dockerfile.proxy +33 -0
- package/HEALTH_REPORT.md +118 -0
- package/IMPROVEMENT_PLAN.md +107 -0
- package/LANDING.md +43 -0
- package/LAUNCH-PAIN-DRIVEN.md +339 -0
- package/LAUNCH.md +337 -0
- package/LAUNCH_CHECKLIST.md +141 -0
- package/LAUNCH_SNAPSHOT.md +260 -0
- package/MANIFESTO.md +41 -0
- package/POPULARITY_BOOSTERS.md +285 -0
- package/PR_STATUS_REPORT.md +148 -0
- package/README.md +25 -14
- package/REDESIGN.md +95 -0
- package/RUNKIT.md +83 -0
- package/SECURITY.md +29 -0
- package/SUBMISSIONS.md +43 -0
- package/_schema.html +53 -0
- package/ai-plugin.json +16 -0
- package/articles/AI_AGENT_LLM_ROUTING.md +150 -0
- package/articles/CHINESE_DIRECTORIES.md +100 -0
- package/articles/CHINESE_SUBMISSIONS_READY.md +322 -0
- package/articles/COMPETITOR_ALERTS.md +31 -0
- package/articles/COMPLETE_POSTING_DIRECTORY.md +147 -0
- package/articles/CONTENT_STRUCTURE.md +292 -0
- package/articles/DEVTO_COST_GUIDE.md +473 -0
- package/articles/DEVTO_FINAL.md +416 -0
- package/articles/DEVTO_MULTI_PROVIDER.md +542 -0
- package/articles/DEVTO_READY.md +255 -0
- package/articles/DEVTO_V2_ANNOUNCEMENT.md +160 -0
- package/articles/DEVTO_VIRAL_GROWTH.md +280 -0
- package/articles/FRESH_devto.md +460 -0
- package/articles/FRESH_devto_2026_05.md +73 -0
- package/articles/FRESH_hackernews.md +14 -0
- package/articles/FRESH_reddit_ml.md +90 -0
- package/articles/FRESH_reddit_node.md +198 -0
- package/articles/FRESH_reddit_sideproject.md +72 -0
- package/articles/FRESH_reddit_webdev.md +130 -0
- package/articles/FROM_ZERO_TO_10K.md +107 -0
- package/articles/HN_10X_BETTER.md +430 -0
- package/articles/HN_ACCOUNT_GUIDE.md +21 -0
- package/articles/HN_CHINESE_STYLE.md +308 -0
- package/articles/HN_FINAL.md +148 -0
- package/articles/HN_POSTED_VERSION.md +56 -0
- package/articles/HN_POST_READY.md +137 -0
- package/articles/HN_RESEARCH.md +364 -0
- package/articles/HN_SHOW_routerarena.md +17 -0
- package/articles/HN_TIMING_GUIDE.md +52 -0
- package/articles/INDIEHACKERS_POST.md +52 -0
- package/articles/INDIEHACKERS_READY.md +120 -0
- package/articles/LLM_BENCHMARK_DEEP_DIVE.md +153 -0
- package/articles/MASTER_POSTING_DIRECTORY.md +189 -0
- package/articles/NEWSLETTER_SEND_NOW.md +259 -0
- package/articles/NEWSLETTER_SUBMISSIONS.md +112 -0
- package/articles/PAIN-DRIVEN-devto-v2.md +308 -0
- package/articles/PAIN-DRIVEN-devto-v3.md +268 -0
- package/articles/PAIN-DRIVEN-devto.md +242 -0
- package/articles/PAIN-DRIVEN-hackernews-v2.md +138 -0
- package/articles/PAIN-DRIVEN-hackernews-v3.md +151 -0
- package/articles/PAIN-DRIVEN-hackernews.md +131 -0
- package/articles/PAIN-DRIVEN-reddit-v2.md +301 -0
- package/articles/PAIN-DRIVEN-reddit-v3.md +236 -0
- package/articles/PAIN-DRIVEN-reddit.md +218 -0
- package/articles/PAIN-DRIVEN-twitter-v2.md +110 -0
- package/articles/PAIN-DRIVEN-twitter-v3.md +121 -0
- package/articles/PAIN-DRIVEN-twitter.md +120 -0
- package/articles/PORTKEY_VS_A3M.md +147 -0
- package/articles/POSTING_KIT_2026_05.md +67 -0
- package/articles/PRESS_KIT_routerarena.md +77 -0
- package/articles/PRODUCTHUNT_LISTING.md +48 -0
- package/articles/PRODUCTHUNT_READY.md +106 -0
- package/articles/PR_PLAN_vault.md +125 -0
- package/articles/REDDIT_FINAL.md +232 -0
- package/articles/REDDIT_POST.md +67 -0
- package/articles/REDDIT_SUBMISSION_READY.md +348 -0
- package/articles/ROUTERARENA_9677.md +78 -0
- package/articles/ROUTERARENA_LEADER.md +45 -0
- package/articles/SHOW_HN_FINAL.md +29 -0
- package/articles/TWEETS_10K_DOWNLOADS.md +47 -0
- package/articles/TWEETS_BENCHMARK_FIRST.md +46 -0
- package/articles/TWEETS_MCP_PLAY.md +51 -0
- package/articles/TWEETS_SEQUENTIAL_BROKEN.md +49 -0
- package/articles/TWEETS_WHY_BUILD.md +54 -0
- package/articles/TWEETS_routerarena_leader.md +53 -0
- package/articles/TWEET_STORM_READY.md +165 -0
- package/articles/TWITTER_FINAL.md +167 -0
- package/articles/WHY_10X_BETTER.md +261 -0
- package/articles/WHY_CHINESE_STYLE_BETTER.md +323 -0
- package/articles/ai-discoverability-llm-routing.md +210 -0
- package/articles/devto-llm-routing.md +138 -0
- package/articles/hackernews-show-hn.md +54 -0
- package/articles/hashnode-llm-cost-optimization.md +125 -0
- package/articles/hn_show_2026_05.md +11 -0
- package/articles/medium-building-llm-router.md +205 -0
- package/articles/reddit-ml.md +76 -0
- package/articles/twitter-thread-cost-savings.md +50 -0
- package/articles/youtube-tutorial-script.md +262 -0
- package/assets/a3m_3blue1brown.mp4 +0 -0
- package/assets/banner.svg +109 -0
- package/assets/chart-cost-v2.svg +91 -0
- package/assets/chart-cost-v3.svg +143 -0
- package/assets/chart-features-v2.svg +132 -0
- package/assets/chart-features-v3.svg +211 -0
- package/assets/chart-growth-v2.svg +122 -0
- package/assets/chart-growth-v3.svg +189 -0
- package/assets/cost-comparison.svg +134 -0
- package/assets/cost-simple.svg +64 -0
- package/assets/demo-hn.gif +0 -0
- package/assets/feature-matrix.svg +136 -0
- package/assets/growth-chart-animated.svg +76 -0
- package/assets/growth-chart.svg +82 -0
- package/assets/growth-simple.svg +69 -0
- package/assets/hero-diagram.svg +81 -0
- package/assets/logo-new.svg +21 -0
- package/assets/logo.svg +68 -0
- package/assets/provider-comparison.svg +121 -0
- package/assets/social-preview-new.svg +100 -0
- package/assets/social-preview.svg +194 -0
- package/assets/social-v2.svg +130 -0
- package/assets/social-v3.svg +212 -0
- package/benchmark-provider-results.json +245 -0
- package/benchmark-results.json +54 -0
- package/council-votes/architecture-vote.md +121 -0
- package/council-votes/coverage-vote.md +93 -0
- package/data/adaptive-benchmark.json +92 -0
- package/data/benchmark-results.json +47 -0
- package/data/labeled-benchmark.json +88 -0
- package/demo/3blue1brown_video.py +285 -0
- package/demo/3blue1brown_video_v2.py +310 -0
- package/demo/IMPROVED_PROMPTS.md +229 -0
- package/demo/VEO3_PROMPTS.md +269 -0
- package/demo/VIDEO_PRODUCTION_GUIDE.md +333 -0
- package/demo/a3m_3blue1brown.mp4 +0 -0
- package/demo/asciinema-demo.sh +195 -0
- package/demo/demo-hn.tape +74 -0
- package/demo/demo-script.md +53 -0
- package/demo/demo-script.sh +62 -0
- package/demo/demo.svg +75 -0
- package/demo/frame1_ai_data_center.png +0 -0
- package/demo/frame1_sunset_video.mp4 +0 -0
- package/demo/frame2_cost_comparison.png +0 -0
- package/demo/frame2_cost_comparison_fallback.png +0 -0
- package/demo/frame3_parallel_execution.png +0 -0
- package/demo/frame3_parallel_execution_fallback.png +0 -0
- package/demo/frame4_providers.png +0 -0
- package/demo/frame4_providers_fallback.png +0 -0
- package/demo/frame5_endcard.png +0 -0
- package/demo/frame5_endcard_fallback.png +0 -0
- package/demo/new_frame1_hook.png +0 -0
- package/demo/new_frame2_proof.png +0 -0
- package/demo/new_frame3_wow.png +0 -0
- package/demo/new_frame4_social.png +0 -0
- package/demo/new_frame5_cta.png +0 -0
- package/demo/package.json +13 -0
- package/demo/product-video-final.mp4 +0 -0
- package/demo/product-video-hype-v1.mp4 +0 -0
- package/demo/product-video-v1.mp4 +0 -0
- package/demo/public/index.html +762 -0
- package/demo/recording.cast +55 -0
- package/demo/server.js +405 -0
- package/demo-new.tape +71 -0
- package/demo-real.sh +198 -0
- package/demo-simple.tape +205 -0
- package/demo.html +520 -0
- package/demo.sh +85 -0
- package/demo.tape +259 -0
- package/dist/analytics/costAnalytics.d.ts.map +1 -0
- package/dist/analytics/costAnalytics.js.map +1 -0
- package/dist/benchmark/comprehensive.js.map +1 -0
- package/dist/benchmark/reproducible.d.ts.map +1 -0
- package/dist/benchmark/reproducible.js.map +1 -0
- package/dist/cache/prefixCache.d.ts.map +1 -0
- package/dist/cache/prefixCache.js.map +1 -0
- package/dist/cache/responseCache.d.ts.map +1 -0
- package/dist/cache/responseCache.js.map +1 -0
- package/dist/cache/semanticCache.d.ts.map +1 -0
- package/dist/cache/semanticCache.js.map +1 -0
- package/dist/cli/setupWizard.d.ts.map +1 -0
- package/dist/cli/setupWizard.js.map +1 -0
- package/dist/cost/budgetEnforcer.d.ts.map +1 -0
- package/dist/cost/budgetEnforcer.js.map +1 -0
- package/dist/cost/costTracker.d.ts.map +1 -0
- package/dist/cost/costTracker.js.map +1 -0
- package/dist/ensemble/multiRoundDialog.js.map +1 -0
- package/dist/ensemble/shapleyValue.js.map +1 -0
- package/dist/integrations/langchainAdapter.d.ts.map +1 -0
- package/dist/integrations/langchainAdapter.js.map +1 -0
- package/dist/integrations/oauth.d.ts.map +1 -0
- package/dist/integrations/oauth.js.map +1 -0
- package/dist/integrations/scienceAdapter.js.map +1 -0
- package/dist/memory/autoFetch.d.ts.map +1 -0
- package/dist/memory/autoFetch.js.map +1 -0
- package/dist/memory/episodicMemory.d.ts.map +1 -0
- package/dist/memory/episodicMemory.js.map +1 -0
- package/dist/memory/hybridMemory.js.map +1 -0
- package/dist/memory/memoryTree.d.ts.map +1 -0
- package/dist/memory/memoryTree.js.map +1 -0
- package/dist/memory/obsidianVault.d.ts.map +1 -0
- package/dist/memory/obsidianVault.js.map +1 -0
- package/dist/memory/reasoningBank.js.map +1 -0
- package/dist/observability/changeWatch.d.ts.map +1 -0
- package/dist/observability/changeWatch.js.map +1 -0
- package/dist/observability/fatigueDetector.d.ts.map +1 -0
- package/dist/observability/fatigueDetector.js.map +1 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/metrics.d.ts.map +1 -0
- package/dist/observability/metrics.js.map +1 -0
- package/dist/observability/middleware.d.ts.map +1 -0
- package/dist/observability/middleware.js.map +1 -0
- package/dist/observability/tracer.d.ts.map +1 -0
- package/dist/observability/tracer.js.map +1 -0
- package/dist/observability/types.d.ts.map +1 -0
- package/dist/observability/types.js.map +1 -0
- package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/dist/providers/localProvider.d.ts.map +1 -0
- package/dist/providers/localProvider.js.map +1 -0
- package/dist/providers/providerConfig.d.ts.map +1 -0
- package/dist/providers/providerConfig.js.map +1 -0
- package/dist/providers/registry.d.ts.map +1 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/routing/advancedRouter.d.ts.map +1 -0
- package/dist/routing/advancedRouter.js +1 -1
- package/dist/routing/advancedRouter.js.map +1 -0
- package/dist/routing/crossModelValidation.d.ts.map +1 -0
- package/dist/routing/crossModelValidation.js.map +1 -0
- package/dist/routing/providerHealth.d.ts.map +1 -0
- package/dist/routing/providerHealth.js.map +1 -0
- package/dist/routing/providerRetry.d.ts.map +1 -0
- package/dist/routing/providerRetry.js.map +1 -0
- package/dist/scripts/banner.js +29 -0
- package/dist/security/guardrails.d.ts.map +1 -0
- package/dist/security/guardrails.js.map +1 -0
- package/dist/server/dashboard.d.ts.map +1 -0
- package/dist/server/dashboard.js.map +1 -0
- package/dist/server/modelMapper.d.ts.map +1 -0
- package/dist/server/modelMapper.js.map +1 -0
- package/dist/server/proxyServer.d.ts.map +1 -0
- package/dist/server/proxyServer.js.map +1 -0
- package/dist/skills/__tests__/skill_manager.test.d.ts +2 -0
- package/dist/skills/__tests__/skill_manager.test.d.ts.map +1 -0
- package/dist/skills/__tests__/skill_manager.test.js +268 -0
- package/dist/skills/__tests__/skill_manager.test.js.map +1 -0
- package/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/dist/tools/tmlpdTools.js.map +1 -0
- package/dist/tui/dashboard.d.ts.map +1 -0
- package/dist/tui/dashboard.js.map +1 -0
- package/dist/tui/index.d.ts.map +1 -0
- package/dist/tui/index.js.map +1 -0
- package/dist/utils/batchProcessor.d.ts.map +1 -0
- package/dist/utils/batchProcessor.js.map +1 -0
- package/dist/utils/compression.d.ts.map +1 -0
- package/dist/utils/compression.js.map +1 -0
- package/dist/utils/costUtils.d.ts.map +1 -0
- package/dist/utils/costUtils.js.map +1 -0
- package/dist/utils/reliability.d.ts.map +1 -0
- package/dist/utils/reliability.js.map +1 -0
- package/dist/utils/sorting.d.ts.map +1 -0
- package/dist/utils/sorting.js.map +1 -0
- package/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/dist/utils/speculativeDecoding.js.map +1 -0
- package/dist/utils/tokenUtils.d.ts.map +1 -0
- package/dist/utils/tokenUtils.js.map +1 -0
- package/docs/.nojekyll +0 -0
- package/docs/ANALYSIS_PRINCIPLES.md +162 -0
- package/docs/API.md +855 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/BENCHMARK.md +170 -0
- package/docs/CHINESE_PROVIDER_RELIABILITY.md +37 -0
- package/docs/CITATIONS.md +74 -0
- package/docs/CLAIMS_AND_EVIDENCE.md +58 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +816 -0
- package/docs/COUNCIL_SUMMARY.md +319 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/ENGINEERING_SPEC.md +55 -0
- package/docs/FACTORY_RESET.md +34 -0
- package/docs/GEO.md +66 -0
- package/docs/GEO_OPTIMIZATION.md +30 -0
- package/docs/GEO_ROOT_CAUSE.md +136 -0
- package/docs/GEO_STATUS.md +85 -0
- package/docs/GEO_TEST_RESULTS.md +176 -0
- package/docs/HN_CHECKLIST.md +38 -0
- package/docs/HN_FOUNDER_COMMENT.md +17 -0
- package/docs/HN_SUBMISSION_FINAL.md +180 -0
- package/docs/HN_SUBMISSION_V3.md +56 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/INTEGRATIONS.md +420 -0
- package/docs/LANGCHAIN_INTEGRATION.md +147 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/MIDDLEWARE_CHAIN.md +35 -0
- package/docs/PROMO_CHECKLIST.md +200 -0
- package/docs/QUICKSTART.md +271 -0
- package/docs/QUICK_START.md +43 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RELEASE_CHECKLIST.md +32 -0
- package/docs/REPRODUCIBILITY.md +63 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/ROUTING_RUBRIC.md +197 -0
- package/docs/SEO_AUDIT.md +186 -0
- package/docs/SOCIAL_LISTENING.md +219 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/UPDATE_TOPICS.md +15 -0
- package/docs/USE_CASES.md +59 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VERCEL_AI_SDK.md +209 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/_config.yml +49 -0
- package/docs/ai-plugin.json +16 -0
- package/docs/api.html +513 -0
- package/docs/architecture-diagram.md +40 -0
- package/docs/benchmark-chart.png +0 -0
- package/docs/benchmark.html +387 -0
- package/docs/blog/routerarena-9677.html +92 -0
- package/docs/blog/routerarena-number-one.html +73 -0
- package/docs/cli-cheatsheet.md +339 -0
- package/docs/compare.md +109 -0
- package/docs/comparison-litellm.md +88 -0
- package/docs/comparison.md +108 -0
- package/docs/cost-chart-ascii.md +42 -0
- package/docs/cost-comparison-chart.svg +88 -0
- package/docs/curl-examples.md +247 -0
- package/docs/demo-auto.html +264 -0
- package/docs/demo.html +416 -0
- package/docs/geo/GENERATIVE_ENGINE_OPTIMIZATION.md +232 -0
- package/docs/index.html +507 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/{llms.txt.bak → docs/llms.txt} +6 -6
- package/docs/npm-downloads-chart.svg +43 -0
- package/docs/openapi.json +139 -0
- package/docs/openapi.yaml +1318 -0
- package/docs/quick-start.html +366 -0
- package/docs/robots.txt +52 -0
- package/docs/sitemap.xml +57 -0
- package/docs/styles.css +682 -0
- package/docs/well-known/ai-plugin.json +16 -0
- package/docs/wellknown/ai-plugin.json +16 -0
- package/docs-site/assets/og-banner.svg +194 -0
- package/docs-site/index.html +632 -0
- package/eval/README.md +46 -0
- package/eval/baselines/main.json +12 -0
- package/eval/benchmark_dataset.jsonl +16 -0
- package/eval/check_golden_routes.js +64 -0
- package/eval/datasets/catalog.json +33 -0
- package/eval/datasets/slices/cn_provider_reliability_v1.jsonl +3 -0
- package/eval/datasets/slices/cost_pressure_v1.jsonl +3 -0
- package/eval/datasets/slices/safety_guardrails_v1.jsonl +3 -0
- package/eval/evals.json +199 -0
- package/eval/fault_injection_thresholds.json +3 -0
- package/eval/generate_report.js +128 -0
- package/eval/golden_routes.json +114 -0
- package/eval/lib/experiment_registry.js +24 -0
- package/eval/run_eval.js +197 -0
- package/eval/run_fault_injection.js +201 -0
- package/eval/run_shadow_eval.js +85 -0
- package/eval/thresholds.json +9 -0
- package/examples/QUICKSTART.md +183 -0
- package/examples/README.md +61 -0
- package/examples/a3m-sdk.js +124 -0
- package/examples/basic-route.js +54 -0
- package/examples/chat-loop.js +202 -0
- package/examples/classify-then-route.js +102 -0
- package/examples/cost-compare.js +120 -0
- package/examples/ensemble.js +160 -0
- package/examples/whatsapp-telegram-bridge-demo.js +302 -0
- package/examples/whatsapp-telegram-bridge.js +269 -0
- package/hf-space/README.md +23 -0
- package/hf-space/app.py +240 -0
- package/hf-space/requirements.txt +1 -0
- package/huggingface_space/README.md +35 -0
- package/huggingface_space/app.py +126 -0
- package/huggingface_space/create_space.py +208 -0
- package/huggingface_space/requirements.txt +1 -0
- package/index.html +1 -1
- package/mcp-server/README.md +188 -0
- package/mcp-server/package.json +29 -0
- package/mcp-server/src/index.ts +744 -0
- package/mcp-server/tsconfig.json +19 -0
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +76 -272
- package/playground/README.md +51 -0
- package/playground/codesandbox.json +12 -0
- package/playground/index.js +39 -0
- package/proxy/README.md +227 -0
- package/proxy/package-lock.json +831 -0
- package/proxy/package.json +17 -0
- package/proxy/rate-limit.js +145 -0
- package/proxy/rate-limit.test.js +311 -0
- package/proxy/server.js +970 -0
- package/python/README.md +102 -0
- package/python/a3m/__init__.py +6 -0
- package/python/a3m/client.py +190 -0
- package/python/a3m/models.py +40 -0
- package/python/a3m/sync_client.py +61 -0
- package/python/examples.py +53 -0
- package/python/integrations.py +330 -0
- package/python/pyproject.toml +23 -0
- package/python/setup.py +28 -0
- package/python/tmlpd.py +369 -0
- package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/qna/TMLPD_QNA.md +751 -0
- package/research/FINDING_001_safety.md +28 -0
- package/research/FINDING_002_error_diversity.md +32 -0
- package/research/FINDING_003_confidence_weighted_voting.md +32 -0
- package/research/FINDING_004_cross_model_semantic_detection.md +37 -0
- package/research/FINDING_005_knowledge_gap_orthogonality.md +34 -0
- package/research/HALLUCINATION_RESEARCH.md +27 -0
- package/research/ensemble-voting.md +324 -0
- package/research/loss-functions.md +545 -0
- package/research-log.md +49 -0
- package/scripts/banner.js +29 -0
- package/scripts/benchmark-local-routerarena.ts +176 -0
- package/scripts/benchmark.js +145 -0
- package/scripts/benchmark.sh +61 -0
- package/scripts/compare-providers.sh +230 -0
- package/scripts/content-planner.js +25 -0
- package/scripts/create-labeled-benchmark.ts +105 -0
- package/scripts/cross_post.py +443 -0
- package/scripts/local-router-benchmark.ts +154 -0
- package/scripts/post-all.sh +41 -0
- package/scripts/publish_fcc.py +106 -0
- package/scripts/push-to-gitee.sh +25 -0
- package/scripts/routerarena_ensemble.js +144 -0
- package/scripts/routing-benchmark-v2.js +373 -0
- package/scripts/routing-benchmark-v3.js +118 -0
- package/scripts/routing-benchmark.js +462 -0
- package/scripts/run-labeled-benchmark.mjs +104 -0
- package/scripts/run-mmlu-benchmark.js +176 -0
- package/scripts/run-provider-benchmark.js +244 -0
- package/scripts/update-npm-badges.js +158 -0
- package/skill/SKILL.md +238 -0
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/ensemble.ts +2 -0
- package/src/routing/advancedRouter.ts +1 -1
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/submissions/benchmarks/ALL_PLATFORMS_SUBMISSION.md +94 -0
- package/submissions/benchmarks/LLMROUTERBENCH_SUBMISSION.md +121 -0
- package/submissions/benchmarks/MMRBENCH_SUBMISSION.md +94 -0
- package/submissions/benchmarks/ROUTERARENA_UPDATE.md +83 -0
- package/submissions/benchmarks/ROUTERBENCH_SUBMISSION.md +225 -0
- package/test-council/1-structure-tests.test.js +353 -0
- package/test-council/1-structure-tests.test.ts +353 -0
- package/test-council/2-edge-case-tests.test.ts +361 -0
- package/test-council/3-performance-tests.test.ts +652 -0
- package/test-council/4-integration-tests.test.ts +391 -0
- package/test-council/5-agent-council-eval.test.ts +413 -0
- package/test-council/AGENT_COUNCIL_ARCHITECTURE.md +349 -0
- package/test-council/TEST_COUNCIL_REPORT.md +201 -0
- package/test-council/agents/edge-case-agent.ts +363 -0
- package/test-council/agents/performance-agent.ts +426 -0
- package/test-council/agents/structure-agent.ts +227 -0
- package/test-council/council.md +183 -0
- package/tests/__mocks__/tokenUtils.ts +8 -0
- package/tests/memory/episodicMemory.test.ts +227 -0
- package/tests/package-lock.json +1785 -0
- package/tests/package.json +19 -0
- package/tests/routing/ensembleVoting.test.ts +236 -0
- package/tests/routing/providerRetry.test.ts +360 -0
- package/tests/routing/queryTypePresets.test.ts +208 -0
- package/tests/security/guardrailEngine.test.ts +700 -0
- package/tests/tsconfig.json +21 -0
- package/tests/vitest.config.ts +18 -0
- package/tmlpd-pi-extension/README.md +66 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +79 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/tmlpd-pi-extension/src/cache/responseCache.ts +147 -0
- package/tmlpd-pi-extension/src/cost/costTracker.ts +302 -0
- package/tmlpd-pi-extension/src/index.ts +232 -0
- package/tmlpd-pi-extension/src/memory/episodicMemory.ts +257 -0
- package/tmlpd-pi-extension/src/orchestration/haloOrchestrator.ts +266 -0
- package/tmlpd-pi-extension/src/orchestration/mctsWorkflow.ts +262 -0
- package/tmlpd-pi-extension/src/providers/localProvider.ts +406 -0
- package/tmlpd-pi-extension/src/providers/registry.ts +164 -0
- package/tmlpd-pi-extension/src/routing/ensembleVoting.ts +159 -0
- package/tmlpd-pi-extension/src/routing/queryTypePresets.ts +136 -0
- package/tmlpd-pi-extension/src/tools/tmlpdTools.ts +433 -0
- package/tmlpd-pi-extension/src/utils/batchProcessor.ts +232 -0
- package/tmlpd-pi-extension/src/utils/compression.ts +325 -0
- package/tmlpd-pi-extension/src/utils/reliability.ts +221 -0
- package/tmlpd-pi-extension/src/utils/tokenUtils.ts +145 -0
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/tsconfig.build.json +29 -0
- package/tsconfig.json +18 -0
- package/README.md.bak +0 -1185
- package/src/routing/advancedRouter.ts.bak +0 -650
- package/test.js.bak +0 -376
- package/{llms-full.txt.bak → docs/llms-full.txt} +0 -0
package/ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
## A3M Router — Adaptive Memory Multi-Model Router
|
|
4
|
+
|
|
5
|
+
A multi-provider LLM routing and orchestration engine. Routes prompts across 47+ providers, executes them in parallel with ensemble voting, and adapts model selection based on learned quality profiles, cost constraints, and task complexity.
|
|
6
|
+
|
|
7
|
+
## High-Level Overview
|
|
8
|
+
|
|
9
|
+
The system has three layers:
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
User / API / CLI / TUI
|
|
13
|
+
|
|
|
14
|
+
[Proxy Server / LangChain Adapter]
|
|
15
|
+
|
|
|
16
|
+
[Routing Engine] ←── [Memory System] ←── [Semantic Cache]
|
|
17
|
+
|
|
|
18
|
+
[Provider Layer] ←── [Retry Handler] ←── [Guardrails]
|
|
19
|
+
|
|
|
20
|
+
[47+ LLM APIs]
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
- **TypeScript Core** — routing, provider config, cost tracking, observability, cache, guardrails, proxy server, TUI
|
|
24
|
+
- **Python Layer** — Universal Model Router (learned routing), HALO orchestration (hierarchical planning), MCTS workflow search
|
|
25
|
+
- **Integrations** — LangChain adapter, MCP server, OpenAI-compatible proxy, CLI/TUI
|
|
26
|
+
|
|
27
|
+
## Directory Structure
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
src/
|
|
31
|
+
index.ts # Main entry point — exports all public APIs, createA3MRouter()
|
|
32
|
+
sdk.ts # A3MRouter SDK class — route(), routeBatch(), recommend(), serve(), analyze()
|
|
33
|
+
routing/
|
|
34
|
+
providerRetry.ts # Per-provider retry with exponential backoff + jitter, context window validation
|
|
35
|
+
providerHealth.ts # Provider health monitoring
|
|
36
|
+
universal_router.py # UniversalModelRouter — learned routing with online adaptation (Python)
|
|
37
|
+
providers/
|
|
38
|
+
providerConfig.ts # 47+ provider definitions, config loading, health checks, runtime registration
|
|
39
|
+
registry.py # Python provider registry with health monitoring
|
|
40
|
+
base.py # Python base provider classes
|
|
41
|
+
anthropic.py # Anthropic provider implementation (Python)
|
|
42
|
+
cerebras.py # Cerebras provider implementation (Python)
|
|
43
|
+
memory/
|
|
44
|
+
memoryTree.ts # MemoryTree — hierarchical chunk storage with search
|
|
45
|
+
autoFetch.ts # Automatic memory fetching
|
|
46
|
+
obsidianVault.ts # Obsidian vault integration
|
|
47
|
+
agentic_memory.py # Agentic memory (Python)
|
|
48
|
+
semantic_memory.py # Semantic memory (Python)
|
|
49
|
+
simple_memory.py # Simple memory (Python)
|
|
50
|
+
working_memory.py # Working memory (Python)
|
|
51
|
+
cost/
|
|
52
|
+
costTracker.ts # Per-request cost tracking
|
|
53
|
+
budgetEnforcer.ts # Budget limits, spend records, alerts
|
|
54
|
+
analytics/
|
|
55
|
+
costAnalytics.ts # Advanced cost analytics, savings reports, projections
|
|
56
|
+
cache/
|
|
57
|
+
semanticCache.ts # Embedding-based semantic cache with cosine similarity
|
|
58
|
+
research/ # Cache research files
|
|
59
|
+
security/
|
|
60
|
+
guardrails.ts # Prompt injection, PII detection, content filtering, output validation
|
|
61
|
+
observability/
|
|
62
|
+
index.ts # Observable exports
|
|
63
|
+
types.ts # Span, Metric, RouteTrace types
|
|
64
|
+
tracer.ts # Distributed tracing
|
|
65
|
+
metrics.ts # Metrics collector
|
|
66
|
+
middleware.ts # Express-style observability middleware
|
|
67
|
+
server/
|
|
68
|
+
proxyServer.ts # OpenAI-compatible HTTP proxy — POST /v1/chat/completions, GET /v1/models
|
|
69
|
+
modelMapper.ts # Model name resolution
|
|
70
|
+
dashboard.ts # Server dashboard
|
|
71
|
+
integrations/
|
|
72
|
+
langchainAdapter.ts # Drop-in ChatOpenAI replacement for LangChain
|
|
73
|
+
oauth.ts # OAuth integration
|
|
74
|
+
cli/
|
|
75
|
+
setupWizard.ts # Interactive setup wizard
|
|
76
|
+
tui/
|
|
77
|
+
index.ts # TUI launch wrapper
|
|
78
|
+
dashboard.ts # Blessed-based terminal dashboard
|
|
79
|
+
orchestration/ # (Python) HALO hierarchical orchestration
|
|
80
|
+
halo_orchestrator.py # HALO orchestrator — 3-tier planning
|
|
81
|
+
task_planner.py # Task decomposition into subtasks
|
|
82
|
+
role_assigner.py # Agent role assignment
|
|
83
|
+
execution_engine.py # Parallel execution with verification
|
|
84
|
+
mcts_workflow.py # MCTS-based workflow search
|
|
85
|
+
workflows/ # (Python) Workflow executors
|
|
86
|
+
router.py # Workflow router
|
|
87
|
+
orchestrator.py # Workflow orchestrator
|
|
88
|
+
chaining_executor.py # Sequential chain execution
|
|
89
|
+
parallelization_executor.py # Parallel task execution
|
|
90
|
+
difficulty_integration.py # Difficulty-aware routing
|
|
91
|
+
agents/
|
|
92
|
+
skill_enhanced_agent.py # Skill-enhanced agent (Python)
|
|
93
|
+
state/
|
|
94
|
+
simple_checkpoint.py # State checkpointing (Python)
|
|
95
|
+
types/
|
|
96
|
+
langchain.d.ts # LangChain type declarations
|
|
97
|
+
utils/ # (referenced from index.ts exports)
|
|
98
|
+
tokenUtils.ts # Token counting and estimation
|
|
99
|
+
|
|
100
|
+
python/
|
|
101
|
+
a3m/ # Python SDK for A3M Router
|
|
102
|
+
tmlpd.py # TMLPD Python client
|
|
103
|
+
examples.py # Usage examples
|
|
104
|
+
integrations.py # Python integration helpers
|
|
105
|
+
|
|
106
|
+
mcp-server/ # MCP (Model Context Protocol) server for AI agent integration
|
|
107
|
+
integrations/ # Additional integration entry points
|
|
108
|
+
eval/ # Evaluation framework and benchmarks
|
|
109
|
+
test/ tests/ # Test suites (TypeScript + Python)
|
|
110
|
+
docs/ # GitHub Pages documentation site
|
|
111
|
+
demo/ # Demo scripts and recordings
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Key Components
|
|
115
|
+
|
|
116
|
+
### 1. Ensemble Voting (P0)
|
|
117
|
+
|
|
118
|
+
The unique differentiator. Routes the same query to multiple providers in parallel, then merges responses using confidence-weighted voting. No other LLM router does this — everyone does sequential fallback (try A, then B, then C).
|
|
119
|
+
|
|
120
|
+
The ensemble flow:
|
|
121
|
+
1. Query enters the routing engine
|
|
122
|
+
2. Classifier extracts features (complexity, domain, length, code/math presence)
|
|
123
|
+
3. Top-N candidate models selected by tier, cost, and quality profile
|
|
124
|
+
4. Query dispatched to all N providers in parallel
|
|
125
|
+
5. Responses collected and merged with confidence weighting
|
|
126
|
+
6. Best merged result returned with fallback alternatives
|
|
127
|
+
|
|
128
|
+
### 2. Query Classification
|
|
129
|
+
|
|
130
|
+
The routing engine (`sdk.ts` → `extractQueryFeatures`) classifies queries on 10+ signals:
|
|
131
|
+
|
|
132
|
+
| Signal | Description |
|
|
133
|
+
|--------|-------------|
|
|
134
|
+
| complexity | 0.0–1.0, based on keyword density and reasoning indicators |
|
|
135
|
+
| has_code | Code block or programming keyword presence |
|
|
136
|
+
| has_math | Mathematical expression detection |
|
|
137
|
+
| is_multilingual | Non-English character ratio |
|
|
138
|
+
| is_translation | Translation verb detection |
|
|
139
|
+
| is_creative | Creative writing indicators |
|
|
140
|
+
| requires_reasoning | Step-by-step reasoning triggers |
|
|
141
|
+
| domain | Detected domain (legal, medical, security, finance, devops, data) |
|
|
142
|
+
|
|
143
|
+
Classification routes to the `free` / `cheap` / `mid` / `premium` cost tier, targeting 70.32 accuracy within ±1 tier (RouterArena score; #1 of 19 routers, arXiv:2510.00202).
|
|
144
|
+
|
|
145
|
+
### 3. Memory System
|
|
146
|
+
|
|
147
|
+
The `MemoryTree` (`memory/memoryTree.ts`) canonicalizes data into ≤3k-token chunks, scores each by relevance, and builds hierarchical summary trees. Supports:
|
|
148
|
+
- **Search**: keyword matching with score ranking
|
|
149
|
+
- **Context retrieval**: top-scored chunks for routing enrichment
|
|
150
|
+
- **Obsidian export**: markdown serialization
|
|
151
|
+
- **Stats**: tree depth, chunk count, memory utilization
|
|
152
|
+
|
|
153
|
+
Python memory variants (`agentic_memory.py`, `semantic_memory.py`, `working_memory.py`) provide agent-specific memory stores for the orchestration layer.
|
|
154
|
+
|
|
155
|
+
### 4. Provider Routing
|
|
156
|
+
|
|
157
|
+
The provider system (`providers/providerConfig.ts`) defines 47+ providers across five tiers:
|
|
158
|
+
|
|
159
|
+
| Tier | Providers | Purpose |
|
|
160
|
+
|------|-----------|---------|
|
|
161
|
+
| free | Ollama, LM Studio, vLLM, Google (free tier), NVIDIA NIM | Local / zero-cost |
|
|
162
|
+
| cheap | Groq, Cerebras, DeepInfra, Together, Fireworks, Novita, SambaNova, Anyscale, Replicate | Inference-optimized |
|
|
163
|
+
| mid | DeepSeek, Mistral, Perplexity, Cohere, AI21, Qwen (DashScope), StepFun | Good quality/price |
|
|
164
|
+
| premium | OpenAI, Anthropic, xAI (Grok) | Frontier models |
|
|
165
|
+
| enterprise | Azure OpenAI, AWS Bedrock, Google Vertex | Cloud-managed |
|
|
166
|
+
|
|
167
|
+
Each provider has:
|
|
168
|
+
- `baseUrl`, `apiKeyEnv` (env var name), `models` list
|
|
169
|
+
- `costPerK` (input/output), `tier`, `format` (openai/anthropic/google/cohere/aws-bedrock/google-vertex)
|
|
170
|
+
- `type` (api/cli/local), `priority` (selection order), `maxTokens`
|
|
171
|
+
|
|
172
|
+
Configuration sources (in priority order):
|
|
173
|
+
1. Environment variables (`*_API_KEY`)
|
|
174
|
+
2. `~/.config/a3m-router/providers.json`
|
|
175
|
+
3. Runtime registration via `registerProvider()`
|
|
176
|
+
|
|
177
|
+
### 5. Security (Guardrails Engine)
|
|
178
|
+
|
|
179
|
+
The `GuardrailEngine` (`security/guardrails.ts`) provides configurable input/output checks:
|
|
180
|
+
- **Prompt injection**: score-based detection (0–100)
|
|
181
|
+
- **PII detection and redaction**: emails, phones, SSNs, credit cards, IPs
|
|
182
|
+
- **Content filtering**: configurable blocklist, regex patterns
|
|
183
|
+
- **Language detection**: for intelligent routing decisions
|
|
184
|
+
- **Output validation**: quality checks, hallucination detection
|
|
185
|
+
- **Custom guardrails**: user-defined check functions
|
|
186
|
+
|
|
187
|
+
### 6. Observability
|
|
188
|
+
|
|
189
|
+
Three subsystems:
|
|
190
|
+
- **Tracer**: distributed tracing with span creation, completion, and route trace construction
|
|
191
|
+
- **MetricsCollector**: runtime metrics — request counts, latencies, error rates, cache hit rates
|
|
192
|
+
- **Middlewares**: Express-style `observabilityMiddleware`, `observabilityPlugin`, `budgetAlertMiddleware`
|
|
193
|
+
|
|
194
|
+
### 7. Semantic Cache
|
|
195
|
+
|
|
196
|
+
Embedding-based cache (`cache/semanticCache.ts`) stores query-response pairs. On lookup, computes cosine similarity against stored embeddings. Supports configurable threshold (default 0.92), TTL, LRU eviction (1000 entries), and multiple embedders (nomic via Ollama, OpenAI, or local).
|
|
197
|
+
|
|
198
|
+
### 8. Cost Tracking
|
|
199
|
+
|
|
200
|
+
Tiered cost management:
|
|
201
|
+
- **CostTracker**: per-request recording with provider, model, tokens, latency
|
|
202
|
+
- **CostAnalytics**: savings reports, monthly projections, provider breakdowns, CSV/JSON export
|
|
203
|
+
- **BudgetEnforcer**: hard budget caps with pre-request checks and alerts
|
|
204
|
+
|
|
205
|
+
### 9. Proxy Server
|
|
206
|
+
|
|
207
|
+
OpenAI-compatible HTTP proxy (`server/proxyServer.ts`) using only the Node.js built-in `http` module. Endpoints:
|
|
208
|
+
- `POST /v1/chat/completions` — OpenAI-compatible chat
|
|
209
|
+
- `POST /v1/completions` — Text completions
|
|
210
|
+
- `GET /v1/models` — Available models
|
|
211
|
+
- `GET /health` — Provider health status
|
|
212
|
+
|
|
213
|
+
Any OpenAI SDK can point to this proxy to get A3M routing automatically.
|
|
214
|
+
|
|
215
|
+
### 10. HALO Orchestration (Python)
|
|
216
|
+
|
|
217
|
+
Hierarchical Autonomous Logic-Oriented Orchestration based on arXiv:2505.13516. Three tiers:
|
|
218
|
+
|
|
219
|
+
1. **TaskPlanner**: decomposes complex tasks into subtasks with dependency resolution
|
|
220
|
+
2. **RoleAssigner**: assigns specialized agents (roles) to each subtask
|
|
221
|
+
3. **ExecutionEngine**: executes subtasks in parallel with verification and adaptive refinement
|
|
222
|
+
|
|
223
|
+
Optionally uses **MCTS** (Monte Carlo Tree Search) to explore different execution strategies and learn optimal workflows per task type.
|
|
224
|
+
|
|
225
|
+
### 11. MCP Server
|
|
226
|
+
|
|
227
|
+
Model Context Protocol server for AI agent integration. Allows AI agents (Claude, etc.) to use the A3M Router as a tool for parallel multi-LLM execution.
|
|
228
|
+
|
|
229
|
+
### 12. LangChain Integration
|
|
230
|
+
|
|
231
|
+
`A3MChatModel` (`integrations/langchainAdapter.ts`) is a drop-in replacement for `ChatOpenAI`. Routes all LLM calls through A3M for cost optimization and intelligent provider selection. Supports streaming, tool calling, and batch processing.
|
|
232
|
+
|
|
233
|
+
## Data Flow
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
1. User sends query (via SDK, proxy, CLI, TUI, or LangChain)
|
|
237
|
+
2. GuardrailsEngine checks input (injection, PII, content, length)
|
|
238
|
+
3. SemanticCache looks up embedding match (skip if cache hit)
|
|
239
|
+
4. RoutingEngine classifies query (complexity, domain, features)
|
|
240
|
+
5. Router selects optimal provider(s) using:
|
|
241
|
+
- Learned quality profiles (UniversalModelRouter)
|
|
242
|
+
- Cost constraints (BudgetEnforcer)
|
|
243
|
+
- Retry configuration (ProviderRetryHandler)
|
|
244
|
+
6. (Optional) Multiple providers called in parallel for ensemble voting
|
|
245
|
+
7. ProviderRetryHandler executes with exponential backoff + jitter
|
|
246
|
+
8. GuardrailsEngine validates output
|
|
247
|
+
9. Response returned + recorded in:
|
|
248
|
+
- CostTracker (per-request cost)
|
|
249
|
+
- CostAnalytics (aggregate stats)
|
|
250
|
+
- Observability (tracing + metrics)
|
|
251
|
+
- SemanticCache (store for future hits)
|
|
252
|
+
- MemoryTree (context enrichment)
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Design Decisions and Trade-offs
|
|
256
|
+
|
|
257
|
+
| Decision | Rationale | Trade-off |
|
|
258
|
+
|----------|-----------|-----------|
|
|
259
|
+
| **TypeScript primary** | npm ecosystem reach, serverless compatibility, Vercel/Netlify/Cloudflare Workers | Python users need separate SDK |
|
|
260
|
+
| **Node.js built-in http** for proxy | Zero dependencies, 19.5 KB total bundle | Less feature-rich than Express |
|
|
261
|
+
| **Embedding-based cache** | Semantic similarity beats exact-match for LLM queries | Requires Ollama or OpenAI embedder |
|
|
262
|
+
| **Per-provider retry config** | Chinese providers need longer timeouts + more retries (network latency, rate limits) | More config surface |
|
|
263
|
+
| **In-memory storage** | Zero infra, instant setup, 19.5 KB | No persistence across restarts (memory tree serializable to markdown) |
|
|
264
|
+
| **Online learning (Python router)** | Adapts to unseen models and changing quality | Requires feedback loop, cold start with heuristics |
|
|
265
|
+
| **MCTS for workflow search** | Finds optimal strategies for complex tasks | 3-10x slower than greedy for simple tasks |
|
|
266
|
+
| **47+ baked-in providers** | Zero-config multi-provider out of box | Maintenance burden as APIs change |
|
|
267
|
+
|
|
268
|
+
## Extension Points
|
|
269
|
+
|
|
270
|
+
### Adding a New Provider
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
import { registerProvider, ProviderDefinition } from 'adaptive-memory-multi-model-router';
|
|
274
|
+
|
|
275
|
+
registerProvider('my-provider', {
|
|
276
|
+
name: 'My Provider',
|
|
277
|
+
baseUrl: 'https://api.myprovider.com/v1/chat/completions',
|
|
278
|
+
apiKeyEnv: 'MY_PROVIDER_API_KEY',
|
|
279
|
+
models: ['model-name'],
|
|
280
|
+
costPerK: { input: 1.0, output: 2.0 },
|
|
281
|
+
tier: 'mid', // free | cheap | mid | premium | enterprise
|
|
282
|
+
format: 'openai', // openai | anthropic | google | cohere | aws-bedrock | google-vertex
|
|
283
|
+
type: 'api', // api | cli | local
|
|
284
|
+
priority: 15,
|
|
285
|
+
maxTokens: 8192,
|
|
286
|
+
});
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
Or via config file at `~/.config/a3m-router/providers.json`:
|
|
290
|
+
```json
|
|
291
|
+
{
|
|
292
|
+
"providers": {
|
|
293
|
+
"my-provider": {
|
|
294
|
+
"name": "My Provider",
|
|
295
|
+
"baseUrl": "https://api.myprovider.com/v1/chat/completions",
|
|
296
|
+
"apiKeyEnv": "MY_PROVIDER_API_KEY",
|
|
297
|
+
"models": ["model-name"],
|
|
298
|
+
"tier": "mid"
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
### Adding a Custom Retry Strategy
|
|
305
|
+
|
|
306
|
+
```typescript
|
|
307
|
+
import { createRetryHandler } from 'adaptive-memory-multi-model-router';
|
|
308
|
+
|
|
309
|
+
const handler = createRetryHandler({
|
|
310
|
+
'my-slow-provider': {
|
|
311
|
+
timeout: 60000,
|
|
312
|
+
retry: { maxRetries: 5, initialDelayMs: 5000 },
|
|
313
|
+
},
|
|
314
|
+
});
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Adding Custom Guardrails
|
|
318
|
+
|
|
319
|
+
```typescript
|
|
320
|
+
import { GuardrailEngine } from 'adaptive-memory-multi-model-router';
|
|
321
|
+
|
|
322
|
+
const guardrails = new GuardrailEngine({
|
|
323
|
+
userGuardrails: [
|
|
324
|
+
(content) => ({
|
|
325
|
+
passed: !content.includes('blocked-term'),
|
|
326
|
+
blocked: content.includes('blocked-term'),
|
|
327
|
+
reason: content.includes('blocked-term') ? 'Blocked term detected' : undefined,
|
|
328
|
+
}),
|
|
329
|
+
],
|
|
330
|
+
});
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### Adding Ensemble Voting Strategies
|
|
334
|
+
|
|
335
|
+
The ensemble system is extensible by adding new voting strategies to the parallel execution pipeline. Current strategy: confidence-weighted average across multiple provider responses.
|
|
336
|
+
|
|
337
|
+
### Custom Routing Strategies
|
|
338
|
+
|
|
339
|
+
The `UniversalModelRouter` (Python) learns routing profiles from execution data. To implement a custom strategy:
|
|
340
|
+
1. Subclass or wrap `routeQuery` in TypeScript
|
|
341
|
+
2. Or extend `UniversalModelRouter._calculate_combined_score` in Python
|
|
342
|
+
3. Register custom feature extractors via `extractQueryFeatures`
|
|
343
|
+
|
|
344
|
+
### MCP Server Extensions
|
|
345
|
+
|
|
346
|
+
The MCP server at `mcp-server/` exposes routing as tools. Add tools by extending the MCP tool definitions.
|
package/AUDIT_REPORT.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Dependency Audit Report
|
|
2
|
+
|
|
3
|
+
**Date:** 2026-05-28
|
|
4
|
+
**Project:** adaptive-memory-multi-model-router (v2.13.18)
|
|
5
|
+
|
|
6
|
+
## Summary
|
|
7
|
+
|
|
8
|
+
| Metric | Value |
|
|
9
|
+
|--------|-------|
|
|
10
|
+
| Total dependencies | 5 (3 prod + 2 dev) |
|
|
11
|
+
| Vulnerabilities | 0 (none found) |
|
|
12
|
+
| Outdated packages | 1 updated |
|
|
13
|
+
|
|
14
|
+
## Vulnerabilities
|
|
15
|
+
|
|
16
|
+
**0 vulnerabilities found.** The dependency tree is clean with no reported security issues across all direct and transitive dependencies.
|
|
17
|
+
|
|
18
|
+
## Updated Packages
|
|
19
|
+
|
|
20
|
+
| Package | From | To | Type | Reason |
|
|
21
|
+
|---------|------|----|------|--------|
|
|
22
|
+
| `@types/node` | 25.8.0 | 25.9.1 | devDependency | Updated via `npm update` within `^25.8.0` semver range |
|
|
23
|
+
|
|
24
|
+
## Notes
|
|
25
|
+
|
|
26
|
+
- `@langchain/core` is listed as `MISSING` in `npm outdated` output — this is expected. It is an **optional peer dependency** (`"optional": true` in `peerDependenciesMeta`) and is not required for core functionality.
|
|
27
|
+
- All other dependencies (`blessed@0.1.81`, `nanoid@5.1.11`, `typescript@6.0.3`) are up-to-date within their semver ranges.
|
|
28
|
+
- No breaking changes were introduced — `npm update` only applied compatible version bumps within declared semver ranges.
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity
|
|
10
|
+
and orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment for our
|
|
18
|
+
community include:
|
|
19
|
+
|
|
20
|
+
* Demonstrating empathy and kindness toward other people
|
|
21
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
|
22
|
+
* Giving and gracefully accepting constructive feedback
|
|
23
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
|
24
|
+
and learning from the experience
|
|
25
|
+
* Focusing on what is best not just for us as individuals, but for the
|
|
26
|
+
overall community
|
|
27
|
+
|
|
28
|
+
Examples of unacceptable behavior include:
|
|
29
|
+
|
|
30
|
+
* The use of sexualized language or imagery, and sexual attention or
|
|
31
|
+
advances of any kind
|
|
32
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
33
|
+
* Public or private harassment
|
|
34
|
+
* Publishing others' private information, such as a physical or email
|
|
35
|
+
address, without their explicit permission
|
|
36
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
|
37
|
+
professional setting
|
|
38
|
+
|
|
39
|
+
## Enforcement Responsibilities
|
|
40
|
+
|
|
41
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
42
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
43
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
44
|
+
or harmful.
|
|
45
|
+
|
|
46
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
|
47
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
|
48
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
|
49
|
+
decisions when appropriate.
|
|
50
|
+
|
|
51
|
+
## Scope
|
|
52
|
+
|
|
53
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
54
|
+
an individual is officially representing the community in public spaces.
|
|
55
|
+
Examples of representing our community include using an official e-mail address,
|
|
56
|
+
posting via an official social media account, or acting as an appointed
|
|
57
|
+
representative at an online or offline event.
|
|
58
|
+
|
|
59
|
+
## Enforcement
|
|
60
|
+
|
|
61
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
62
|
+
reported to the community leaders responsible for enforcement at
|
|
63
|
+
[Sdas22@gmail.com](mailto:Sdas22@gmail.com).
|
|
64
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
|
65
|
+
|
|
66
|
+
All community leaders are obligated to respect the privacy and security of the
|
|
67
|
+
reporter of any incident.
|
|
68
|
+
|
|
69
|
+
## Enforcement Guidelines
|
|
70
|
+
|
|
71
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
|
72
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
|
73
|
+
|
|
74
|
+
### 1. Correction
|
|
75
|
+
|
|
76
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
|
77
|
+
unprofessional or unwelcome in the community.
|
|
78
|
+
|
|
79
|
+
**Consequence**: A private, written warning from community leaders, providing
|
|
80
|
+
clarity around the nature of the violation and an explanation of why the
|
|
81
|
+
behavior was inappropriate. A public apology may be requested.
|
|
82
|
+
|
|
83
|
+
### 2. Warning
|
|
84
|
+
|
|
85
|
+
**Community Impact**: A violation through a single incident or series
|
|
86
|
+
of actions.
|
|
87
|
+
|
|
88
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
|
89
|
+
interaction with the people involved, including unsolicited interaction with
|
|
90
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
|
91
|
+
includes avoiding interactions in community spaces as well as external channels
|
|
92
|
+
like social media. Violating these terms may lead to a temporary or
|
|
93
|
+
permanent ban.
|
|
94
|
+
|
|
95
|
+
### 3. Temporary Ban
|
|
96
|
+
|
|
97
|
+
**Community Impact**: A serious violation of community standards, including
|
|
98
|
+
sustained inappropriate behavior.
|
|
99
|
+
|
|
100
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
|
101
|
+
communication with the community for a specified period of time. No public or
|
|
102
|
+
private interaction with the people involved, including unsolicited interaction
|
|
103
|
+
with those enforcing the Code of Conduct, is allowed during this period.
|
|
104
|
+
Violating these terms may lead to a permanent ban.
|
|
105
|
+
|
|
106
|
+
### 4. Permanent Ban
|
|
107
|
+
|
|
108
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
|
109
|
+
standards, including sustained inappropriate behavior, harassment of an
|
|
110
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
|
111
|
+
|
|
112
|
+
**Consequence**: A permanent ban from any sort of public interaction within
|
|
113
|
+
the community.
|
|
114
|
+
|
|
115
|
+
## Attribution
|
|
116
|
+
|
|
117
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|
118
|
+
version 2.0, available at
|
|
119
|
+
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
|
120
|
+
|
|
121
|
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
|
122
|
+
enforcement ladder](https://github.com/mozilla/diversity).
|
|
123
|
+
|
|
124
|
+
[homepage]: https://www.contributor-covenant.org
|
|
125
|
+
|
|
126
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
|
127
|
+
https://www.contributor-covenant.org/faq. Translations are available at
|
|
128
|
+
https://www.contributor-covenant.org/translations.
|
package/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Contributing to A3M Router
|
|
2
|
+
|
|
3
|
+
Thanks for helping build the fastest-growing open-source LLM router! 🚀
|
|
4
|
+
|
|
5
|
+
## Quick Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/Das-rebel/a3m-router.git
|
|
9
|
+
cd a3m-router
|
|
10
|
+
npm install
|
|
11
|
+
npm run build
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Project Structure
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
src/
|
|
18
|
+
├── providers/ # 47+ LLM provider configurations
|
|
19
|
+
├── routing/ # UCB1 + MCTS routing engine
|
|
20
|
+
├── cache/ # Semantic deduplication cache
|
|
21
|
+
├── proxy/ # OpenAI-compatible proxy server
|
|
22
|
+
└── tui/ # Terminal UI overlay
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Development
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm run build # Compile TypeScript
|
|
29
|
+
npm test # Run tests
|
|
30
|
+
node dist/tui/dashboard.js # Launch TUI
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## PR Guidelines
|
|
34
|
+
|
|
35
|
+
- Keep the package under 20KB (no ML deps)
|
|
36
|
+
- Add provider configs in `src/providers/`
|
|
37
|
+
- Route logic in `src/routing/`
|
|
38
|
+
- TUI changes in `src/tui/`
|
|
39
|
+
- Update CHANGELOG.md
|
|
40
|
+
|
|
41
|
+
## Adding a Provider
|
|
42
|
+
|
|
43
|
+
1. Add config in `src/providers/providerConfig.ts`
|
|
44
|
+
2. Set tier: `free` / `cheap` / `mid` / `premium` / `enterprise`
|
|
45
|
+
3. Add to `PROVIDER_TIERS` map
|
|
46
|
+
4. PR with latency benchmarks
|
|
47
|
+
|
|
48
|
+
## Questions?
|
|
49
|
+
|
|
50
|
+
Open an issue or reach out on [GitHub Discussions](https://github.com/Das-rebel/a3m-router/discussions).
|
package/CONTRIBUTORS.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Contributors ✨
|
|
2
|
+
|
|
3
|
+
Thanks to everyone who has contributed to A3M Router!
|
|
4
|
+
|
|
5
|
+
## Core Team
|
|
6
|
+
- **Megha Mukherjee** — Creator & Lead Developer
|
|
7
|
+
- Parallel ensemble architecture
|
|
8
|
+
- Provider routing system
|
|
9
|
+
- Confidence scoring algorithm
|
|
10
|
+
|
|
11
|
+
## Contributors
|
|
12
|
+
<!--
|
|
13
|
+
Add yourself here! Open a PR with your name and contribution.
|
|
14
|
+
Format: - [Name](https://github.com/username) — What you contributed
|
|
15
|
+
-->
|
|
16
|
+
|
|
17
|
+
## Special Thanks
|
|
18
|
+
- The open-source LLM community
|
|
19
|
+
- Everyone who starred, forked, or shared the project
|
|
20
|
+
- npm downloaders (10K+ and counting! ❤️)
|
package/Dockerfile
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# A3M Router — Adaptive Memory Multi-Model Router
|
|
3
|
+
# Multi-stage Docker build for minimal production image
|
|
4
|
+
# =============================================================================
|
|
5
|
+
|
|
6
|
+
# ---------------------------------------------------------------------------
|
|
7
|
+
# Stage 1: Build
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
# Build on Alpine (musl) so the node_modules copied into the Alpine run stage were installed against the same libc.
FROM node:20-alpine AS build
|
|
10
|
+
|
|
11
|
+
WORKDIR /app
|
|
12
|
+
|
|
13
|
+
# Copy package manifests first for layer caching
|
|
14
|
+
COPY package.json package-lock.json ./
|
|
15
|
+
RUN npm ci
|
|
16
|
+
|
|
17
|
+
# Copy source code
|
|
18
|
+
COPY tsconfig.json tsconfig.build.json ./
|
|
19
|
+
COPY src/ src/
|
|
20
|
+
|
|
21
|
+
# Build TypeScript → dist/
|
|
22
|
+
RUN npm run build
|
|
23
|
+
|
|
24
|
+
# Prune dev dependencies (optional, to keep only production deps if any)
|
|
25
|
+
RUN npm prune --omit=dev
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Stage 2: Run
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
FROM node:20-alpine AS run
|
|
31
|
+
|
|
32
|
+
WORKDIR /app
|
|
33
|
+
|
|
34
|
+
# Create non-root user for security
|
|
35
|
+
RUN addgroup -S appgroup && adduser -S appuser -G appgroup
|
|
36
|
+
|
|
37
|
+
# Copy built artifacts from build stage
|
|
38
|
+
COPY --from=build /app/dist ./dist
|
|
39
|
+
COPY --from=build /app/node_modules ./node_modules
|
|
40
|
+
COPY --from=build /app/package.json ./
|
|
41
|
+
|
|
42
|
+
# Default port for the A3M Router API/health endpoint
|
|
43
|
+
EXPOSE 3000
|
|
44
|
+
|
|
45
|
+
# Switch to non-root user
|
|
46
|
+
USER appuser
|
|
47
|
+
|
|
48
|
+
# Health check
|
|
49
|
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
|
50
|
+
CMD wget --no-verbose --tries=1 --spider http://localhost:3000/health || exit 1
|
|
51
|
+
|
|
52
|
+
# Default command — start the main router server
|
|
53
|
+
CMD ["node", "dist/index.js"]
|
package/Dockerfile.proxy
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# A3M Router — OpenAI-Compatible Proxy
|
|
3
|
+
# Standalone proxy service for OpenAI-compatible API routing via A3M
|
|
4
|
+
# =============================================================================
|
|
5
|
+
|
|
6
|
+
FROM node:20-alpine
|
|
7
|
+
|
|
8
|
+
WORKDIR /app
|
|
9
|
+
|
|
10
|
+
# Create non-root user
|
|
11
|
+
RUN addgroup -S appgroup && adduser -S appuser -G appgroup
|
|
12
|
+
|
|
13
|
+
# Copy proxy package manifests for layer caching
|
|
14
|
+
COPY proxy/package*.json ./
|
|
15
|
+
|
|
16
|
+
# Install production dependencies only
|
|
17
|
+
RUN npm ci --omit=dev && npm cache clean --force
|
|
18
|
+
|
|
19
|
+
# Copy proxy source code
|
|
20
|
+
COPY proxy/ ./
|
|
21
|
+
|
|
22
|
+
# OpenAI-compatible proxy default port
|
|
23
|
+
EXPOSE 8787
|
|
24
|
+
|
|
25
|
+
# Switch to non-root user
|
|
26
|
+
USER appuser
|
|
27
|
+
|
|
28
|
+
# Health check
|
|
29
|
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
|
30
|
+
CMD wget --no-verbose --tries=1 --spider http://localhost:8787/health || exit 1
|
|
31
|
+
|
|
32
|
+
# Start proxy server
|
|
33
|
+
CMD ["node", "server.js"]
|