effgen 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269)
  1. {effgen-0.2.2/effgen.egg-info → effgen-0.2.4}/PKG-INFO +58 -1
  2. {effgen-0.2.2 → effgen-0.2.4}/README.md +125 -10
  3. {effgen-0.2.2 → effgen-0.2.4}/README_PYPI.md +37 -0
  4. {effgen-0.2.2 → effgen-0.2.4}/effgen/__init__.py +140 -2
  5. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/__init__.py +1 -1
  6. {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/__init__.py +1 -1
  7. {effgen-0.2.2 → effgen-0.2.4}/effgen/cli.py +317 -11
  8. {effgen-0.2.2 → effgen-0.2.4}/effgen/client/client.py +2 -2
  9. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/__init__.py +3 -3
  10. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/agent.py +128 -16
  11. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/structured_output.py +1 -1
  12. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/tool_calling.py +1 -1
  13. {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/token_budget.py +1 -1
  14. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/__init__.py +95 -2
  15. effgen-0.2.4/effgen/models/_cost.py +467 -0
  16. effgen-0.2.4/effgen/models/_cost_store.py +207 -0
  17. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/_rate_limit.py +184 -17
  18. effgen-0.2.4/effgen/models/_rate_limit_store.py +392 -0
  19. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_adapter.py +54 -5
  20. effgen-0.2.4/effgen/models/auth.py +56 -0
  21. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/capabilities.py +16 -0
  22. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/cerebras_adapter.py +194 -93
  23. effgen-0.2.4/effgen/models/errors.py +342 -0
  24. effgen-0.2.4/effgen/models/fireworks_adapter.py +706 -0
  25. effgen-0.2.4/effgen/models/fireworks_models.py +1379 -0
  26. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_adapter.py +83 -18
  27. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_models.py +37 -0
  28. effgen-0.2.4/effgen/models/groq_adapter.py +734 -0
  29. effgen-0.2.4/effgen/models/groq_models.py +298 -0
  30. effgen-0.2.4/effgen/models/hf_inference_adapter.py +860 -0
  31. effgen-0.2.4/effgen/models/hf_inference_models.py +382 -0
  32. effgen-0.2.4/effgen/models/latency_tracker.py +220 -0
  33. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/model_loader.py +136 -1
  34. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_adapter.py +56 -8
  35. effgen-0.2.4/effgen/models/registry.py +244 -0
  36. effgen-0.2.4/effgen/models/replicate_adapter.py +876 -0
  37. effgen-0.2.4/effgen/models/replicate_models.py +836 -0
  38. effgen-0.2.4/effgen/models/router.py +872 -0
  39. effgen-0.2.4/effgen/models/routing/__init__.py +8 -0
  40. effgen-0.2.4/effgen/models/routing/_probe.py +203 -0
  41. effgen-0.2.4/effgen/models/routing/cost.py +290 -0
  42. effgen-0.2.4/effgen/models/routing/first_available.py +72 -0
  43. effgen-0.2.4/effgen/models/routing/latency.py +237 -0
  44. effgen-0.2.4/effgen/models/routing/retry.py +168 -0
  45. effgen-0.2.4/effgen/models/together_adapter.py +742 -0
  46. effgen-0.2.4/effgen/models/together_models.py +2637 -0
  47. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/transformers_engine.py +34 -6
  48. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/ingest.py +1 -1
  49. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/reranker.py +1 -1
  50. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/base_tool.py +2 -2
  51. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/calculator.py +2 -2
  52. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/json_tool.py +4 -4
  53. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/protocol.py +1 -1
  54. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/validators.py +2 -2
  55. {effgen-0.2.2 → effgen-0.2.4/effgen.egg-info}/PKG-INFO +58 -1
  56. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/SOURCES.txt +21 -0
  57. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/requires.txt +25 -0
  58. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/top_level.txt +0 -1
  59. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/conversational_agent.py +2 -2
  60. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_hybrid_agent.py +2 -3
  61. {effgen-0.2.2 → effgen-0.2.4}/examples/tools/coding_agent.py +3 -4
  62. {effgen-0.2.2 → effgen-0.2.4}/examples/utils/sweep_model.py +3 -3
  63. {effgen-0.2.2 → effgen-0.2.4}/pyproject.toml +27 -1
  64. effgen-0.2.2/effgen/models/_cost.py +0 -234
  65. effgen-0.2.2/effgen/models/errors.py +0 -50
  66. effgen-0.2.2/effgen/models/router.py +0 -396
  67. {effgen-0.2.2 → effgen-0.2.4}/LICENSE +0 -0
  68. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/embeddings.py +0 -0
  69. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/middleware.py +0 -0
  70. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/openai_compat.py +0 -0
  71. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/pool.py +0 -0
  72. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/queue.py +0 -0
  73. {effgen-0.2.2 → effgen-0.2.4}/effgen/api/tenancy.py +0 -0
  74. {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/prompt_cache.py +0 -0
  75. {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/result_cache.py +0 -0
  76. {effgen-0.2.2 → effgen-0.2.4}/effgen/client/__init__.py +0 -0
  77. {effgen-0.2.2 → effgen-0.2.4}/effgen/client/exceptions.py +0 -0
  78. {effgen-0.2.2 → effgen-0.2.4}/effgen/completion.py +0 -0
  79. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/__init__.py +0 -0
  80. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/loader.py +0 -0
  81. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/agent_config.schema.json +0 -0
  82. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/model_config.schema.json +0 -0
  83. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/tool_config.schema.json +0 -0
  84. {effgen-0.2.2 → effgen-0.2.4}/effgen/config/validator.py +0 -0
  85. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/aggregation.py +0 -0
  86. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/background.py +0 -0
  87. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/batch.py +0 -0
  88. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/checkpoint.py +0 -0
  89. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/clarification.py +0 -0
  90. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/complexity_analyzer.py +0 -0
  91. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/decomposition_engine.py +0 -0
  92. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/execution_tracker.py +0 -0
  93. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/feedback.py +0 -0
  94. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/human_loop.py +0 -0
  95. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/lifecycle.py +0 -0
  96. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/message_bus.py +0 -0
  97. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/orchestrator.py +0 -0
  98. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/router.py +0 -0
  99. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/session.py +0 -0
  100. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/shared_state.py +0 -0
  101. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/state.py +0 -0
  102. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/sub_agent_manager.py +0 -0
  103. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/task.py +0 -0
  104. {effgen-0.2.2 → effgen-0.2.4}/effgen/core/workflow.py +0 -0
  105. {effgen-0.2.2 → effgen-0.2.4}/effgen/debug/__init__.py +0 -0
  106. {effgen-0.2.2 → effgen-0.2.4}/effgen/debug/inspector.py +0 -0
  107. {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/__init__.py +0 -0
  108. {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/base.py +0 -0
  109. {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/expander.py +0 -0
  110. {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/presets.py +0 -0
  111. {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/__init__.py +0 -0
  112. {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/comparison.py +0 -0
  113. {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/evaluator.py +0 -0
  114. {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/regression.py +0 -0
  115. {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/suites.py +0 -0
  116. {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/__init__.py +0 -0
  117. {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/docker_sandbox.py +0 -0
  118. {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/sandbox.py +0 -0
  119. {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/validators.py +0 -0
  120. {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/__init__.py +0 -0
  121. {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/allocator.py +0 -0
  122. {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/monitor.py +0 -0
  123. {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/utils.py +0 -0
  124. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/__init__.py +0 -0
  125. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/base.py +0 -0
  126. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/content.py +0 -0
  127. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/injection.py +0 -0
  128. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/presets.py +0 -0
  129. {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/tool_safety.py +0 -0
  130. {effgen-0.2.2 → effgen-0.2.4}/effgen/hardware/__init__.py +0 -0
  131. {effgen-0.2.2 → effgen-0.2.4}/effgen/hardware/platform.py +0 -0
  132. {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/__init__.py +0 -0
  133. {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/long_term.py +0 -0
  134. {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/short_term.py +0 -0
  135. {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/vector_store.py +0 -0
  136. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_cache.py +0 -0
  137. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_models.py +0 -0
  138. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/base.py +0 -0
  139. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/batching.py +0 -0
  140. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/cerebras_models.py +0 -0
  141. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_files.py +0 -0
  142. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gguf_engine.py +0 -0
  143. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/lazy.py +0 -0
  144. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/mlx_engine.py +0 -0
  145. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/mlx_vlm_engine.py +0 -0
  146. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_models.py +0 -0
  147. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_schema.py +0 -0
  148. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/pool.py +0 -0
  149. {effgen-0.2.2 → effgen-0.2.4}/effgen/models/vllm_engine.py +0 -0
  150. {effgen-0.2.2 → effgen-0.2.4}/effgen/presets/__init__.py +0 -0
  151. {effgen-0.2.2 → effgen-0.2.4}/effgen/presets/registry.py +0 -0
  152. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/__init__.py +0 -0
  153. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/agent_system_prompt.py +0 -0
  154. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/chain_manager.py +0 -0
  155. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/optimizer.py +0 -0
  156. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/template_manager.py +0 -0
  157. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/analysis.yaml +0 -0
  158. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/coding.yaml +0 -0
  159. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/general.yaml +0 -0
  160. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/reasoning.yaml +0 -0
  161. {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/tool_prompt_generator.py +0 -0
  162. {effgen-0.2.2 → effgen-0.2.4}/effgen/py.typed +0 -0
  163. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/__init__.py +0 -0
  164. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/attribution.py +0 -0
  165. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/chunking.py +0 -0
  166. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/context_builder.py +0 -0
  167. {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/search.py +0 -0
  168. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/__init__.py +0 -0
  169. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/__init__.py +0 -0
  170. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/agentic_search.py +0 -0
  171. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/anthropic_native.py +0 -0
  172. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/bash_tool.py +0 -0
  173. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/code_executor.py +0 -0
  174. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/communication.py +0 -0
  175. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/data_analysis.py +0 -0
  176. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/datetime_tool.py +0 -0
  177. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/devops.py +0 -0
  178. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/file_ops.py +0 -0
  179. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/finance.py +0 -0
  180. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/gemini_native.py +0 -0
  181. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/knowledge.py +0 -0
  182. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/openai_native.py +0 -0
  183. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/python_repl.py +0 -0
  184. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/retrieval.py +0 -0
  185. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/text_processing.py +0 -0
  186. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/url_fetch.py +0 -0
  187. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/weather.py +0 -0
  188. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/web_search.py +0 -0
  189. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/wikipedia_tool.py +0 -0
  190. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/fallback.py +0 -0
  191. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/plugin.py +0 -0
  192. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/__init__.py +0 -0
  193. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/__init__.py +0 -0
  194. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/agent_card.py +0 -0
  195. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/client.py +0 -0
  196. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/protocol.py +0 -0
  197. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/__init__.py +0 -0
  198. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/client.py +0 -0
  199. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/protocol.py +0 -0
  200. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/server.py +0 -0
  201. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/__init__.py +0 -0
  202. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/client.py +0 -0
  203. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/server.py +0 -0
  204. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/__init__.py +0 -0
  205. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/client.py +0 -0
  206. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/server.py +0 -0
  207. {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/registry.py +0 -0
  208. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/__init__.py +0 -0
  209. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/circuit_breaker.py +0 -0
  210. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/health.py +0 -0
  211. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/logging.py +0 -0
  212. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/metrics.py +0 -0
  213. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/prometheus_metrics.py +0 -0
  214. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/structured_logging.py +0 -0
  215. {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/tracing.py +0 -0
  216. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/dependency_links.txt +0 -0
  217. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/entry_points.txt +0 -0
  218. {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/not-zip-safe +0 -0
  219. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/advanced_streaming_agent.py +0 -0
  220. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/agent_communication.py +0 -0
  221. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/async_concurrent_agent.py +0 -0
  222. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/data_processing_agent.py +0 -0
  223. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/error_recovery_agent.py +0 -0
  224. {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/multi_agent_pipeline.py +0 -0
  225. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/agent_viz_mlx.py +0 -0
  226. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent.py +0 -0
  227. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent_mlx.py +0 -0
  228. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent_vllm.py +0 -0
  229. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/calculator_agent.py +0 -0
  230. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/chat_gui_mlx.py +0 -0
  231. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/qa_agent.py +0 -0
  232. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/tool_builder_gui.py +0 -0
  233. {effgen-0.2.2 → effgen-0.2.4}/examples/basic/tool_tester_gui.py +0 -0
  234. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/basic_cerebras.py +0 -0
  235. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_agent.py +0 -0
  236. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_all_models.py +0 -0
  237. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_cost_tracker.py +0 -0
  238. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_hard_agent.py +0 -0
  239. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_load_model.py +0 -0
  240. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_multi_turn.py +0 -0
  241. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_rate_limits.py +0 -0
  242. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_streaming.py +0 -0
  243. {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_tool_calling.py +0 -0
  244. {effgen-0.2.2 → effgen-0.2.4}/examples/data/download_arc.py +0 -0
  245. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/__init__.py +0 -0
  246. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/basic_chat.py +0 -0
  247. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/caching_and_structured_agent.py +0 -0
  248. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/multi_turn_chat.py +0 -0
  249. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_code_interpreter.py +0 -0
  250. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_file_search.py +2 -2
  251. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_web_search.py +0 -0
  252. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/openai_agent.py +0 -0
  253. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/prompt_caching.py +0 -0
  254. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/reasoning_models.py +0 -0
  255. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/structured_outputs.py +0 -0
  256. {effgen-0.2.2 → effgen-0.2.4}/examples/openai/tool_calling.py +0 -0
  257. {effgen-0.2.2 → effgen-0.2.4}/examples/plugins_presets/plugin_example.py +0 -0
  258. {effgen-0.2.2 → effgen-0.2.4}/examples/plugins_presets/preset_agents.py +0 -0
  259. {effgen-0.2.2 → effgen-0.2.4}/examples/tools/advanced_multi_tool_agent.py +0 -0
  260. {effgen-0.2.2 → effgen-0.2.4}/examples/tools/file_operations_agent.py +0 -0
  261. {effgen-0.2.2 → effgen-0.2.4}/examples/tools/multi_tool_agent.py +0 -0
  262. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/agentic_search_agent.py +0 -0
  263. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/memory_agent.py +0 -0
  264. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/retrieval_agent.py +0 -0
  265. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/streaming_agent.py +0 -0
  266. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/weather_agent.py +0 -0
  267. {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/web_agent.py +0 -0
  268. {effgen-0.2.2 → effgen-0.2.4}/setup.cfg +0 -0
  269. {effgen-0.2.2 → effgen-0.2.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: effgen
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: A comprehensive framework for building agents with Small Language Models
5
5
  Home-page: https://github.com/ctrl-gaurav/effGen
6
6
  Author: Gaurav Srivastava
@@ -108,6 +108,16 @@ Provides-Extra: gguf
108
108
  Requires-Dist: llama-cpp-python>=0.2.0; extra == "gguf"
109
109
  Provides-Extra: cerebras
110
110
  Requires-Dist: cerebras-cloud-sdk>=1.0; extra == "cerebras"
111
+ Provides-Extra: groq
112
+ Requires-Dist: groq>=0.15; extra == "groq"
113
+ Provides-Extra: together
114
+ Requires-Dist: together>=1.3; extra == "together"
115
+ Provides-Extra: fireworks
116
+ Requires-Dist: fireworks-ai>=0.15; extra == "fireworks"
117
+ Provides-Extra: replicate
118
+ Requires-Dist: replicate>=1.0; extra == "replicate"
119
+ Provides-Extra: hf
120
+ Requires-Dist: huggingface_hub>=0.26; extra == "hf"
111
121
  Provides-Extra: flash-attn
112
122
  Requires-Dist: flash-attn>=2.3.0; extra == "flash-attn"
113
123
  Provides-Extra: vector-db
@@ -127,6 +137,11 @@ Provides-Extra: monitoring
127
137
  Requires-Dist: wandb>=0.16.0; extra == "monitoring"
128
138
  Requires-Dist: tensorboard>=2.15.0; extra == "monitoring"
129
139
  Provides-Extra: all
140
+ Requires-Dist: pytest>=7.4.0; extra == "all"
141
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "all"
142
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
143
+ Requires-Dist: pytest-timeout>=2.2.0; extra == "all"
144
+ Requires-Dist: pytest-forked>=1.6.0; extra == "all"
130
145
  Requires-Dist: vllm>=0.2.7; extra == "all"
131
146
  Requires-Dist: faiss-cpu>=1.7.4; extra == "all"
132
147
  Requires-Dist: chromadb>=0.4.18; extra == "all"
@@ -148,6 +163,11 @@ Requires-Dist: rouge-score>=0.1.2; extra == "all"
148
163
  Requires-Dist: nltk>=3.8.0; extra == "all"
149
164
  Requires-Dist: llama-cpp-python>=0.2.0; extra == "all"
150
165
  Requires-Dist: cerebras-cloud-sdk>=1.0; extra == "all"
166
+ Requires-Dist: groq>=0.15; extra == "all"
167
+ Requires-Dist: together>=1.3; extra == "all"
168
+ Requires-Dist: fireworks-ai>=0.15; extra == "all"
169
+ Requires-Dist: replicate>=1.0; extra == "all"
170
+ Requires-Dist: huggingface_hub>=0.26; extra == "all"
151
171
  Requires-Dist: bitsandbytes>=0.46.1; extra == "all"
152
172
  Requires-Dist: datasets>=2.14.0; extra == "all"
153
173
  Dynamic: author
@@ -194,6 +214,8 @@ Dynamic: requires-python
194
214
 
195
215
  | | Date | Update |
196
216
  |:---:|:---|:---|
217
+ | 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#024---2026-05-14) |
218
+ | 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#023---2026-05-04) |
197
219
  | 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#021---2026-04-25) |
198
220
  | 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#020---2026-04-09) |
199
221
  | 🍎 | **8 Apr 2026** | **MLX & Apple Silicon support merged** (PR #4): Native Metal GPU acceleration via MLX & MLX-VLM backends. `pip install effgen[mlx]` |
@@ -392,6 +414,41 @@ Production API<br/>
392
414
 
393
415
  ---
394
416
 
417
+ ## 🆕 ModelRouter — Smart Multi-Provider Routing (v0.2.4)
418
+
419
+ Route requests across 9 cloud providers automatically — pick the cheapest, fastest, or first available:
420
+
421
+ ```python
422
+ from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
423
+ from effgen.models.capabilities import Capability
424
+
425
+ # Build a router: try fastest first, fall back to cheapest
426
+ router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
427
+
428
+ ctx = RoutingContext(
429
+ prompt_tokens_estimate=500,
430
+ user_budget_usd=0.01, # stay within $0.01
431
+ latency_budget_ms=3000, # need response in under 3s
432
+ required_capabilities={Capability.chat},
433
+ )
434
+
435
+ decision = router.route(ctx)
436
+ print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
437
+ print(decision.eliminated) # [(pair, reason), ...] — fully explainable
438
+ ```
439
+
440
+ **Transparent failover** — `route_and_execute` retries on rate-limits, 5xx errors, or timeouts and seamlessly moves to the next-best provider.
441
+
442
+ **Cost dashboard** — track every API call:
443
+
444
+ ```bash
445
+ effgen cost today # per-provider per-model table
446
+ effgen cost week # rolling 7-day view
447
+ effgen cost set-budget 1.0 # set $1/day cap
448
+ ```
449
+
450
+ ---
451
+
395
452
  ## 🎯 Agent Presets
396
453
 
397
454
  Get started instantly with ready-to-use agent configurations:
@@ -36,6 +36,8 @@
36
36
 
37
37
  | | Date | Update |
38
38
  |:---:|:---|:---|
39
+ | 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](CHANGELOG.md#024---2026-05-14) |
40
+ | 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](CHANGELOG.md#023---2026-05-04) |
39
41
  | 🚀 | **28 Apr 2026** | **v0.2.2 Released**: Gemini 3.x/2.5/2.0 registry, `thinking_budget`, Google Search grounding, Files API, Gemini native tools (GoogleSearch, UrlContext, CodeExecution). Anthropic Claude 4.7 registry, extended thinking, prompt caching (`cache_control`), streaming polish, experimental native tools. [See changelog](CHANGELOG.md#022---2026-04-28) |
40
42
  | 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](CHANGELOG.md#021---2026-04-25) |
41
43
  | 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](CHANGELOG.md#020---2026-04-09) |
@@ -270,10 +272,91 @@ Production API<br/>
270
272
 
271
273
  ---
272
274
 
273
- ## 🆕 What's New in v0.2.2
275
+ ## 🆕 What's New in v0.2.4
274
276
 
275
277
  <details open>
276
- <summary><b>Top 5 features in v0.2.2</b></summary>
278
+ <summary><b>Top 5 features in v0.2.4 — ModelRouter & Cost Optimizer</b></summary>
279
+
280
+ 1. **`PolicyBasedRouter`** — composable routing engine with three built-in policies. Pick the cheapest provider within your budget, the fastest under your SLA, or simply the first available — and combine them freely.
281
+
282
+ ```python
283
+ from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
284
+ from effgen.models.capabilities import Capability
285
+
286
+ router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
287
+ ctx = RoutingContext(
288
+ prompt_tokens_estimate=500,
289
+ user_budget_usd=0.01,
290
+ latency_budget_ms=3000,
291
+ required_capabilities={Capability.chat},
292
+ )
293
+ decision = router.route(ctx)
294
+ print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
295
+ print(decision.eliminated) # [(pair, reason), ...] — fully explainable
296
+ ```
297
+
298
+ 2. **Transparent failover** — `route_and_execute(ctx, fn)` retries on rate-limits / 5xx / timeouts and seamlessly moves to the next-best provider. Each hop fires a `RouterEvent` to registered subscribers.
299
+
300
+ ```python
301
+ from effgen import load_model
302
+
303
+ def call_provider(pair):
304
+ model = load_model(pair.model_id, provider=pair.provider)
305
+ return model.generate("Hello!").text
306
+
307
+ router.subscribe(
308
+ lambda event: print(
309
+ f"Failover: {event.from_provider}/{event.from_model} "
310
+ f"→ {event.to_provider}/{event.to_model}"
311
+ )
312
+ )
313
+ result = router.route_and_execute(ctx, call_provider)
314
+ ```
315
+
316
+ 3. **Cross-process SQLite rate-limit coordination** — share a single rate-limit budget across multiple workers:
317
+
318
+ ```python
319
+ from effgen import RateLimitCoordinator, SQLiteRateLimitStore
320
+
321
+ store = SQLiteRateLimitStore("~/.effgen/rate_limits.sqlite")
322
+ coordinator = RateLimitCoordinator(storage=store) # WAL-mode, BEGIN IMMEDIATE
323
+ ```
324
+
325
+ 4. **Persistent cost tracking + `effgen cost` CLI** — every API call persists to SQLite; query spend instantly:
326
+
327
+ ```bash
328
+ effgen cost today # per-provider per-model table
329
+ effgen cost week # rolling 7-day view
330
+ effgen cost by-provider # lifetime totals
331
+ effgen cost set-budget 1.0 # set $1/day cap (BudgetExceededError at 100%)
332
+ ```
333
+
334
+ 5. **Fully explainable decisions + budget guard** — `RouterDecision` records every eliminated provider and why (`"rate_limited"`, `"no_key"`, `"cost_exceeds_budget"`, `"latency_exceeds_sla"`). Configure a daily spend cap; the router automatically fails over to a free-tier provider when the budget is hit.
335
+
336
+ </details>
337
+
338
+ <details>
339
+ <summary><b>Top 5 features from v0.2.3</b></summary>
340
+
341
+ 1. **5 new cloud backends** — `GroqAdapter`, `TogetherAdapter`, `FireworksAdapter`, `ReplicateAdapter`, `HFInferenceAdapter` — each with streaming, native tools, rate-limit coordination, and cost tracking. 9 providers total.
342
+
343
+ ```python
344
+ model = load_model("llama-3.1-8b-instant", provider="groq")
345
+ model = load_model("Qwen/Qwen2.5-72B-Instruct", provider="hf")
346
+ ```
347
+
348
+ 2. **Unified ProviderRegistry** — `list_providers()`, `list_models(provider)`, `lookup(model_id)` consolidated across all 9 adapters. `AmbiguousModelError` on bare IDs shared across providers.
349
+
350
+ 3. **`effgen doctor`** — new CLI command showing which providers have API keys configured.
351
+
352
+ 4. **Backend parity matrix** — canonical agentic task ("(17 × 23) + sqrt(144) = 403") runs identically across all providers; streaming and error surfaces verified uniform. See `docs/providers/parity.md`.
353
+
354
+ 5. **HuggingFace Router support** — `HFInferenceAdapter` with 124-model dynamic catalog, `refresh_models()` + `check_drift()`, `ModelUnavailableError` with `suggest_alternatives()`, and custom Inference Endpoint URL.
355
+
356
+ </details>
357
+
358
+ <details>
359
+ <summary><b>Top 5 features from v0.2.2 (and earlier)</b></summary>
277
360
 
278
361
  1. **Gemini 3.x/2.5/2.0 + Gemma families** — full model registry with correct context windows, output limits, and feature flags; SDK migrated to `google-genai>=1.0.0`.
279
362
 
@@ -586,15 +669,47 @@ result = agent.run("What does the documentation say about configuration?")
586
669
 
587
670
  ## 🤖 Multi-Model Support
588
671
 
589
- effGen supports **7 inference backends** and is tested across 11+ model families:
672
+ effGen supports **9 cloud inference providers** and 4 local backends, and is tested across 11+ model families:
673
+
674
+ | Backend | Platform | Install | Best For |
675
+ |---------|----------|---------|----------|
676
+ | **MLX** | Apple Silicon (M1/M2/M3/M4) | `effgen[mlx]` | Native Metal GPU, unified memory, 4/8-bit quantization |
677
+ | **MLX-VLM** | Apple Silicon | `effgen[mlx-vlm]` | Vision-Language models (Qwen2-VL, LLaVA, Phi-3 Vision, 30+ architectures) |
678
+ | **vLLM** | NVIDIA GPU | `effgen[vllm]` | High-throughput batch inference |
679
+ | **Transformers** | Any (CPU/GPU) | *(bundled)* | Universal compatibility, local models |
680
+ | **OpenAI** | Cloud API | *(bundled)* | gpt-5/gpt-5.4/o-series, reasoning_effort, structured outputs, native tools |
681
+ | **Anthropic** | Cloud API | *(bundled)* | Claude 4.7/4.x, extended thinking, prompt caching, native tools |
682
+ | **Google Gemini** | Cloud API | *(bundled)* | Gemini 3.x/2.5/2.0, thinking_budget, grounding, Files API, native tools |
683
+ | **Cerebras** | Cloud API | `effgen[cerebras]` | 4 free-tier models (llama3.1-8b, qwen-3-235b), ultra-low latency |
684
+ | **Groq** | Cloud API | `effgen[groq]` | 16 models (llama-3.3-70b, mixtral, qwen3-32b), ultra-fast free-tier inference |
685
+ | **Together AI** | Cloud API | `effgen[together]` | 163-model catalog (llama, deepseek, qwen, mistral), per-model pricing |
686
+ | **Fireworks** | Cloud API | `effgen[fireworks]` | 80 chat models (54 tool-capable), serverless + dedicated |
687
+ | **Replicate** | Cloud API | `effgen[replicate]` | 38 models, async run-poll, SSE streaming, compute-second billing |
688
+ | **HuggingFace** | Cloud API | `effgen[hf]` | 124-model HF Router catalog, custom Inference Endpoints, free serverless tier |
689
+
690
+ ### Provider Auth Check
691
+
692
+ ```bash
693
+ # See which API keys are configured
694
+ effgen doctor
695
+ ```
696
+
697
+ ### Quick Cloud Start
590
698
 
591
- | Backend | Platform | Best For |
592
- |---------|----------|----------|
593
- | **MLX** | Apple Silicon (M1/M2/M3/M4) | Native Metal GPU, unified memory, 4/8-bit quantization |
594
- | **MLX-VLM** | Apple Silicon | Vision-Language models (Qwen2-VL, LLaVA, Phi-3 Vision, 30+ architectures) |
595
- | **vLLM** | NVIDIA GPU | High-throughput batch inference |
596
- | **Transformers** | Any (CPU/GPU) | Universal compatibility |
597
- | **API** | Cloud | OpenAI (gpt-5/gpt-5.4/o-series + reasoning_effort), Anthropic (Claude 4.7/4.x + thinking + caching), Google Gemini (3.x/2.5/2.0 + thinking_budget + grounding + Files API + native tools), Cerebras (4 free-tier models, streaming + native tools) |
699
+ ```python
700
+ from effgen import load_model, Agent
701
+ from effgen.core.agent import AgentConfig
702
+ from effgen.tools.builtin import Calculator
703
+
704
+ # Any of the 9 cloud providers
705
+ model = load_model("llama-3.1-8b-instant", provider="groq") # Groq
706
+ # model = load_model("meta-llama/Llama-3.3-70B-Instruct-Turbo", provider="together")
707
+ # model = load_model("Qwen/Qwen2.5-72B-Instruct", provider="hf")
708
+
709
+ agent = Agent(config=AgentConfig(name="agent", model=model, tools=[Calculator()]))
710
+ result = agent.run("What is (17 * 23) + sqrt(144)?")
711
+ print(result.output) # → 403
712
+ ```
598
713
 
599
714
  ### Top Recommended Models
600
715
 
@@ -37,6 +37,8 @@
37
37
 
38
38
  | | Date | Update |
39
39
  |:---:|:---|:---|
40
+ | 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#024---2026-05-14) |
41
+ | 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#023---2026-05-04) |
40
42
  | 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#021---2026-04-25) |
41
43
  | 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#020---2026-04-09) |
42
44
  | 🍎 | **8 Apr 2026** | **MLX & Apple Silicon support merged** (PR #4): Native Metal GPU acceleration via MLX & MLX-VLM backends. `pip install effgen[mlx]` |
@@ -235,6 +237,41 @@ Production API<br/>
235
237
 
236
238
  ---
237
239
 
240
+ ## 🆕 ModelRouter — Smart Multi-Provider Routing (v0.2.4)
241
+
242
+ Route requests across 9 cloud providers automatically — pick the cheapest, fastest, or first available:
243
+
244
+ ```python
245
+ from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
246
+ from effgen.models.capabilities import Capability
247
+
248
+ # Build a router: try fastest first, fall back to cheapest
249
+ router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
250
+
251
+ ctx = RoutingContext(
252
+ prompt_tokens_estimate=500,
253
+ user_budget_usd=0.01, # stay within $0.01
254
+ latency_budget_ms=3000, # need response in under 3s
255
+ required_capabilities={Capability.chat},
256
+ )
257
+
258
+ decision = router.route(ctx)
259
+ print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
260
+ print(decision.eliminated) # [(pair, reason), ...] — fully explainable
261
+ ```
262
+
263
+ **Transparent failover** — `route_and_execute` retries on rate limits, 5xx errors, or timeouts, and seamlessly moves to the next-best provider.
264
+
265
+ **Cost dashboard** — track every API call:
266
+
267
+ ```bash
268
+ effgen cost today # per-provider per-model table
269
+ effgen cost week # rolling 7-day view
270
+ effgen cost set-budget 1.0 # set $1/day cap
271
+ ```
272
+
273
+ ---
274
+
238
275
  ## 🎯 Agent Presets
239
276
 
240
277
  Get started instantly with ready-to-use agent configurations:
@@ -9,7 +9,7 @@ This framework enables SLMs to function as powerful agentic systems through:
9
9
  - Comprehensive configuration management
10
10
  """
11
11
 
12
- __version__ = "0.2.2"
12
+ __version__ = "0.2.4"
13
13
  __author__ = "effGen Team"
14
14
  __license__ = "Apache-2.0"
15
15
 
@@ -74,30 +74,94 @@ from effgen.models import (
74
74
  AnthropicAdapter,
75
75
  BaseModel,
76
76
  CerebrasAdapter,
77
+ CostBasedPolicy,
78
+ CostTracker,
79
+ FireworksAdapter,
80
+ FirstAvailablePolicy,
77
81
  GeminiAdapter,
78
82
  GenerationConfig,
79
83
  GenerationResult,
84
+ GroqAdapter,
85
+ HFInferenceAdapter,
86
+ LatencyBasedPolicy,
87
+ LatencyTracker,
80
88
  ModelLoader,
81
89
  OpenAIAdapter,
90
+ PolicyBasedRouter,
91
+ ProviderModelPair,
92
+ ReplicateAdapter,
93
+ RetryPolicy,
94
+ RouterDecision,
95
+ RouterEvent,
96
+ RoutingContext,
97
+ RoutingPolicy,
98
+ SQLiteCostStore,
82
99
  StreamChunk,
100
+ TogetherAdapter,
83
101
  TransformersEngine,
84
102
  VLLMEngine,
85
103
  load_model,
86
104
  )
87
105
  from effgen.models._rate_limit import RateLimitCoordinator, RateLimitExceeded # noqa: I001
106
+ from effgen.models._rate_limit_store import SQLiteRateLimitStore # noqa: I001
107
+ from effgen.models.auth import check_keys
88
108
  from effgen.models.cerebras_models import available_models as cerebras_available_models
89
109
  from effgen.models.cerebras_models import free_tier_models as cerebras_free_tier_models
90
110
  from effgen.models.cerebras_models import model_info as cerebras_model_info
91
- from effgen.models.errors import ModelRefusalError, ToolIncompatibleError
111
+ from effgen.models.errors import ( # noqa: I001
112
+ AllCandidatesExhaustedError,
113
+ AmbiguousModelError,
114
+ BudgetExceededError,
115
+ InvalidRequestError,
116
+ ModelAuthError,
117
+ ModelNotFoundError,
118
+ ModelRefusalError,
119
+ ModelTimeoutError,
120
+ ModelUnavailableError,
121
+ NoCandidateWithinBudgetError,
122
+ ProviderTransientError,
123
+ ToolIncompatibleError,
124
+ )
125
+ from effgen.models.fireworks_models import available_models as fireworks_available_models
126
+ from effgen.models.fireworks_models import chat_models as fireworks_chat_models
127
+ from effgen.models.fireworks_models import pricing_table as fireworks_pricing_table
128
+ from effgen.models.fireworks_models import refresh_models as fireworks_refresh_models
129
+ from effgen.models.fireworks_models import tool_capable_models as fireworks_tool_capable_models
92
130
  from effgen.models.gemini_models import available_models as gemini_available_models
93
131
  from effgen.models.gemini_models import free_tier_models as gemini_free_tier_models
94
132
  from effgen.models.gemini_models import model_info as gemini_model_info
95
133
  from effgen.models.gemini_models import recommended_models as gemini_recommended_models
134
+ from effgen.models.groq_models import available_models as groq_available_models
135
+ from effgen.models.groq_models import chat_models as groq_chat_models
136
+ from effgen.models.groq_models import tool_capable_models as groq_tool_capable_models
137
+ from effgen.models.hf_inference_models import available_models as hf_available_models
138
+ from effgen.models.hf_inference_models import catalog_summary as hf_catalog_summary
139
+ from effgen.models.hf_inference_models import chat_models as hf_chat_models
140
+ from effgen.models.hf_inference_models import cheapest_provider as hf_cheapest_provider
141
+ from effgen.models.hf_inference_models import check_drift as hf_check_drift
142
+ from effgen.models.hf_inference_models import get_model_info as hf_get_model_info
143
+ from effgen.models.hf_inference_models import list_providers_for as hf_list_providers_for
144
+ from effgen.models.hf_inference_models import refresh_models as hf_refresh_models
145
+ from effgen.models.hf_inference_models import serverless_models as hf_serverless_models
146
+ from effgen.models.hf_inference_models import suggest_alternatives as hf_suggest_alternatives
147
+ from effgen.models.hf_inference_models import tool_capable_models as hf_tool_capable_models
96
148
  from effgen.models.openai_models import available_models as openai_available_models
97
149
  from effgen.models.openai_models import chat_models as openai_chat_models
98
150
  from effgen.models.openai_models import model_info as openai_model_info
99
151
  from effgen.models.openai_models import reasoning_models as openai_reasoning_models # noqa: I001
100
152
  from effgen.models.openai_schema import to_openai_schema
153
+ from effgen.models.registry import ProviderRegistry, list_models, list_providers, lookup
154
+ from effgen.models.replicate_models import available_models as replicate_available_models
155
+ from effgen.models.replicate_models import get_model_info as replicate_get_model_info
156
+ from effgen.models.replicate_models import refresh_models as replicate_refresh_models
157
+ from effgen.models.replicate_models import streaming_models as replicate_streaming_models
158
+ from effgen.models.replicate_models import tool_capable_models as replicate_tool_capable_models
159
+ from effgen.models.together_models import available_models as together_available_models
160
+ from effgen.models.together_models import chat_models as together_chat_models
161
+ from effgen.models.together_models import pricing_table as together_pricing_table
162
+ from effgen.models.together_models import refresh_models as together_refresh_models
163
+ from effgen.models.together_models import serverless_models as together_serverless_models
164
+ from effgen.models.together_models import tool_capable_models as together_tool_capable_models
101
165
 
102
166
  # Preset imports
103
167
  from effgen.presets import create_agent, list_presets
@@ -204,17 +268,60 @@ __all__ = [
204
268
  "StreamChunk",
205
269
  "GeminiAdapter",
206
270
  "CerebrasAdapter",
271
+ "GroqAdapter",
272
+ "TogetherAdapter",
273
+ "FireworksAdapter",
274
+ "ReplicateAdapter",
275
+ "HFInferenceAdapter",
207
276
  "ModelLoader",
208
277
  "GenerationConfig",
209
278
  "GenerationResult",
279
+ # Router (v0.2.4+)
280
+ "PolicyBasedRouter",
281
+ "RoutingPolicy",
282
+ "RoutingContext",
283
+ "RouterDecision",
284
+ "RouterEvent",
285
+ "ProviderModelPair",
286
+ "FirstAvailablePolicy",
287
+ "CostBasedPolicy",
288
+ "LatencyBasedPolicy",
289
+ "RetryPolicy",
290
+ # Tracking (v0.2.4+)
291
+ "LatencyTracker",
292
+ "CostTracker",
293
+ "SQLiteCostStore",
210
294
  "RateLimitCoordinator",
211
295
  "RateLimitExceeded",
296
+ "SQLiteRateLimitStore",
297
+ # Errors
212
298
  "ModelRefusalError",
299
+ "ModelAuthError",
300
+ "ModelTimeoutError",
301
+ "ModelUnavailableError",
302
+ "ModelNotFoundError",
303
+ "AmbiguousModelError",
304
+ "NoCandidateWithinBudgetError",
305
+ "ToolIncompatibleError",
306
+ "AllCandidatesExhaustedError",
307
+ "BudgetExceededError",
308
+ "ProviderTransientError",
309
+ "InvalidRequestError",
213
310
  "to_openai_schema",
311
+ # Provider registry + auth
312
+ "ProviderRegistry",
313
+ "list_providers",
314
+ "list_models",
315
+ "lookup",
316
+ "check_keys",
214
317
  # Cerebras helpers
215
318
  "cerebras_available_models",
216
319
  "cerebras_free_tier_models",
217
320
  "cerebras_model_info",
321
+ # Groq helpers
322
+ "groq_available_models",
323
+ "groq_chat_models",
324
+ "groq_tool_capable_models",
218
325
  # OpenAI helpers
219
326
  "openai_available_models",
220
327
  "openai_chat_models",
@@ -225,6 +332,37 @@ __all__ = [
225
332
  "gemini_free_tier_models",
226
333
  "gemini_model_info",
227
334
  "gemini_recommended_models",
335
+ # Together helpers
336
+ "together_available_models",
337
+ "together_chat_models",
338
+ "together_tool_capable_models",
339
+ "together_pricing_table",
340
+ "together_refresh_models",
341
+ "together_serverless_models",
342
+ # Fireworks helpers
343
+ "fireworks_available_models",
344
+ "fireworks_chat_models",
345
+ "fireworks_tool_capable_models",
346
+ "fireworks_pricing_table",
347
+ "fireworks_refresh_models",
348
+ # Replicate helpers
349
+ "replicate_available_models",
350
+ "replicate_streaming_models",
351
+ "replicate_tool_capable_models",
352
+ "replicate_refresh_models",
353
+ "replicate_get_model_info",
354
+ # HF Inference helpers
355
+ "hf_available_models",
356
+ "hf_chat_models",
357
+ "hf_tool_capable_models",
358
+ "hf_serverless_models",
359
+ "hf_suggest_alternatives",
360
+ "hf_get_model_info",
361
+ "hf_refresh_models",
362
+ "hf_check_drift",
363
+ "hf_catalog_summary",
364
+ "hf_list_providers_for",
365
+ "hf_cheapest_provider",
228
366
 
229
367
  # Tools
230
368
  "BaseTool",
@@ -1,6 +1,6 @@
1
1
  """effGen API Server v2 — Production Gateway.
2
2
 
3
- Phase 12 modules:
3
+ Modules:
4
4
  - openai_compat: OpenAI-compatible /v1/chat/completions and /v1/completions
5
5
  - queue: RequestQueue with priority, fair scheduling, backpressure
6
6
  - pool: AgentPool with min/max size and auto-scaling
@@ -1,4 +1,4 @@
1
- """effGen caching subsystem (Phase 14).
1
+ """effGen caching subsystem.
2
2
 
3
3
  Provides prompt-prefix caching and result caching for tools and agents.
4
4
  All components are pure-Python and have no required external dependencies.