effgen 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {effgen-0.2.2/effgen.egg-info → effgen-0.2.4}/PKG-INFO +58 -1
- {effgen-0.2.2 → effgen-0.2.4}/README.md +125 -10
- {effgen-0.2.2 → effgen-0.2.4}/README_PYPI.md +37 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/__init__.py +140 -2
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/__init__.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/__init__.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/cli.py +317 -11
- {effgen-0.2.2 → effgen-0.2.4}/effgen/client/client.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/__init__.py +3 -3
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/agent.py +128 -16
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/structured_output.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/tool_calling.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/token_budget.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/__init__.py +95 -2
- effgen-0.2.4/effgen/models/_cost.py +467 -0
- effgen-0.2.4/effgen/models/_cost_store.py +207 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/_rate_limit.py +184 -17
- effgen-0.2.4/effgen/models/_rate_limit_store.py +392 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_adapter.py +54 -5
- effgen-0.2.4/effgen/models/auth.py +56 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/capabilities.py +16 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/cerebras_adapter.py +194 -93
- effgen-0.2.4/effgen/models/errors.py +342 -0
- effgen-0.2.4/effgen/models/fireworks_adapter.py +706 -0
- effgen-0.2.4/effgen/models/fireworks_models.py +1379 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_adapter.py +83 -18
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_models.py +37 -0
- effgen-0.2.4/effgen/models/groq_adapter.py +734 -0
- effgen-0.2.4/effgen/models/groq_models.py +298 -0
- effgen-0.2.4/effgen/models/hf_inference_adapter.py +860 -0
- effgen-0.2.4/effgen/models/hf_inference_models.py +382 -0
- effgen-0.2.4/effgen/models/latency_tracker.py +220 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/model_loader.py +136 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_adapter.py +56 -8
- effgen-0.2.4/effgen/models/registry.py +244 -0
- effgen-0.2.4/effgen/models/replicate_adapter.py +876 -0
- effgen-0.2.4/effgen/models/replicate_models.py +836 -0
- effgen-0.2.4/effgen/models/router.py +872 -0
- effgen-0.2.4/effgen/models/routing/__init__.py +8 -0
- effgen-0.2.4/effgen/models/routing/_probe.py +203 -0
- effgen-0.2.4/effgen/models/routing/cost.py +290 -0
- effgen-0.2.4/effgen/models/routing/first_available.py +72 -0
- effgen-0.2.4/effgen/models/routing/latency.py +237 -0
- effgen-0.2.4/effgen/models/routing/retry.py +168 -0
- effgen-0.2.4/effgen/models/together_adapter.py +742 -0
- effgen-0.2.4/effgen/models/together_models.py +2637 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/transformers_engine.py +34 -6
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/ingest.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/reranker.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/base_tool.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/calculator.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/json_tool.py +4 -4
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/protocol.py +1 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/validators.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4/effgen.egg-info}/PKG-INFO +58 -1
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/SOURCES.txt +21 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/requires.txt +25 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/top_level.txt +0 -1
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/conversational_agent.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_hybrid_agent.py +2 -3
- {effgen-0.2.2 → effgen-0.2.4}/examples/tools/coding_agent.py +3 -4
- {effgen-0.2.2 → effgen-0.2.4}/examples/utils/sweep_model.py +3 -3
- {effgen-0.2.2 → effgen-0.2.4}/pyproject.toml +27 -1
- effgen-0.2.2/effgen/models/_cost.py +0 -234
- effgen-0.2.2/effgen/models/errors.py +0 -50
- effgen-0.2.2/effgen/models/router.py +0 -396
- {effgen-0.2.2 → effgen-0.2.4}/LICENSE +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/embeddings.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/middleware.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/openai_compat.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/pool.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/queue.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/api/tenancy.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/prompt_cache.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/cache/result_cache.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/client/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/client/exceptions.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/completion.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/loader.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/agent_config.schema.json +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/model_config.schema.json +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/schemas/tool_config.schema.json +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/config/validator.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/aggregation.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/background.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/batch.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/checkpoint.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/clarification.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/complexity_analyzer.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/decomposition_engine.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/execution_tracker.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/feedback.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/human_loop.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/lifecycle.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/message_bus.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/orchestrator.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/router.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/session.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/shared_state.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/state.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/sub_agent_manager.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/task.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/core/workflow.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/debug/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/debug/inspector.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/base.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/expander.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/domains/presets.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/comparison.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/evaluator.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/regression.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/eval/suites.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/docker_sandbox.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/sandbox.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/execution/validators.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/allocator.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/monitor.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/gpu/utils.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/base.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/content.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/injection.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/presets.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/guardrails/tool_safety.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/hardware/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/hardware/platform.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/long_term.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/short_term.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/memory/vector_store.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_cache.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/anthropic_models.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/base.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/batching.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/cerebras_models.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gemini_files.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/gguf_engine.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/lazy.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/mlx_engine.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/mlx_vlm_engine.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_models.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/openai_schema.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/pool.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/models/vllm_engine.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/presets/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/presets/registry.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/agent_system_prompt.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/chain_manager.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/optimizer.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/template_manager.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/analysis.yaml +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/coding.yaml +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/general.yaml +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/templates/reasoning.yaml +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/prompts/tool_prompt_generator.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/py.typed +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/attribution.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/chunking.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/context_builder.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/rag/search.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/agentic_search.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/anthropic_native.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/bash_tool.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/code_executor.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/communication.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/data_analysis.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/datetime_tool.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/devops.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/file_ops.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/finance.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/gemini_native.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/knowledge.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/openai_native.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/python_repl.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/retrieval.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/text_processing.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/url_fetch.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/weather.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/web_search.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/builtin/wikipedia_tool.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/fallback.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/plugin.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/agent_card.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/client.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/a2a/protocol.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/client.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/protocol.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/acp/server.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/client.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp/server.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/client.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/protocols/mcp_official/server.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/tools/registry.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/circuit_breaker.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/health.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/logging.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/metrics.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/prometheus_metrics.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/structured_logging.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen/utils/tracing.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/dependency_links.txt +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/entry_points.txt +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/effgen.egg-info/not-zip-safe +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/advanced_streaming_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/agent_communication.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/async_concurrent_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/data_processing_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/error_recovery_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/advanced/multi_agent_pipeline.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/agent_viz_mlx.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent_mlx.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/basic_agent_vllm.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/calculator_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/chat_gui_mlx.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/qa_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/tool_builder_gui.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/basic/tool_tester_gui.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/basic_cerebras.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_all_models.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_cost_tracker.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_hard_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_load_model.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_multi_turn.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_rate_limits.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_streaming.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/cerebras/cerebras_tool_calling.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/data/download_arc.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/__init__.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/basic_chat.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/caching_and_structured_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/multi_turn_chat.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_code_interpreter.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_file_search.py +2 -2
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/native_tools_web_search.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/openai_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/prompt_caching.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/reasoning_models.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/structured_outputs.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/openai/tool_calling.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/plugins_presets/plugin_example.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/plugins_presets/preset_agents.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/tools/advanced_multi_tool_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/tools/file_operations_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/tools/multi_tool_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/agentic_search_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/memory_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/retrieval_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/streaming_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/weather_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/examples/web_retrieval/web_agent.py +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/setup.cfg +0 -0
- {effgen-0.2.2 → effgen-0.2.4}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: effgen
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A comprehensive framework for building agents with Small Language Models
|
|
5
5
|
Home-page: https://github.com/ctrl-gaurav/effGen
|
|
6
6
|
Author: Gaurav Srivastava
|
|
@@ -108,6 +108,16 @@ Provides-Extra: gguf
|
|
|
108
108
|
Requires-Dist: llama-cpp-python>=0.2.0; extra == "gguf"
|
|
109
109
|
Provides-Extra: cerebras
|
|
110
110
|
Requires-Dist: cerebras-cloud-sdk>=1.0; extra == "cerebras"
|
|
111
|
+
Provides-Extra: groq
|
|
112
|
+
Requires-Dist: groq>=0.15; extra == "groq"
|
|
113
|
+
Provides-Extra: together
|
|
114
|
+
Requires-Dist: together>=1.3; extra == "together"
|
|
115
|
+
Provides-Extra: fireworks
|
|
116
|
+
Requires-Dist: fireworks-ai>=0.15; extra == "fireworks"
|
|
117
|
+
Provides-Extra: replicate
|
|
118
|
+
Requires-Dist: replicate>=1.0; extra == "replicate"
|
|
119
|
+
Provides-Extra: hf
|
|
120
|
+
Requires-Dist: huggingface_hub>=0.26; extra == "hf"
|
|
111
121
|
Provides-Extra: flash-attn
|
|
112
122
|
Requires-Dist: flash-attn>=2.3.0; extra == "flash-attn"
|
|
113
123
|
Provides-Extra: vector-db
|
|
@@ -127,6 +137,11 @@ Provides-Extra: monitoring
|
|
|
127
137
|
Requires-Dist: wandb>=0.16.0; extra == "monitoring"
|
|
128
138
|
Requires-Dist: tensorboard>=2.15.0; extra == "monitoring"
|
|
129
139
|
Provides-Extra: all
|
|
140
|
+
Requires-Dist: pytest>=7.4.0; extra == "all"
|
|
141
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "all"
|
|
142
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "all"
|
|
143
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == "all"
|
|
144
|
+
Requires-Dist: pytest-forked>=1.6.0; extra == "all"
|
|
130
145
|
Requires-Dist: vllm>=0.2.7; extra == "all"
|
|
131
146
|
Requires-Dist: faiss-cpu>=1.7.4; extra == "all"
|
|
132
147
|
Requires-Dist: chromadb>=0.4.18; extra == "all"
|
|
@@ -148,6 +163,11 @@ Requires-Dist: rouge-score>=0.1.2; extra == "all"
|
|
|
148
163
|
Requires-Dist: nltk>=3.8.0; extra == "all"
|
|
149
164
|
Requires-Dist: llama-cpp-python>=0.2.0; extra == "all"
|
|
150
165
|
Requires-Dist: cerebras-cloud-sdk>=1.0; extra == "all"
|
|
166
|
+
Requires-Dist: groq>=0.15; extra == "all"
|
|
167
|
+
Requires-Dist: together>=1.3; extra == "all"
|
|
168
|
+
Requires-Dist: fireworks-ai>=0.15; extra == "all"
|
|
169
|
+
Requires-Dist: replicate>=1.0; extra == "all"
|
|
170
|
+
Requires-Dist: huggingface_hub>=0.26; extra == "all"
|
|
151
171
|
Requires-Dist: bitsandbytes>=0.46.1; extra == "all"
|
|
152
172
|
Requires-Dist: datasets>=2.14.0; extra == "all"
|
|
153
173
|
Dynamic: author
|
|
@@ -194,6 +214,8 @@ Dynamic: requires-python
|
|
|
194
214
|
|
|
195
215
|
| | Date | Update |
|
|
196
216
|
|:---:|:---|:---|
|
|
217
|
+
| 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#024---2026-05-14) |
|
|
218
|
+
| 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#023---2026-05-04) |
|
|
197
219
|
| 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#021---2026-04-25) |
|
|
198
220
|
| 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#020---2026-04-09) |
|
|
199
221
|
| 🍎 | **8 Apr 2026** | **MLX & Apple Silicon support merged** (PR #4): Native Metal GPU acceleration via MLX & MLX-VLM backends. `pip install effgen[mlx]` |
|
|
@@ -392,6 +414,41 @@ Production API<br/>
|
|
|
392
414
|
|
|
393
415
|
---
|
|
394
416
|
|
|
417
|
+
## 🆕 ModelRouter — Smart Multi-Provider Routing (v0.2.4)
|
|
418
|
+
|
|
419
|
+
Route requests across 9 cloud providers automatically — pick the cheapest, fastest, or first available:
|
|
420
|
+
|
|
421
|
+
```python
|
|
422
|
+
from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
|
|
423
|
+
from effgen.models.capabilities import Capability
|
|
424
|
+
|
|
425
|
+
# Build a router: try fastest first, fall back to cheapest
|
|
426
|
+
router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
|
|
427
|
+
|
|
428
|
+
ctx = RoutingContext(
|
|
429
|
+
prompt_tokens_estimate=500,
|
|
430
|
+
user_budget_usd=0.01, # stay within $0.01
|
|
431
|
+
latency_budget_ms=3000, # need response in under 3s
|
|
432
|
+
required_capabilities={Capability.chat},
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
decision = router.route(ctx)
|
|
436
|
+
print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
|
|
437
|
+
print(decision.eliminated) # [(pair, reason), ...] — fully explainable
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
**Transparent failover** — `route_and_execute` retries on rate-limits, 5xx errors, or timeouts and seamlessly moves to the next-best provider.
|
|
441
|
+
|
|
442
|
+
**Cost dashboard** — track every API call:
|
|
443
|
+
|
|
444
|
+
```bash
|
|
445
|
+
effgen cost today # per-provider per-model table
|
|
446
|
+
effgen cost week # rolling 7-day view
|
|
447
|
+
effgen cost set-budget 1.0 # set $1/day cap
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
395
452
|
## 🎯 Agent Presets
|
|
396
453
|
|
|
397
454
|
Get started instantly with ready-to-use agent configurations:
|
|
@@ -36,6 +36,8 @@
|
|
|
36
36
|
|
|
37
37
|
| | Date | Update |
|
|
38
38
|
|:---:|:---|:---|
|
|
39
|
+
| 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](CHANGELOG.md#024---2026-05-14) |
|
|
40
|
+
| 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](CHANGELOG.md#023---2026-05-04) |
|
|
39
41
|
| 🚀 | **28 Apr 2026** | **v0.2.2 Released**: Gemini 3.x/2.5/2.0 registry, `thinking_budget`, Google Search grounding, Files API, Gemini native tools (GoogleSearch, UrlContext, CodeExecution). Anthropic Claude 4.7 registry, extended thinking, prompt caching (`cache_control`), streaming polish, experimental native tools. [See changelog](CHANGELOG.md#022---2026-04-28) |
|
|
40
42
|
| 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](CHANGELOG.md#021---2026-04-25) |
|
|
41
43
|
| 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](CHANGELOG.md#020---2026-04-09) |
|
|
@@ -270,10 +272,91 @@ Production API<br/>
|
|
|
270
272
|
|
|
271
273
|
---
|
|
272
274
|
|
|
273
|
-
## 🆕 What's New in v0.2.2
|
|
275
|
+
## 🆕 What's New in v0.2.4
|
|
274
276
|
|
|
275
277
|
<details open>
|
|
276
|
-
<summary><b>Top 5 features in v0.2.2</b></summary>
|
|
278
|
+
<summary><b>Top 5 features in v0.2.4 — ModelRouter & Cost Optimizer</b></summary>
|
|
279
|
+
|
|
280
|
+
1. **`PolicyBasedRouter`** — composable routing engine with three built-in policies. Pick the cheapest provider within your budget, the fastest under your SLA, or simply the first available — and combine them freely.
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
|
|
284
|
+
from effgen.models.capabilities import Capability
|
|
285
|
+
|
|
286
|
+
router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
|
|
287
|
+
ctx = RoutingContext(
|
|
288
|
+
prompt_tokens_estimate=500,
|
|
289
|
+
user_budget_usd=0.01,
|
|
290
|
+
latency_budget_ms=3000,
|
|
291
|
+
required_capabilities={Capability.chat},
|
|
292
|
+
)
|
|
293
|
+
decision = router.route(ctx)
|
|
294
|
+
print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
|
|
295
|
+
print(decision.eliminated) # [(pair, reason), ...] — fully explainable
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
2. **Transparent failover** — `route_and_execute(ctx, fn)` retries on rate-limits / 5xx / timeouts and seamlessly moves to the next-best provider. Each hop fires a `RouterEvent` to registered subscribers.
|
|
299
|
+
|
|
300
|
+
```python
|
|
301
|
+
from effgen import load_model
|
|
302
|
+
|
|
303
|
+
def call_provider(pair):
|
|
304
|
+
model = load_model(pair.model_id, provider=pair.provider)
|
|
305
|
+
return model.generate("Hello!").text
|
|
306
|
+
|
|
307
|
+
router.subscribe(
|
|
308
|
+
lambda event: print(
|
|
309
|
+
f"Failover: {event.from_provider}/{event.from_model} "
|
|
310
|
+
f"→ {event.to_provider}/{event.to_model}"
|
|
311
|
+
)
|
|
312
|
+
)
|
|
313
|
+
result = router.route_and_execute(ctx, call_provider)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
3. **Cross-process SQLite rate-limit coordination** — share a single rate-limit budget across multiple workers:
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
from effgen import RateLimitCoordinator, SQLiteRateLimitStore
|
|
320
|
+
|
|
321
|
+
store = SQLiteRateLimitStore("~/.effgen/rate_limits.sqlite")
|
|
322
|
+
coordinator = RateLimitCoordinator(storage=store) # WAL-mode, BEGIN IMMEDIATE
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
4. **Persistent cost tracking + `effgen cost` CLI** — every API call persists to SQLite; query spend instantly:
|
|
326
|
+
|
|
327
|
+
```bash
|
|
328
|
+
effgen cost today # per-provider per-model table
|
|
329
|
+
effgen cost week # rolling 7-day view
|
|
330
|
+
effgen cost by-provider # lifetime totals
|
|
331
|
+
effgen cost set-budget 1.0 # set $1/day cap (BudgetExceededError at 100%)
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
5. **Fully explainable decisions + budget guard** — `RouterDecision` records every eliminated provider and why (`"rate_limited"`, `"no_key"`, `"cost_exceeds_budget"`, `"latency_exceeds_sla"`). Configure a daily spend cap; the router automatically fails over to a free-tier provider when the budget is hit.
|
|
335
|
+
|
|
336
|
+
</details>
|
|
337
|
+
|
|
338
|
+
<details>
|
|
339
|
+
<summary><b>Top 5 features from v0.2.3</b></summary>
|
|
340
|
+
|
|
341
|
+
1. **5 new cloud backends** — `GroqAdapter`, `TogetherAdapter`, `FireworksAdapter`, `ReplicateAdapter`, `HFInferenceAdapter` — each with streaming, native tools, rate-limit coordination, and cost tracking. 9 providers total.
|
|
342
|
+
|
|
343
|
+
```python
|
|
344
|
+
model = load_model("llama-3.1-8b-instant", provider="groq")
|
|
345
|
+
model = load_model("Qwen/Qwen2.5-72B-Instruct", provider="hf")
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
2. **Unified ProviderRegistry** — `list_providers()`, `list_models(provider)`, `lookup(model_id)` consolidated across all 9 adapters. `AmbiguousModelError` on bare IDs shared across providers.
|
|
349
|
+
|
|
350
|
+
3. **`effgen doctor`** — new CLI command showing which providers have API keys configured.
|
|
351
|
+
|
|
352
|
+
4. **Backend parity matrix** — canonical agentic task ("(17 × 23) + sqrt(144) = 403") runs identically across all providers; streaming and error surfaces verified uniform. See `docs/providers/parity.md`.
|
|
353
|
+
|
|
354
|
+
5. **HuggingFace Router support** — `HFInferenceAdapter` with 124-model dynamic catalog, `refresh_models()` + `check_drift()`, `ModelUnavailableError` with `suggest_alternatives()`, and custom Inference Endpoint URL.
|
|
355
|
+
|
|
356
|
+
</details>
|
|
357
|
+
|
|
358
|
+
<details>
|
|
359
|
+
<summary><b>Top 5 features from v0.2.2 (and earlier)</b></summary>
|
|
277
360
|
|
|
278
361
|
1. **Gemini 3.x/2.5/2.0 + Gemma families** — full model registry with correct context windows, output limits, and feature flags; SDK migrated to `google-genai>=1.0.0`.
|
|
279
362
|
|
|
@@ -586,15 +669,47 @@ result = agent.run("What does the documentation say about configuration?")
|
|
|
586
669
|
|
|
587
670
|
## 🤖 Multi-Model Support
|
|
588
671
|
|
|
589
|
-
effGen supports **
|
|
672
|
+
effGen supports **9 cloud inference providers** + 4 local backends, tested across 11+ model families:
|
|
673
|
+
|
|
674
|
+
| Backend | Platform | Install | Best For |
|
|
675
|
+
|---------|----------|---------|----------|
|
|
676
|
+
| **MLX** | Apple Silicon (M1/M2/M3/M4) | `effgen[mlx]` | Native Metal GPU, unified memory, 4/8-bit quantization |
|
|
677
|
+
| **MLX-VLM** | Apple Silicon | `effgen[mlx-vlm]` | Vision-Language models (Qwen2-VL, LLaVA, Phi-3 Vision, 30+ architectures) |
|
|
678
|
+
| **vLLM** | NVIDIA GPU | `effgen[vllm]` | High-throughput batch inference |
|
|
679
|
+
| **Transformers** | Any (CPU/GPU) | *(bundled)* | Universal compatibility, local models |
|
|
680
|
+
| **OpenAI** | Cloud API | *(bundled)* | gpt-5/gpt-5.4/o-series, reasoning_effort, structured outputs, native tools |
|
|
681
|
+
| **Anthropic** | Cloud API | *(bundled)* | Claude 4.7/4.x, extended thinking, prompt caching, native tools |
|
|
682
|
+
| **Google Gemini** | Cloud API | *(bundled)* | Gemini 3.x/2.5/2.0, thinking_budget, grounding, Files API, native tools |
|
|
683
|
+
| **Cerebras** | Cloud API | `effgen[cerebras]` | 4 free-tier models (llama3.1-8b, qwen-3-235b), ultra-low latency |
|
|
684
|
+
| **Groq** | Cloud API | `effgen[groq]` | 16 models (llama-3.3-70b, mixtral, qwen3-32b), ultra-fast free-tier inference |
|
|
685
|
+
| **Together AI** | Cloud API | `effgen[together]` | 163-model catalog (llama, deepseek, qwen, mistral), per-model pricing |
|
|
686
|
+
| **Fireworks** | Cloud API | `effgen[fireworks]` | 80 chat models (54 tool-capable), serverless + dedicated |
|
|
687
|
+
| **Replicate** | Cloud API | `effgen[replicate]` | 38 models, async run-poll, SSE streaming, compute-second billing |
|
|
688
|
+
| **HuggingFace** | Cloud API | `effgen[hf]` | 124-model HF Router catalog, custom Inference Endpoints, free serverless tier |
|
|
689
|
+
|
|
690
|
+
### Provider Auth Check
|
|
691
|
+
|
|
692
|
+
```bash
|
|
693
|
+
# See which API keys are configured
|
|
694
|
+
effgen doctor
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
### Quick Cloud Start
|
|
590
698
|
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
699
|
+
```python
|
|
700
|
+
from effgen import load_model, Agent
|
|
701
|
+
from effgen.core.agent import AgentConfig
|
|
702
|
+
from effgen.tools.builtin import Calculator
|
|
703
|
+
|
|
704
|
+
# Any of the 9 cloud providers
|
|
705
|
+
model = load_model("llama-3.1-8b-instant", provider="groq") # Groq
|
|
706
|
+
# model = load_model("meta-llama/Llama-3.3-70B-Instruct-Turbo", provider="together")
|
|
707
|
+
# model = load_model("Qwen/Qwen2.5-72B-Instruct", provider="hf")
|
|
708
|
+
|
|
709
|
+
agent = Agent(config=AgentConfig(name="agent", model=model, tools=[Calculator()]))
|
|
710
|
+
result = agent.run("What is (17 * 23) + sqrt(144)?")
|
|
711
|
+
print(result.output) # → 403
|
|
712
|
+
```
|
|
598
713
|
|
|
599
714
|
### Top Recommended Models
|
|
600
715
|
|
|
@@ -37,6 +37,8 @@
|
|
|
37
37
|
|
|
38
38
|
| | Date | Update |
|
|
39
39
|
|:---:|:---|:---|
|
|
40
|
+
| 🚀 | **14 May 2026** | **v0.2.4 Released**: ModelRouter with CostBased/LatencyBased/FirstAvailable policies, transparent provider failover, cross-process SQLite rate-limit coordination, persistent cost tracker + `effgen cost` dashboard CLI. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#024---2026-05-14) |
|
|
41
|
+
| 🚀 | **4 May 2026** | **v0.2.3 Released**: 5 new cloud backends (Groq, Together AI, Fireworks, Replicate, HuggingFace Inference) — 9 providers total. Unified ProviderRegistry, `effgen doctor` auth check, backend parity matrix. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#023---2026-05-04) |
|
|
40
42
|
| 🚀 | **25 Apr 2026** | **v0.2.1 Released**: Cerebras backend (4 free-tier models, streaming, native tool-calling, rate-limit coordinator, cost tracking) + OpenAI gpt-5/gpt-5.4-nano/o-series with `reasoning_effort`, prompt caching, structured outputs v2, and OpenAI native tools (web_search, code_interpreter, file_search). [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#021---2026-04-25) |
|
|
41
43
|
| 🚀 | **9 Apr 2026** | **v0.2.0 Released**: Major release — native tool calling, guardrails, multi-agent orchestration, RAG pipeline, 31 tools, eval framework, production API server, MLX Apple Silicon support, Python & TypeScript SDKs. [See changelog](https://github.com/ctrl-gaurav/effGen/blob/main/CHANGELOG.md#020---2026-04-09) |
|
|
42
44
|
| 🍎 | **8 Apr 2026** | **MLX & Apple Silicon support merged** (PR #4): Native Metal GPU acceleration via MLX & MLX-VLM backends. `pip install effgen[mlx]` |
|
|
@@ -235,6 +237,41 @@ Production API<br/>
|
|
|
235
237
|
|
|
236
238
|
---
|
|
237
239
|
|
|
240
|
+
## 🆕 ModelRouter — Smart Multi-Provider Routing (v0.2.4)
|
|
241
|
+
|
|
242
|
+
Route requests across 9 cloud providers automatically — pick the cheapest, fastest, or first available:
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from effgen import PolicyBasedRouter, RoutingContext, CostBasedPolicy, LatencyBasedPolicy
|
|
246
|
+
from effgen.models.capabilities import Capability
|
|
247
|
+
|
|
248
|
+
# Build a router: try fastest first, fall back to cheapest
|
|
249
|
+
router = PolicyBasedRouter(policies=[LatencyBasedPolicy(), CostBasedPolicy()])
|
|
250
|
+
|
|
251
|
+
ctx = RoutingContext(
|
|
252
|
+
prompt_tokens_estimate=500,
|
|
253
|
+
user_budget_usd=0.01, # stay within $0.01
|
|
254
|
+
latency_budget_ms=3000, # need response in under 3s
|
|
255
|
+
required_capabilities={Capability.chat},
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
decision = router.route(ctx)
|
|
259
|
+
print(decision.chosen) # e.g., ProviderModelPair("cerebras", "llama3.1-8b")
|
|
260
|
+
print(decision.eliminated) # [(pair, reason), ...] — fully explainable
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**Transparent failover** — `route_and_execute` retries on rate-limits, 5xx errors, or timeouts and seamlessly moves to the next-best provider.
|
|
264
|
+
|
|
265
|
+
**Cost dashboard** — track every API call:
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
effgen cost today # per-provider per-model table
|
|
269
|
+
effgen cost week # rolling 7-day view
|
|
270
|
+
effgen cost set-budget 1.0 # set $1/day cap
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
238
275
|
## 🎯 Agent Presets
|
|
239
276
|
|
|
240
277
|
Get started instantly with ready-to-use agent configurations:
|
|
@@ -9,7 +9,7 @@ This framework enables SLMs to function as powerful agentic systems through:
|
|
|
9
9
|
- Comprehensive configuration management
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
__version__ = "0.2.2"
|
|
12
|
+
__version__ = "0.2.4"
|
|
13
13
|
__author__ = "effGen Team"
|
|
14
14
|
__license__ = "Apache-2.0"
|
|
15
15
|
|
|
@@ -74,30 +74,94 @@ from effgen.models import (
|
|
|
74
74
|
AnthropicAdapter,
|
|
75
75
|
BaseModel,
|
|
76
76
|
CerebrasAdapter,
|
|
77
|
+
CostBasedPolicy,
|
|
78
|
+
CostTracker,
|
|
79
|
+
FireworksAdapter,
|
|
80
|
+
FirstAvailablePolicy,
|
|
77
81
|
GeminiAdapter,
|
|
78
82
|
GenerationConfig,
|
|
79
83
|
GenerationResult,
|
|
84
|
+
GroqAdapter,
|
|
85
|
+
HFInferenceAdapter,
|
|
86
|
+
LatencyBasedPolicy,
|
|
87
|
+
LatencyTracker,
|
|
80
88
|
ModelLoader,
|
|
81
89
|
OpenAIAdapter,
|
|
90
|
+
PolicyBasedRouter,
|
|
91
|
+
ProviderModelPair,
|
|
92
|
+
ReplicateAdapter,
|
|
93
|
+
RetryPolicy,
|
|
94
|
+
RouterDecision,
|
|
95
|
+
RouterEvent,
|
|
96
|
+
RoutingContext,
|
|
97
|
+
RoutingPolicy,
|
|
98
|
+
SQLiteCostStore,
|
|
82
99
|
StreamChunk,
|
|
100
|
+
TogetherAdapter,
|
|
83
101
|
TransformersEngine,
|
|
84
102
|
VLLMEngine,
|
|
85
103
|
load_model,
|
|
86
104
|
)
|
|
87
105
|
from effgen.models._rate_limit import RateLimitCoordinator, RateLimitExceeded # noqa: I001
|
|
106
|
+
from effgen.models._rate_limit_store import SQLiteRateLimitStore # noqa: I001
|
|
107
|
+
from effgen.models.auth import check_keys
|
|
88
108
|
from effgen.models.cerebras_models import available_models as cerebras_available_models
|
|
89
109
|
from effgen.models.cerebras_models import free_tier_models as cerebras_free_tier_models
|
|
90
110
|
from effgen.models.cerebras_models import model_info as cerebras_model_info
|
|
91
|
-
from effgen.models.errors import ModelRefusalError
|
|
111
|
+
from effgen.models.errors import ( # noqa: I001
|
|
112
|
+
AllCandidatesExhaustedError,
|
|
113
|
+
AmbiguousModelError,
|
|
114
|
+
BudgetExceededError,
|
|
115
|
+
InvalidRequestError,
|
|
116
|
+
ModelAuthError,
|
|
117
|
+
ModelNotFoundError,
|
|
118
|
+
ModelRefusalError,
|
|
119
|
+
ModelTimeoutError,
|
|
120
|
+
ModelUnavailableError,
|
|
121
|
+
NoCandidateWithinBudgetError,
|
|
122
|
+
ProviderTransientError,
|
|
123
|
+
ToolIncompatibleError,
|
|
124
|
+
)
|
|
125
|
+
from effgen.models.fireworks_models import available_models as fireworks_available_models
|
|
126
|
+
from effgen.models.fireworks_models import chat_models as fireworks_chat_models
|
|
127
|
+
from effgen.models.fireworks_models import pricing_table as fireworks_pricing_table
|
|
128
|
+
from effgen.models.fireworks_models import refresh_models as fireworks_refresh_models
|
|
129
|
+
from effgen.models.fireworks_models import tool_capable_models as fireworks_tool_capable_models
|
|
92
130
|
from effgen.models.gemini_models import available_models as gemini_available_models
|
|
93
131
|
from effgen.models.gemini_models import free_tier_models as gemini_free_tier_models
|
|
94
132
|
from effgen.models.gemini_models import model_info as gemini_model_info
|
|
95
133
|
from effgen.models.gemini_models import recommended_models as gemini_recommended_models
|
|
134
|
+
from effgen.models.groq_models import available_models as groq_available_models
|
|
135
|
+
from effgen.models.groq_models import chat_models as groq_chat_models
|
|
136
|
+
from effgen.models.groq_models import tool_capable_models as groq_tool_capable_models
|
|
137
|
+
from effgen.models.hf_inference_models import available_models as hf_available_models
|
|
138
|
+
from effgen.models.hf_inference_models import catalog_summary as hf_catalog_summary
|
|
139
|
+
from effgen.models.hf_inference_models import chat_models as hf_chat_models
|
|
140
|
+
from effgen.models.hf_inference_models import cheapest_provider as hf_cheapest_provider
|
|
141
|
+
from effgen.models.hf_inference_models import check_drift as hf_check_drift
|
|
142
|
+
from effgen.models.hf_inference_models import get_model_info as hf_get_model_info
|
|
143
|
+
from effgen.models.hf_inference_models import list_providers_for as hf_list_providers_for
|
|
144
|
+
from effgen.models.hf_inference_models import refresh_models as hf_refresh_models
|
|
145
|
+
from effgen.models.hf_inference_models import serverless_models as hf_serverless_models
|
|
146
|
+
from effgen.models.hf_inference_models import suggest_alternatives as hf_suggest_alternatives
|
|
147
|
+
from effgen.models.hf_inference_models import tool_capable_models as hf_tool_capable_models
|
|
96
148
|
from effgen.models.openai_models import available_models as openai_available_models
|
|
97
149
|
from effgen.models.openai_models import chat_models as openai_chat_models
|
|
98
150
|
from effgen.models.openai_models import model_info as openai_model_info
|
|
99
151
|
from effgen.models.openai_models import reasoning_models as openai_reasoning_models # noqa: I001
|
|
100
152
|
from effgen.models.openai_schema import to_openai_schema
|
|
153
|
+
from effgen.models.registry import ProviderRegistry, list_models, list_providers, lookup
|
|
154
|
+
from effgen.models.replicate_models import available_models as replicate_available_models
|
|
155
|
+
from effgen.models.replicate_models import get_model_info as replicate_get_model_info
|
|
156
|
+
from effgen.models.replicate_models import refresh_models as replicate_refresh_models
|
|
157
|
+
from effgen.models.replicate_models import streaming_models as replicate_streaming_models
|
|
158
|
+
from effgen.models.replicate_models import tool_capable_models as replicate_tool_capable_models
|
|
159
|
+
from effgen.models.together_models import available_models as together_available_models
|
|
160
|
+
from effgen.models.together_models import chat_models as together_chat_models
|
|
161
|
+
from effgen.models.together_models import pricing_table as together_pricing_table
|
|
162
|
+
from effgen.models.together_models import refresh_models as together_refresh_models
|
|
163
|
+
from effgen.models.together_models import serverless_models as together_serverless_models
|
|
164
|
+
from effgen.models.together_models import tool_capable_models as together_tool_capable_models
|
|
101
165
|
|
|
102
166
|
# Preset imports
|
|
103
167
|
from effgen.presets import create_agent, list_presets
|
|
@@ -204,17 +268,60 @@ __all__ = [
|
|
|
204
268
|
"StreamChunk",
|
|
205
269
|
"GeminiAdapter",
|
|
206
270
|
"CerebrasAdapter",
|
|
271
|
+
"GroqAdapter",
|
|
272
|
+
"TogetherAdapter",
|
|
273
|
+
"FireworksAdapter",
|
|
274
|
+
"ReplicateAdapter",
|
|
275
|
+
"HFInferenceAdapter",
|
|
207
276
|
"ModelLoader",
|
|
208
277
|
"GenerationConfig",
|
|
209
278
|
"GenerationResult",
|
|
279
|
+
# Router (v0.2.4+)
|
|
280
|
+
"PolicyBasedRouter",
|
|
281
|
+
"RoutingPolicy",
|
|
282
|
+
"RoutingContext",
|
|
283
|
+
"RouterDecision",
|
|
284
|
+
"RouterEvent",
|
|
285
|
+
"ProviderModelPair",
|
|
286
|
+
"FirstAvailablePolicy",
|
|
287
|
+
"CostBasedPolicy",
|
|
288
|
+
"LatencyBasedPolicy",
|
|
289
|
+
"RetryPolicy",
|
|
290
|
+
# Tracking (v0.2.4+)
|
|
291
|
+
"LatencyTracker",
|
|
292
|
+
"CostTracker",
|
|
293
|
+
"SQLiteCostStore",
|
|
210
294
|
"RateLimitCoordinator",
|
|
211
295
|
"RateLimitExceeded",
|
|
296
|
+
"SQLiteRateLimitStore",
|
|
297
|
+
# Errors
|
|
212
298
|
"ModelRefusalError",
|
|
299
|
+
"ModelAuthError",
|
|
300
|
+
"ModelTimeoutError",
|
|
301
|
+
"ModelUnavailableError",
|
|
302
|
+
"ModelNotFoundError",
|
|
303
|
+
"AmbiguousModelError",
|
|
304
|
+
"NoCandidateWithinBudgetError",
|
|
305
|
+
"ToolIncompatibleError",
|
|
306
|
+
"AllCandidatesExhaustedError",
|
|
307
|
+
"BudgetExceededError",
|
|
308
|
+
"ProviderTransientError",
|
|
309
|
+
"InvalidRequestError",
|
|
213
310
|
"to_openai_schema",
|
|
311
|
+
# Provider registry + auth
|
|
312
|
+
"ProviderRegistry",
|
|
313
|
+
"list_providers",
|
|
314
|
+
"list_models",
|
|
315
|
+
"lookup",
|
|
316
|
+
"check_keys",
|
|
214
317
|
# Cerebras helpers
|
|
215
318
|
"cerebras_available_models",
|
|
216
319
|
"cerebras_free_tier_models",
|
|
217
320
|
"cerebras_model_info",
|
|
321
|
+
# Groq helpers
|
|
322
|
+
"groq_available_models",
|
|
323
|
+
"groq_chat_models",
|
|
324
|
+
"groq_tool_capable_models",
|
|
218
325
|
# OpenAI helpers
|
|
219
326
|
"openai_available_models",
|
|
220
327
|
"openai_chat_models",
|
|
@@ -225,6 +332,37 @@ __all__ = [
|
|
|
225
332
|
"gemini_free_tier_models",
|
|
226
333
|
"gemini_model_info",
|
|
227
334
|
"gemini_recommended_models",
|
|
335
|
+
# Together helpers
|
|
336
|
+
"together_available_models",
|
|
337
|
+
"together_chat_models",
|
|
338
|
+
"together_tool_capable_models",
|
|
339
|
+
"together_pricing_table",
|
|
340
|
+
"together_refresh_models",
|
|
341
|
+
"together_serverless_models",
|
|
342
|
+
# Fireworks helpers
|
|
343
|
+
"fireworks_available_models",
|
|
344
|
+
"fireworks_chat_models",
|
|
345
|
+
"fireworks_tool_capable_models",
|
|
346
|
+
"fireworks_pricing_table",
|
|
347
|
+
"fireworks_refresh_models",
|
|
348
|
+
# Replicate helpers
|
|
349
|
+
"replicate_available_models",
|
|
350
|
+
"replicate_streaming_models",
|
|
351
|
+
"replicate_tool_capable_models",
|
|
352
|
+
"replicate_refresh_models",
|
|
353
|
+
"replicate_get_model_info",
|
|
354
|
+
# HF Inference helpers
|
|
355
|
+
"hf_available_models",
|
|
356
|
+
"hf_chat_models",
|
|
357
|
+
"hf_tool_capable_models",
|
|
358
|
+
"hf_serverless_models",
|
|
359
|
+
"hf_suggest_alternatives",
|
|
360
|
+
"hf_get_model_info",
|
|
361
|
+
"hf_refresh_models",
|
|
362
|
+
"hf_check_drift",
|
|
363
|
+
"hf_catalog_summary",
|
|
364
|
+
"hf_list_providers_for",
|
|
365
|
+
"hf_cheapest_provider",
|
|
228
366
|
|
|
229
367
|
# Tools
|
|
230
368
|
"BaseTool",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""effGen API Server v2 — Production Gateway.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Modules:
|
|
4
4
|
- openai_compat: OpenAI-compatible /v1/chat/completions and /v1/completions
|
|
5
5
|
- queue: RequestQueue with priority, fair scheduling, backpressure
|
|
6
6
|
- pool: AgentPool with min/max size and auto-scaling
|