aria-code 4.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +32 -0
- agents/base.py +190 -0
- agents/deep/__init__.py +37 -0
- agents/deep/calibration_loop.py +144 -0
- agents/deep/critic.py +125 -0
- agents/deep/deepen.py +193 -0
- agents/deep/models.py +149 -0
- agents/deep/pipeline.py +164 -0
- agents/deep/quant_fusion.py +192 -0
- agents/deep/themes.py +95 -0
- agents/deep/tiers.py +106 -0
- agents/financial/__init__.py +10 -0
- agents/financial/catalyst.py +279 -0
- agents/financial/debate.py +145 -0
- agents/financial/earnings.py +303 -0
- agents/financial/fundamental.py +159 -0
- agents/financial/macro.py +99 -0
- agents/financial/news.py +207 -0
- agents/financial/risk.py +132 -0
- agents/financial/sector.py +279 -0
- agents/financial/synthesis.py +274 -0
- agents/financial/technical.py +258 -0
- agents/portfolio_agent.py +333 -0
- agents/realty/__init__.py +62 -0
- agents/realty/asset_diagnosis.py +150 -0
- agents/realty/business_match.py +165 -0
- agents/realty/cashflow_verify.py +208 -0
- agents/realty/contract_rules.py +209 -0
- agents/realty/energy_anomaly.py +188 -0
- agents/realty/exit_settlement.py +207 -0
- agents/realty/fulfillment_risk.py +205 -0
- agents/realty/ops_optimize.py +159 -0
- agents/realty/revenue_share.py +214 -0
- agents/registry.py +144 -0
- agents/sports/__init__.py +0 -0
- agents/sports/football_agent.py +169 -0
- agents/team.py +289 -0
- aliyun_data_client.py +660 -0
- apps/README.md +12 -0
- apps/__init__.py +2 -0
- apps/channels/README.md +15 -0
- apps/cli/README.md +13 -0
- apps/cli/__init__.py +2 -0
- apps/cli/bootstrap.py +99 -0
- apps/cli/codegen_paths.py +29 -0
- apps/cli/commands/__init__.py +16 -0
- apps/cli/commands/analysis_cmds.py +288 -0
- apps/cli/commands/backtest_cmds.py +1887 -0
- apps/cli/commands/broker_cmds.py +1154 -0
- apps/cli/commands/business_workflow_cmds.py +289 -0
- apps/cli/commands/catalog.py +84 -0
- apps/cli/commands/data_cmds.py +405 -0
- apps/cli/commands/diagnostic_cmds.py +179 -0
- apps/cli/commands/diagnostic_ops_cmds.py +696 -0
- apps/cli/commands/finance_render.py +12 -0
- apps/cli/commands/market.py +399 -0
- apps/cli/commands/market_cmds.py +1276 -0
- apps/cli/commands/market_context.py +425 -0
- apps/cli/commands/market_render.py +7 -0
- apps/cli/commands/model_cmds.py +1579 -0
- apps/cli/commands/ops_cmds.py +668 -0
- apps/cli/commands/portfolio_cmds.py +962 -0
- apps/cli/commands/report.py +377 -0
- apps/cli/commands/scaffold_templates.py +617 -0
- apps/cli/commands/session_cmds.py +179 -0
- apps/cli/commands/session_ux_cmds.py +280 -0
- apps/cli/commands/team.py +588 -0
- apps/cli/commands/team_render.py +8 -0
- apps/cli/commands/ui_cmds.py +358 -0
- apps/cli/commands/workflow_cmds.py +279 -0
- apps/cli/commands/workspace_cmds.py +1414 -0
- apps/cli/config_paths.py +70 -0
- apps/cli/config_store.py +61 -0
- apps/cli/deterministic.py +122 -0
- apps/cli/direct.py +48 -0
- apps/cli/github_app_auth.py +135 -0
- apps/cli/handlers/__init__.py +11 -0
- apps/cli/handlers/broker_handlers.py +122 -0
- apps/cli/handlers/chart_handlers.py +1309 -0
- apps/cli/handlers/market_handlers.py +2509 -0
- apps/cli/handlers/realty_handlers.py +114 -0
- apps/cli/handlers/strategy_advice.py +82 -0
- apps/cli/hooks.py +180 -0
- apps/cli/i18n.py +284 -0
- apps/cli/intent.py +136 -0
- apps/cli/intent_router.py +217 -0
- apps/cli/lifecycle_hooks.py +48 -0
- apps/cli/main.py +29 -0
- apps/cli/market_metadata.py +135 -0
- apps/cli/market_universe.py +265 -0
- apps/cli/message_processing.py +257 -0
- apps/cli/plan_mode.py +139 -0
- apps/cli/plotly_html.py +15 -0
- apps/cli/prediction_feedback.py +202 -0
- apps/cli/preflight.py +497 -0
- apps/cli/project_aria.py +60 -0
- apps/cli/prompts/__init__.py +0 -0
- apps/cli/prompts/coding.py +658 -0
- apps/cli/prompts/system_prompts.py +531 -0
- apps/cli/prompts/ui.py +434 -0
- apps/cli/providers/__init__.py +1 -0
- apps/cli/providers/base.py +271 -0
- apps/cli/providers/chat_routing.py +80 -0
- apps/cli/providers/llm/__init__.py +1 -0
- apps/cli/providers/llm/ollama_stream.py +1170 -0
- apps/cli/providers/llm/sse_stream.py +216 -0
- apps/cli/providers/runtime_bridge.py +185 -0
- apps/cli/runtime_consumer.py +489 -0
- apps/cli/session_export.py +87 -0
- apps/cli/session_jsonl.py +207 -0
- apps/cli/session_store.py +112 -0
- apps/cli/todo_tracker.py +190 -0
- apps/cli/tools/__init__.py +40 -0
- apps/cli/tools/context.py +46 -0
- apps/cli/tools/file_tools.py +112 -0
- apps/cli/tools/market_tools.py +549 -0
- apps/cli/tools/notebook_tools.py +111 -0
- apps/cli/tools/system_tools.py +669 -0
- apps/cli/tools/write_tools.py +715 -0
- apps/cli/tradingview_bridge.py +434 -0
- apps/cli/update_check.py +152 -0
- apps/cli/utils/__init__.py +0 -0
- apps/cli/utils/market_detect.py +1578 -0
- apps/daemon/README.md +14 -0
- apps/vscode/README.md +115 -0
- apps/vscode/package.json +70 -0
- aria_cli.py +11636 -0
- aria_code-4.1.3.dist-info/METADATA +952 -0
- aria_code-4.1.3.dist-info/RECORD +284 -0
- aria_code-4.1.3.dist-info/WHEEL +5 -0
- aria_code-4.1.3.dist-info/entry_points.txt +2 -0
- aria_code-4.1.3.dist-info/licenses/LICENSE +121 -0
- aria_code-4.1.3.dist-info/top_level.txt +50 -0
- aria_daemon.py +1295 -0
- aria_feishu_bot.py +1359 -0
- aria_relay_client.py +182 -0
- aria_relay_server.py +405 -0
- aria_telegram_bot.py +202 -0
- ariarc.py +328 -0
- artifacts.py +491 -0
- backtest_report.py +472 -0
- brokers/__init__.py +72 -0
- brokers/base.py +207 -0
- brokers/capabilities.py +264 -0
- brokers/cn/__init__.py +10 -0
- brokers/cn/easytrader_broker.py +193 -0
- brokers/cn/futu_broker.py +194 -0
- brokers/cn/longbridge_broker.py +190 -0
- brokers/cn/tiger_broker.py +196 -0
- brokers/cn/xtquant_broker.py +175 -0
- brokers/config.py +364 -0
- brokers/intl/__init__.py +5 -0
- brokers/intl/alpaca_broker.py +183 -0
- brokers/intl/ibkr_broker.py +215 -0
- brokers/intl/webull_broker.py +156 -0
- brokers/paper_broker.py +259 -0
- brokers/planning.py +296 -0
- brokers/registry.py +181 -0
- brokers/trading.py +237 -0
- change_store.py +127 -0
- command_safety.py +19 -0
- computer_use_tools.py +504 -0
- dashboard_generator.py +578 -0
- data_analysis_tools.py +808 -0
- data_cleaner.py +483 -0
- data_service.py +481 -0
- datasources/__init__.py +23 -0
- datasources/base.py +166 -0
- datasources/router.py +221 -0
- datasources/sources/__init__.py +15 -0
- datasources/sources/akshare_source.py +269 -0
- datasources/sources/alpha_vantage_source.py +202 -0
- datasources/sources/edgar_source.py +218 -0
- datasources/sources/finnhub_source.py +197 -0
- datasources/sources/fred_source.py +219 -0
- datasources/sources/tushare_source.py +141 -0
- datasources/sources/web_scraper_source.py +278 -0
- datasources/sources/world_bank_source.py +205 -0
- datasources/sources/yfinance_source.py +152 -0
- demo_player.py +204 -0
- doctor.py +508 -0
- file_analysis_tools.py +734 -0
- finance_formulas.py +389 -0
- football_data_client.py +1670 -0
- intent_classifier.py +358 -0
- local_finance_tools.py +3221 -0
- local_llm_provider.py +552 -0
- macro_tools.py +368 -0
- market_data_client.py +1899 -0
- mcp_client.py +506 -0
- memory_manager.py +245 -0
- model_capability.py +416 -0
- notification_tools.py +248 -0
- packages/__init__.py +23 -0
- packages/aria_agents/__init__.py +5 -0
- packages/aria_agents/manifest.py +69 -0
- packages/aria_core/__init__.py +34 -0
- packages/aria_core/architecture.py +192 -0
- packages/aria_core/export.py +124 -0
- packages/aria_core/manifest.py +65 -0
- packages/aria_infra/__init__.py +15 -0
- packages/aria_infra/arthera.py +52 -0
- packages/aria_infra/doctor.py +246 -0
- packages/aria_infra/product.py +37 -0
- packages/aria_mcp/__init__.py +25 -0
- packages/aria_mcp/bridge.py +38 -0
- packages/aria_mcp/config.py +97 -0
- packages/aria_mcp/tools.py +61 -0
- packages/aria_sdk/__init__.py +19 -0
- packages/aria_sdk/client.py +396 -0
- packages/aria_sdk/providers.py +70 -0
- packages/aria_sdk/streaming.py +73 -0
- packages/aria_sdk/types.py +86 -0
- packages/aria_services/__init__.py +55 -0
- packages/aria_services/context.py +258 -0
- packages/aria_services/data.py +11 -0
- packages/aria_services/provider_health.py +189 -0
- packages/aria_services/registry.py +213 -0
- packages/aria_services/usage.py +138 -0
- packages/aria_skills/__init__.py +5 -0
- packages/aria_skills/registry.py +59 -0
- packages/aria_tools/__init__.py +5 -0
- packages/aria_tools/registry.py +128 -0
- packages/quant_engine/__init__.py +6 -0
- packages/quant_engine/sports/__init__.py +72 -0
- packages/quant_engine/sports/calibrator.py +353 -0
- packages/quant_engine/sports/dixon_coles.py +234 -0
- packages/quant_engine/sports/elo.py +299 -0
- packages/quant_engine/sports/form.py +188 -0
- packages/quant_engine/sports/h2h.py +195 -0
- packages/quant_engine/sports/ml_model.py +354 -0
- packages/quant_engine/sports/predictor.py +311 -0
- packages/quant_engine/sports/tracker.py +664 -0
- packages/quant_engine/stochastic/__init__.py +27 -0
- packages/quant_engine/stochastic/gbm_enhanced.py +195 -0
- packages/quant_engine/stochastic/ito_calculus.py +477 -0
- packages/quant_engine/stochastic/kelly_criterion.py +181 -0
- packages/quant_engine/stochastic/monte_carlo_advanced.py +95 -0
- packages/quant_engine/stochastic/options_pricing.py +573 -0
- packages/quant_engine/stochastic/stochastic_processes.py +90 -0
- plan_utils.py +194 -0
- plugin_loader.py +328 -0
- portfolio_ledger.py +262 -0
- privacy/__init__.py +5 -0
- privacy/feedback.py +123 -0
- project_tools.py +525 -0
- providers/__init__.py +30 -0
- providers/llm/__init__.py +19 -0
- providers/llm/anthropic.py +184 -0
- providers/llm/base.py +139 -0
- providers/llm/ollama.py +128 -0
- providers/llm/openai_compat.py +282 -0
- providers/llm/registry.py +358 -0
- realty_data_tools.py +659 -0
- report_generator.py +1314 -0
- runtime/__init__.py +103 -0
- runtime/agent_loop.py +1183 -0
- runtime/approval.py +51 -0
- runtime/events.py +102 -0
- runtime/gateway.py +128 -0
- runtime/lsp.py +346 -0
- runtime/subagent.py +258 -0
- runtime/tool_executor.py +104 -0
- runtime/tool_policy.py +106 -0
- safety/__init__.py +21 -0
- safety/permissions.py +275 -0
- setup_wizard.py +653 -0
- strategy_vault.py +420 -0
- ui/__init__.py +100 -0
- ui/banner.py +310 -0
- ui/completer.py +391 -0
- ui/console.py +271 -0
- ui/image_render.py +243 -0
- ui/input_box.py +376 -0
- ui/picker.py +195 -0
- ui/render/__init__.py +11 -0
- ui/render/finance.py +1480 -0
- ui/render/market.py +225 -0
- ui/render/output.py +681 -0
- ui/render/team.py +346 -0
- ui/robot.py +235 -0
- workspace/__init__.py +6 -0
- workspace/files.py +170 -0
- workspace/verify.py +113 -0
local_llm_provider.py
ADDED
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
"""
|
|
2
|
+
local_llm_provider.py — Unified async streaming interface for local LLM backends.
|
|
3
|
+
|
|
4
|
+
Supported backends
|
|
5
|
+
------------------
|
|
6
|
+
ollama → http://localhost:11434 (Ollama /api/chat)
|
|
7
|
+
lmstudio → http://localhost:1234 (LM Studio /v1/chat/completions)
|
|
8
|
+
vllm → http://localhost:8000 (vLLM /v1/chat/completions)
|
|
9
|
+
llamacpp → http://localhost:8080 (llama.cpp server /v1/chat/completions)
|
|
10
|
+
jan → http://localhost:1337 (Jan /v1/chat/completions)
|
|
11
|
+
openai → https://api.openai.com (OpenAI-compatible proxy)
|
|
12
|
+
|
|
13
|
+
All non-Ollama backends speak the OpenAI /v1/chat/completions SSE format.
|
|
14
|
+
|
|
15
|
+
Usage::
|
|
16
|
+
|
|
17
|
+
provider = LocalLLMProvider.from_config(config)
|
|
18
|
+
async for event in provider.stream(messages, tools=schemas):
|
|
19
|
+
if event["type"] == "token":
|
|
20
|
+
print(event["text"], end="", flush=True)
|
|
21
|
+
elif event["type"] == "tool_call":
|
|
22
|
+
handle_tool(event["name"], event["arguments"])
|
|
23
|
+
elif event["type"] == "done":
|
|
24
|
+
break
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import json
|
|
30
|
+
import os
|
|
31
|
+
import re
|
|
32
|
+
import urllib.request
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
from typing import Any, AsyncIterator, Dict, List, Optional
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Model resolution helpers
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
# Preferred model-name prefixes, highest priority first.
|
|
41
|
+
_PREFERRED_PREFIXES = [
|
|
42
|
+
"aria-", # native Aria models (future)
|
|
43
|
+
"qwen2.5:", # full qwen2.5 series
|
|
44
|
+
"qwen2.5-coder:", # coder variant (great for finance code tasks)
|
|
45
|
+
"qwen2.5", # any qwen2.5 variant without explicit tag
|
|
46
|
+
"qwen", # any qwen
|
|
47
|
+
"deepseek", # DeepSeek family
|
|
48
|
+
"gpt-oss", # locally-hosted GPT-compatible
|
|
49
|
+
"llama", # Meta Llama family
|
|
50
|
+
"mistral", # Mistral family
|
|
51
|
+
"gemma", # Google Gemma family
|
|
52
|
+
"phi", # Microsoft Phi
|
|
53
|
+
"command", # Cohere Command
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
_model_cache: Dict[str, str] = {} # base_url → resolved model name
|
|
57
|
+
_cache_ts: Dict[str, float] = {}
|
|
58
|
+
_CACHE_TTL = 60.0 # seconds
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def resolve_model_sync(base_url: str, requested: str) -> str:
    """
    Resolve *requested* to a model the Ollama server actually has (blocking).

    The installed models are read from ``<base_url>/api/tags`` (3 second
    timeout) and the choice among them is delegated to ``_pick_model``.

    A successful resolution is cached under ``"<base_url>::<requested>"`` and
    reused for ``_CACHE_TTL`` seconds.  When the listing cannot be fetched or
    parsed, or contains no models, *requested* is returned unchanged and
    nothing is cached, so the server can surface the real error later.
    """
    import time

    cache_key = f"{base_url}::{requested}"
    started = time.time()

    # Serve a fresh-enough cached answer without touching the network.
    if cache_key in _model_cache:
        age = started - _cache_ts.get(cache_key, 0)
        if age < _CACHE_TTL:
            return _model_cache[cache_key]

    endpoint = base_url.rstrip("/") + "/api/tags"
    try:
        with urllib.request.urlopen(endpoint, timeout=3) as response:
            listing = json.loads(response.read())
        names: List[str] = [entry["name"] for entry in listing.get("models", [])]
    except Exception:
        # Ollama unreachable (or answered with something unexpected) — pass through.
        return requested

    if not names:
        return requested

    chosen = _pick_model(names, requested)
    _model_cache[cache_key] = chosen
    _cache_ts[cache_key] = started
    return chosen
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def resolve_model_async(base_url: str, requested: str) -> str:
    """
    Async variant of resolve_model_sync.

    Reads ``<base_url>/api/tags`` with aiohttp when it is installed.  Without
    aiohttp the request is made with urllib on the event loop's default
    executor, so the (up to 3 second) blocking call never stalls other
    coroutines running on the loop.

    Returns the model chosen by ``_pick_model``; the result is cached under
    ``"<base_url>::<requested>"`` for ``_CACHE_TTL`` seconds.  If the listing
    cannot be fetched or parsed, or is empty, *requested* is returned as-is
    and nothing is cached.
    """
    import asyncio
    import time

    key = f"{base_url}::{requested}"
    now = time.time()
    if key in _model_cache and now - _cache_ts.get(key, 0) < _CACHE_TTL:
        return _model_cache[key]

    tags_url = base_url.rstrip("/") + "/api/tags"

    def _fetch_blocking() -> Any:
        # Runs in a worker thread; see the executor call below.
        with urllib.request.urlopen(tags_url, timeout=3) as r:
            return json.loads(r.read())

    try:
        if _HAS_AIOHTTP:
            async with aiohttp.ClientSession() as s:
                async with s.get(tags_url, timeout=aiohttp.ClientTimeout(total=3)) as r:
                    data = await r.json()
        else:
            # urllib is synchronous: hand it to the default thread pool instead
            # of calling it directly inside the coroutine.
            loop = asyncio.get_running_loop()
            data = await loop.run_in_executor(None, _fetch_blocking)
        available: List[str] = [m["name"] for m in data.get("models", [])]
    except Exception:
        # Unreachable server or unexpected payload — let the caller try the
        # requested name and surface the real error from the chat endpoint.
        return requested

    if not available:
        return requested

    resolved = _pick_model(available, requested)
    _model_cache[key] = resolved
    _cache_ts[key] = now
    return resolved
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _pick_model(available: List[str], requested: str) -> str:
|
|
126
|
+
"""Choose the best model from *available* given *requested*."""
|
|
127
|
+
# 1. Exact match
|
|
128
|
+
if requested in available:
|
|
129
|
+
return requested
|
|
130
|
+
|
|
131
|
+
# 2. Same family (strip tag, match prefix)
|
|
132
|
+
family = requested.split(":")[0]
|
|
133
|
+
hit = next((m for m in available if m.startswith(family)), None)
|
|
134
|
+
if hit:
|
|
135
|
+
return hit
|
|
136
|
+
|
|
137
|
+
# 3. Priority-prefix list
|
|
138
|
+
for prefix in _PREFERRED_PREFIXES:
|
|
139
|
+
hit = next((m for m in available if m.startswith(prefix)), None)
|
|
140
|
+
if hit:
|
|
141
|
+
return hit
|
|
142
|
+
|
|
143
|
+
# 4. First available
|
|
144
|
+
return available[0]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def list_ollama_models(base_url: str) -> List[str]:
    """
    Fetch the model names listed by ``<base_url>/api/tags`` (synchronous).

    Uses a 3 second timeout.  Any failure — connection, decoding, or an entry
    without a ``"name"`` field — yields an empty list instead of raising.
    """
    endpoint = base_url.rstrip("/") + "/api/tags"
    try:
        with urllib.request.urlopen(endpoint, timeout=3) as response:
            payload = json.loads(response.read())
            return [entry["name"] for entry in payload.get("models", [])]
    except Exception:
        return []
|
|
154
|
+
|
|
155
|
+
try:
|
|
156
|
+
import aiohttp
|
|
157
|
+
_HAS_AIOHTTP = True
|
|
158
|
+
except ImportError:
|
|
159
|
+
_HAS_AIOHTTP = False
|
|
160
|
+
|
|
161
|
+
from model_capability import (
|
|
162
|
+
ModelCapability,
|
|
163
|
+
get_model_capability,
|
|
164
|
+
build_ollama_tool_payload,
|
|
165
|
+
build_tool_system_prompt,
|
|
166
|
+
parse_tool_calls_from_response,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Backend definitions
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
# Connection defaults per backend name.  Every entry carries:
#   default_url – base URL used when the provider has no explicit base_url
#   api_path    – chat endpoint path appended to the base URL
#   protocol    – wire-format selector read by LocalLLMProvider.stream
BACKEND_DEFAULTS: Dict[str, Dict[str, Any]] = {
    "ollama": {
        "default_url": "http://localhost:11434",
        "api_path": "/api/chat",
        "protocol": "ollama",
    },
    "lmstudio": {
        "default_url": "http://localhost:1234",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "vllm": {
        "default_url": "http://localhost:8000",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "llamacpp": {
        "default_url": "http://localhost:8080",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "jan": {
        "default_url": "http://localhost:1337",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "openai": {
        "default_url": "https://api.openai.com",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    # Cloud providers (API key required)
    "deepseek": {
        "default_url": "https://api.deepseek.com",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "groq": {
        "default_url": "https://api.groq.com/openai",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "anthropic": {
        "default_url": "https://api.anthropic.com",
        "api_path": "/v1/messages",
        "protocol": "anthropic",
    },
    "together": {
        "default_url": "https://api.together.xyz",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    "siliconflow": {
        "default_url": "https://api.siliconflow.cn",
        "api_path": "/v1/chat/completions",
        "protocol": "openai",
    },
    # This default URL already ends in /v1, hence the shorter api_path.
    "moonshot": {
        "default_url": "https://api.moonshot.cn/v1",
        "api_path": "/chat/completions",
        "protocol": "openai",
    },
    # Custom user-defined OpenAI-compatible endpoint
    "custom": {
        "default_url": "",
        "api_path": "/chat/completions",
        "protocol": "openai",
    },
}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@dataclass
class LocalLLMProvider:
    """
    Async streaming client for one configured LLM backend.

    ``backend`` selects an entry of ``BACKEND_DEFAULTS`` (unknown names fall
    back to the ``"ollama"`` entry), which supplies the default base URL, the
    chat endpoint path and the wire protocol.  ``stream`` is the public entry
    point; it yields plain event dicts regardless of the protocol in use.
    """

    backend: str = "ollama"          # one of BACKEND_DEFAULTS keys
    base_url: str = ""               # override; blank → use BACKEND_DEFAULTS default
    model: str = "qwen2.5-coder:7b"
    api_key: str = ""                # sent as "Authorization: Bearer …" when non-empty
    timeout: int = 300               # total seconds allowed for one streaming request

    # Filled lazily by the ``capability`` property; reset when the model changes.
    _capability: Optional["ModelCapability"] = field(default=None, repr=False)

    # ── constructor ────────────────────────────────────────────────────────

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "LocalLLMProvider":
        """
        Build a provider from an aria-code config dict.

        Keys read: ``local_provider``, ``model``, ``custom_endpoint``,
        ``custom_model``, ``local_api_key``, ``local_url``, ``ollama_url``.
        Missing, ``None`` or empty values fall back to their defaults.

        API key lookup:
          * cloud backends: the backend-specific environment variable, then
            ``local_api_key`` from config, then ``LOCAL_LLM_API_KEY``;
          * everything else: ``local_api_key`` from config, then
            ``LOCAL_LLM_API_KEY`` (an empty config value no longer hides the
            environment variable).

        For the ``ollama`` backend the model is resolved against the server
        immediately (a blocking HTTP call) so later callers get a model name
        the server actually has.
        """
        backend = str(config.get("local_provider") or "ollama").strip().lower()
        info = BACKEND_DEFAULTS.get(backend, BACKEND_DEFAULTS["ollama"])
        requested_model = config.get("model") or "qwen2.5-coder:1.5b"
        generic_key = config.get("local_api_key") or os.getenv("LOCAL_LLM_API_KEY", "")

        # Custom endpoint: user-specified base_url via /config set custom_endpoint=...
        if backend == "custom":
            url = config.get("custom_endpoint") or info["default_url"]
            model = config.get("custom_model") or requested_model
            return cls(backend=backend, base_url=url, model=model, api_key=generic_key)

        # Cloud provider backends: the dedicated env var wins over the generic key.
        cloud_env_map = {
            "deepseek": "DEEPSEEK_API_KEY",
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "groq": "GROQ_API_KEY",
            "together": "TOGETHER_API_KEY",
            "siliconflow": "SILICONFLOW_API_KEY",
            "moonshot": "MOONSHOT_API_KEY",
        }
        if backend in cloud_env_map:
            api_key = os.getenv(cloud_env_map[backend], "") or generic_key
            url = config.get("custom_endpoint") or info["default_url"]
            return cls(backend=backend, base_url=url, model=requested_model, api_key=api_key)

        url = config.get("local_url") or config.get("ollama_url") or info["default_url"]

        # Resolve model at construction time so later callers always get a
        # valid model name (Ollama-only: other backends manage their own catalog).
        if backend == "ollama":
            model = resolve_model_sync(url, requested_model)
        else:
            model = requested_model

        return cls(backend=backend, base_url=url, model=model, api_key=generic_key)

    # ── capability ─────────────────────────────────────────────────────────

    @property
    def capability(self) -> "ModelCapability":
        """Capability record for ``self.model``, looked up once and then cached."""
        if self._capability is None:
            self._capability = get_model_capability(self.model)
        return self._capability

    # ── pure helpers ───────────────────────────────────────────────────────

    @staticmethod
    def _models_endpoint(root: str, api_path: str, protocol: str) -> str:
        """
        Return the model-listing URL that belongs to a chat endpoint.

        Ollama lists models at ``/api/tags``.  For the other protocols the
        listing sits next to the chat endpoint, so ``…/chat/completions`` is
        swapped for ``…/models``; this keeps bases that already end in ``/v1``
        from becoming ``/v1/v1/models``.  Any other *api_path* falls back to
        ``/v1/models``.
        """
        root = root.rstrip("/")
        if protocol == "ollama":
            return f"{root}/api/tags"
        chat_suffix = "/chat/completions"
        if api_path.endswith(chat_suffix):
            return f"{root}{api_path[:-len(chat_suffix)]}/models"
        return f"{root}/v1/models"

    @staticmethod
    def _inject_tool_prompt(messages: List[Dict[str, Any]], tool_sys: str) -> List[Dict[str, Any]]:
        """
        Return a copy of *messages* with *tool_sys* added to the system prompt.

        The text is appended to the first system message that has string
        content; if there is none, a new system message (stripped text) is
        placed in front.  The caller's list and message dicts are never
        modified, so calling ``stream`` repeatedly with the same conversation
        does not keep growing its system prompt.
        """
        patched = list(messages)
        for index, message in enumerate(patched):
            if message.get("role") == "system" and isinstance(message.get("content"), str):
                patched[index] = {**message, "content": message["content"] + tool_sys}
                return patched
        return [{"role": "system", "content": tool_sys.strip()}] + patched

    @staticmethod
    def _parse_tool_arguments(raw: Any) -> Dict[str, Any]:
        """
        Normalise tool-call arguments to a dict.

        Accepts an already-decoded dict or a JSON string.  Empty, ``None``,
        malformed or non-object input yields ``{}``.
        """
        if isinstance(raw, dict):
            return raw
        if not raw:
            return {}
        try:
            parsed = json.loads(raw)
        except Exception:
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def _models_request(self) -> urllib.request.Request:
        """
        Build the model-listing request for this provider.

        The API key, when set, is attached as a Bearer token for non-Ollama
        protocols.  Raises ``ValueError`` for an empty or malformed URL, so
        callers build the request inside their ``try`` block.
        """
        info = BACKEND_DEFAULTS.get(self.backend, BACKEND_DEFAULTS["ollama"])
        root = self.base_url or info["default_url"]
        req = urllib.request.Request(
            self._models_endpoint(root, info["api_path"], info["protocol"])
        )
        if self.api_key and info["protocol"] != "ollama":
            req.add_header("Authorization", f"Bearer {self.api_key}")
        return req

    # ── health check ───────────────────────────────────────────────────────

    def is_available(self) -> bool:
        """
        Quick synchronous check that the backend is reachable.

        Returns True only when the model-listing endpoint answers with HTTP
        200 within 3 seconds; every failure (including HTTP errors) is False.
        """
        try:
            with urllib.request.urlopen(self._models_request(), timeout=3) as r:
                return r.status == 200
        except Exception:
            return False

    def list_models(self) -> List[str]:
        """
        Return available model names from the backend.

        Ollama listings are read from ``models[].name``, all others from
        ``data[].id``.  Any failure yields an empty list.
        """
        info = BACKEND_DEFAULTS.get(self.backend, BACKEND_DEFAULTS["ollama"])
        try:
            with urllib.request.urlopen(self._models_request(), timeout=3) as r:
                data = json.loads(r.read())
            if info["protocol"] == "ollama":
                return [m["name"] for m in data.get("models", [])]
            return [m["id"] for m in data.get("data", [])]
        except Exception:
            return []

    # ── streaming core ─────────────────────────────────────────────────────

    async def stream(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict]] = None,
        *,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        cancel_event=None,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Unified async generator.  Yields event dicts::

            {"type": "token",     "text": str}
            {"type": "thinking",  "text": str}   # for reasoning models
            {"type": "tool_call", "name": str, "arguments": dict, "id": str}
            {"type": "done",      "usage": dict, "stop_reason": str}
            {"type": "error",     "message": str}

        *temperature* defaults to the model capability's temperature and
        *max_tokens* to 4096.  *cancel_event*, when given, is polled via
        ``is_set()`` between streamed lines; once set the stream ends with a
        ``done`` event whose ``stop_reason`` is ``"cancelled"``.

        *messages* is not modified: when a tool prompt has to be injected for
        a model without native tool calling, it is added to a copy.
        """
        if not _HAS_AIOHTTP:
            yield {"type": "error", "message": "aiohttp not installed: pip install aiohttp"}
            return

        info = BACKEND_DEFAULTS.get(self.backend, BACKEND_DEFAULTS["ollama"])
        url = (self.base_url or info["default_url"]).rstrip("/") + info["api_path"]
        protocol = info["protocol"]
        cap = self.capability

        temp = temperature if temperature is not None else cap.temperature
        mtoks = max_tokens if max_tokens is not None else 4096

        # Inject tool system prompt for text-only models
        if tools and not cap.tool_calls:
            tool_sys = build_tool_system_prompt(tools, self.model)
            if tool_sys:
                messages = self._inject_tool_prompt(messages, tool_sys)

        if protocol == "ollama":
            async for ev in self._stream_ollama(url, messages, tools, temp, mtoks, cap, cancel_event):
                yield ev
        else:
            # NOTE(review): the "anthropic" entry declares its own protocol but is
            # routed through the OpenAI-compatible path here — confirm that its
            # endpoint accepts this payload shape and Bearer authentication.
            async for ev in self._stream_openai(url, messages, tools, temp, mtoks, cap, cancel_event):
                yield ev

    # ── Ollama protocol ────────────────────────────────────────────────────

    async def _stream_ollama(self, url, messages, tools, temp, max_tokens, cap, cancel_event):
        """
        Stream one chat completion from Ollama's newline-delimited JSON API.

        Yields the same event dicts as ``stream``.  Text-embedded tool calls
        are parsed from the accumulated text only when the model produced no
        native tool calls, so a call is never reported twice.
        """
        # Re-resolve the model async before every call — handles the case where
        # the model was changed or wasn't available at construction time.
        base_url = url.rsplit("/api/chat", 1)[0]
        model = await resolve_model_async(base_url, self.model)
        if model != self.model:
            self.model = model          # keep in sync for future calls
            self._capability = None     # reset capability cache for new model
            cap = self.capability       # the passed-in cap described the old model

        native_tools = build_ollama_tool_payload(tools or [], self.model) if tools else None
        payload: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "options": {
                "num_ctx": cap.context_window,
                "temperature": temp,
                "top_p": cap.top_p,
                "repeat_penalty": 1.15,
                "repeat_last_n": 128,
                "num_predict": max_tokens,
            },
        }
        if native_tools:
            payload["tools"] = native_tools

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        full_text = ""
        native_seen = False  # True once a native tool call has been emitted

        try:
            async with aiohttp.ClientSession() as sess:
                async with sess.post(url, json=payload,
                                     timeout=aiohttp.ClientTimeout(total=self.timeout)) as resp:
                    if resp.status != 200:
                        body = await resp.text()
                        yield {"type": "error", "message": f"Ollama HTTP {resp.status}: {body[:200]}"}
                        return

                    async for line in resp.content:
                        if cancel_event and cancel_event.is_set():
                            yield {"type": "done", "usage": usage, "stop_reason": "cancelled"}
                            return

                        text = line.decode("utf-8", errors="ignore").strip()
                        if not text:
                            continue
                        try:
                            data = json.loads(text)
                        except json.JSONDecodeError:
                            continue
                        if not isinstance(data, dict):
                            continue

                        # Errors raised after streaming began arrive as an
                        # object with an "error" field inside the 200 response.
                        if data.get("error"):
                            yield {"type": "error",
                                   "message": f"Ollama stream error: {data['error']}"}
                            return

                        msg = data.get("message") or {}

                        # Native tool calls
                        for tc in msg.get("tool_calls") or []:
                            fn = tc.get("function") or {}
                            name = fn.get("name") or ""
                            if name:
                                native_seen = True
                                yield {"type": "tool_call", "name": name,
                                       "arguments": self._parse_tool_arguments(fn.get("arguments")),
                                       "id": tc.get("id") or ""}

                        if data.get("done"):
                            usage["prompt_tokens"] += data.get("prompt_eval_count") or 0
                            usage["completion_tokens"] += data.get("eval_count") or 0
                            break

                        token = msg.get("content", "")
                        if token:
                            full_text += token
                            # Suppress tokens that are part of <tool_call> tags
                            if not full_text.lstrip().startswith("<tool_call"):
                                yield {"type": "token", "text": token}

        except Exception as exc:
            yield {"type": "error", "message": f"Ollama stream error: {exc}"}
            return

        # Fallback: parse text-based tool calls (only when none came natively)
        if not native_seen:
            text_calls = parse_tool_calls_from_response(full_text, model_name=self.model)
            for tc in text_calls:
                yield {"type": "tool_call", "name": tc["tool"],
                       "arguments": tc["params"], "id": ""}

        yield {"type": "done", "usage": usage, "stop_reason": "stop"}

    # ── OpenAI-compatible protocol ─────────────────────────────────────────

    async def _stream_openai(self, url, messages, tools, temp, max_tokens, cap, cancel_event):
        """
        Stream one chat completion from an OpenAI-compatible SSE endpoint.

        Yields the same event dicts as ``stream``.  Tool-call fragments are
        accumulated per ``index`` and emitted after the stream ends; fields
        that a server sends as ``null`` are treated as empty.
        """
        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        payload: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "temperature": temp,
            "max_tokens": max_tokens,
        }
        if tools and cap.tool_calls:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        full_text = ""
        tool_call_accumulator: Dict[int, Dict] = {}  # index → partial call

        try:
            async with aiohttp.ClientSession(headers=headers) as sess:
                async with sess.post(url, json=payload,
                                     timeout=aiohttp.ClientTimeout(total=self.timeout)) as resp:
                    if resp.status != 200:
                        body = await resp.text()
                        yield {"type": "error", "message": f"HTTP {resp.status}: {body[:200]}"}
                        return

                    async for line in resp.content:
                        if cancel_event and cancel_event.is_set():
                            yield {"type": "done", "usage": usage, "stop_reason": "cancelled"}
                            return

                        raw = line.decode("utf-8", errors="ignore").strip()
                        if not raw:
                            continue
                        # The space after "data:" is optional in SSE.
                        if raw.startswith("data:"):
                            raw = raw[5:].strip()
                        if raw == "[DONE]":
                            break
                        try:
                            data = json.loads(raw)
                        except json.JSONDecodeError:
                            continue  # comments, keep-alives, partial junk
                        if not isinstance(data, dict):
                            continue

                        # Usage from final chunk
                        if data.get("usage"):
                            u = data["usage"]
                            usage["prompt_tokens"] = u.get("prompt_tokens") or 0
                            usage["completion_tokens"] = u.get("completion_tokens") or 0

                        choices = data.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta") or {}

                        # Token
                        content = delta.get("content") or ""
                        if content:
                            full_text += content
                            yield {"type": "token", "text": content}

                        # Tool calls (streamed fragments).  ``or ""`` guards
                        # against servers that send null for absent fields,
                        # which would otherwise break string concatenation.
                        for tc_delta in (delta.get("tool_calls") or []):
                            idx = tc_delta.get("index") or 0
                            acc = tool_call_accumulator.setdefault(
                                idx, {"id": "", "name": "", "arguments": ""})
                            acc["id"] += tc_delta.get("id") or ""
                            fn = tc_delta.get("function") or {}
                            acc["name"] += fn.get("name") or ""
                            acc["arguments"] += fn.get("arguments") or ""

                        fin_reason = choices[0].get("finish_reason")
                        if fin_reason in ("stop", "tool_calls", "length"):
                            break

        except Exception as exc:
            yield {"type": "error", "message": f"OpenAI-compat stream error: {exc}"}
            return

        # Emit accumulated tool calls
        for acc in tool_call_accumulator.values():
            if acc.get("name"):
                yield {"type": "tool_call", "name": acc["name"],
                       "arguments": self._parse_tool_arguments(acc.get("arguments")),
                       "id": acc.get("id", "")}

        # Fallback text-based tool call parsing (for non-native models)
        if not tool_call_accumulator:
            text_calls = parse_tool_calls_from_response(full_text, model_name=self.model)
            for tc in text_calls:
                yield {"type": "tool_call", "name": tc["tool"],
                       "arguments": tc["params"], "id": ""}

        yield {"type": "done", "usage": usage, "stop_reason": "stop"}
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
# ---------------------------------------------------------------------------
|
|
538
|
+
# Quick availability probe (synchronous, for startup checks)
|
|
539
|
+
# ---------------------------------------------------------------------------
|
|
540
|
+
|
|
541
|
+
def probe_all_backends() -> Dict[str, bool]:
    """
    Probe the default URL of every entry in ``BACKEND_DEFAULTS``.

    Returns a ``{backend name: bool}`` mapping.  A backend is marked True only
    when its probe URL (``/api/tags`` for ``ollama``, ``/v1/models`` for all
    others) answers with HTTP 200 within 1 second; any exception counts as
    False.  Probes run one after another and carry no credentials.
    """
    reachable = {}
    for backend_name, defaults in BACKEND_DEFAULTS.items():
        root = defaults["default_url"]
        suffix = "/api/tags" if backend_name == "ollama" else "/v1/models"
        try:
            with urllib.request.urlopen(f"{root}{suffix}", timeout=1) as response:
                ok = response.status == 200
        except Exception:
            ok = False
        reachable[backend_name] = ok
    return reachable
|