aria-code 4.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. agents/__init__.py +32 -0
  2. agents/base.py +190 -0
  3. agents/deep/__init__.py +37 -0
  4. agents/deep/calibration_loop.py +144 -0
  5. agents/deep/critic.py +125 -0
  6. agents/deep/deepen.py +193 -0
  7. agents/deep/models.py +149 -0
  8. agents/deep/pipeline.py +164 -0
  9. agents/deep/quant_fusion.py +192 -0
  10. agents/deep/themes.py +95 -0
  11. agents/deep/tiers.py +106 -0
  12. agents/financial/__init__.py +10 -0
  13. agents/financial/catalyst.py +279 -0
  14. agents/financial/debate.py +145 -0
  15. agents/financial/earnings.py +303 -0
  16. agents/financial/fundamental.py +159 -0
  17. agents/financial/macro.py +99 -0
  18. agents/financial/news.py +207 -0
  19. agents/financial/risk.py +132 -0
  20. agents/financial/sector.py +279 -0
  21. agents/financial/synthesis.py +274 -0
  22. agents/financial/technical.py +258 -0
  23. agents/portfolio_agent.py +333 -0
  24. agents/realty/__init__.py +62 -0
  25. agents/realty/asset_diagnosis.py +150 -0
  26. agents/realty/business_match.py +165 -0
  27. agents/realty/cashflow_verify.py +208 -0
  28. agents/realty/contract_rules.py +209 -0
  29. agents/realty/energy_anomaly.py +188 -0
  30. agents/realty/exit_settlement.py +207 -0
  31. agents/realty/fulfillment_risk.py +205 -0
  32. agents/realty/ops_optimize.py +159 -0
  33. agents/realty/revenue_share.py +214 -0
  34. agents/registry.py +144 -0
  35. agents/sports/__init__.py +0 -0
  36. agents/sports/football_agent.py +169 -0
  37. agents/team.py +289 -0
  38. aliyun_data_client.py +660 -0
  39. apps/README.md +12 -0
  40. apps/__init__.py +2 -0
  41. apps/channels/README.md +15 -0
  42. apps/cli/README.md +13 -0
  43. apps/cli/__init__.py +2 -0
  44. apps/cli/bootstrap.py +99 -0
  45. apps/cli/codegen_paths.py +29 -0
  46. apps/cli/commands/__init__.py +16 -0
  47. apps/cli/commands/analysis_cmds.py +288 -0
  48. apps/cli/commands/backtest_cmds.py +1887 -0
  49. apps/cli/commands/broker_cmds.py +1154 -0
  50. apps/cli/commands/business_workflow_cmds.py +289 -0
  51. apps/cli/commands/catalog.py +84 -0
  52. apps/cli/commands/data_cmds.py +405 -0
  53. apps/cli/commands/diagnostic_cmds.py +179 -0
  54. apps/cli/commands/diagnostic_ops_cmds.py +696 -0
  55. apps/cli/commands/finance_render.py +12 -0
  56. apps/cli/commands/market.py +399 -0
  57. apps/cli/commands/market_cmds.py +1276 -0
  58. apps/cli/commands/market_context.py +425 -0
  59. apps/cli/commands/market_render.py +7 -0
  60. apps/cli/commands/model_cmds.py +1579 -0
  61. apps/cli/commands/ops_cmds.py +668 -0
  62. apps/cli/commands/portfolio_cmds.py +962 -0
  63. apps/cli/commands/report.py +377 -0
  64. apps/cli/commands/scaffold_templates.py +617 -0
  65. apps/cli/commands/session_cmds.py +179 -0
  66. apps/cli/commands/session_ux_cmds.py +280 -0
  67. apps/cli/commands/team.py +588 -0
  68. apps/cli/commands/team_render.py +8 -0
  69. apps/cli/commands/ui_cmds.py +358 -0
  70. apps/cli/commands/workflow_cmds.py +279 -0
  71. apps/cli/commands/workspace_cmds.py +1414 -0
  72. apps/cli/config_paths.py +70 -0
  73. apps/cli/config_store.py +61 -0
  74. apps/cli/deterministic.py +122 -0
  75. apps/cli/direct.py +48 -0
  76. apps/cli/github_app_auth.py +135 -0
  77. apps/cli/handlers/__init__.py +11 -0
  78. apps/cli/handlers/broker_handlers.py +122 -0
  79. apps/cli/handlers/chart_handlers.py +1309 -0
  80. apps/cli/handlers/market_handlers.py +2509 -0
  81. apps/cli/handlers/realty_handlers.py +114 -0
  82. apps/cli/handlers/strategy_advice.py +82 -0
  83. apps/cli/hooks.py +180 -0
  84. apps/cli/i18n.py +284 -0
  85. apps/cli/intent.py +136 -0
  86. apps/cli/intent_router.py +217 -0
  87. apps/cli/lifecycle_hooks.py +48 -0
  88. apps/cli/main.py +29 -0
  89. apps/cli/market_metadata.py +135 -0
  90. apps/cli/market_universe.py +265 -0
  91. apps/cli/message_processing.py +257 -0
  92. apps/cli/plan_mode.py +139 -0
  93. apps/cli/plotly_html.py +15 -0
  94. apps/cli/prediction_feedback.py +202 -0
  95. apps/cli/preflight.py +497 -0
  96. apps/cli/project_aria.py +60 -0
  97. apps/cli/prompts/__init__.py +0 -0
  98. apps/cli/prompts/coding.py +658 -0
  99. apps/cli/prompts/system_prompts.py +531 -0
  100. apps/cli/prompts/ui.py +434 -0
  101. apps/cli/providers/__init__.py +1 -0
  102. apps/cli/providers/base.py +271 -0
  103. apps/cli/providers/chat_routing.py +80 -0
  104. apps/cli/providers/llm/__init__.py +1 -0
  105. apps/cli/providers/llm/ollama_stream.py +1170 -0
  106. apps/cli/providers/llm/sse_stream.py +216 -0
  107. apps/cli/providers/runtime_bridge.py +185 -0
  108. apps/cli/runtime_consumer.py +489 -0
  109. apps/cli/session_export.py +87 -0
  110. apps/cli/session_jsonl.py +207 -0
  111. apps/cli/session_store.py +112 -0
  112. apps/cli/todo_tracker.py +190 -0
  113. apps/cli/tools/__init__.py +40 -0
  114. apps/cli/tools/context.py +46 -0
  115. apps/cli/tools/file_tools.py +112 -0
  116. apps/cli/tools/market_tools.py +549 -0
  117. apps/cli/tools/notebook_tools.py +111 -0
  118. apps/cli/tools/system_tools.py +669 -0
  119. apps/cli/tools/write_tools.py +715 -0
  120. apps/cli/tradingview_bridge.py +434 -0
  121. apps/cli/update_check.py +152 -0
  122. apps/cli/utils/__init__.py +0 -0
  123. apps/cli/utils/market_detect.py +1578 -0
  124. apps/daemon/README.md +14 -0
  125. apps/vscode/README.md +115 -0
  126. apps/vscode/package.json +70 -0
  127. aria_cli.py +11636 -0
  128. aria_code-4.1.3.dist-info/METADATA +952 -0
  129. aria_code-4.1.3.dist-info/RECORD +284 -0
  130. aria_code-4.1.3.dist-info/WHEEL +5 -0
  131. aria_code-4.1.3.dist-info/entry_points.txt +2 -0
  132. aria_code-4.1.3.dist-info/licenses/LICENSE +121 -0
  133. aria_code-4.1.3.dist-info/top_level.txt +50 -0
  134. aria_daemon.py +1295 -0
  135. aria_feishu_bot.py +1359 -0
  136. aria_relay_client.py +182 -0
  137. aria_relay_server.py +405 -0
  138. aria_telegram_bot.py +202 -0
  139. ariarc.py +328 -0
  140. artifacts.py +491 -0
  141. backtest_report.py +472 -0
  142. brokers/__init__.py +72 -0
  143. brokers/base.py +207 -0
  144. brokers/capabilities.py +264 -0
  145. brokers/cn/__init__.py +10 -0
  146. brokers/cn/easytrader_broker.py +193 -0
  147. brokers/cn/futu_broker.py +194 -0
  148. brokers/cn/longbridge_broker.py +190 -0
  149. brokers/cn/tiger_broker.py +196 -0
  150. brokers/cn/xtquant_broker.py +175 -0
  151. brokers/config.py +364 -0
  152. brokers/intl/__init__.py +5 -0
  153. brokers/intl/alpaca_broker.py +183 -0
  154. brokers/intl/ibkr_broker.py +215 -0
  155. brokers/intl/webull_broker.py +156 -0
  156. brokers/paper_broker.py +259 -0
  157. brokers/planning.py +296 -0
  158. brokers/registry.py +181 -0
  159. brokers/trading.py +237 -0
  160. change_store.py +127 -0
  161. command_safety.py +19 -0
  162. computer_use_tools.py +504 -0
  163. dashboard_generator.py +578 -0
  164. data_analysis_tools.py +808 -0
  165. data_cleaner.py +483 -0
  166. data_service.py +481 -0
  167. datasources/__init__.py +23 -0
  168. datasources/base.py +166 -0
  169. datasources/router.py +221 -0
  170. datasources/sources/__init__.py +15 -0
  171. datasources/sources/akshare_source.py +269 -0
  172. datasources/sources/alpha_vantage_source.py +202 -0
  173. datasources/sources/edgar_source.py +218 -0
  174. datasources/sources/finnhub_source.py +197 -0
  175. datasources/sources/fred_source.py +219 -0
  176. datasources/sources/tushare_source.py +141 -0
  177. datasources/sources/web_scraper_source.py +278 -0
  178. datasources/sources/world_bank_source.py +205 -0
  179. datasources/sources/yfinance_source.py +152 -0
  180. demo_player.py +204 -0
  181. doctor.py +508 -0
  182. file_analysis_tools.py +734 -0
  183. finance_formulas.py +389 -0
  184. football_data_client.py +1670 -0
  185. intent_classifier.py +358 -0
  186. local_finance_tools.py +3221 -0
  187. local_llm_provider.py +552 -0
  188. macro_tools.py +368 -0
  189. market_data_client.py +1899 -0
  190. mcp_client.py +506 -0
  191. memory_manager.py +245 -0
  192. model_capability.py +416 -0
  193. notification_tools.py +248 -0
  194. packages/__init__.py +23 -0
  195. packages/aria_agents/__init__.py +5 -0
  196. packages/aria_agents/manifest.py +69 -0
  197. packages/aria_core/__init__.py +34 -0
  198. packages/aria_core/architecture.py +192 -0
  199. packages/aria_core/export.py +124 -0
  200. packages/aria_core/manifest.py +65 -0
  201. packages/aria_infra/__init__.py +15 -0
  202. packages/aria_infra/arthera.py +52 -0
  203. packages/aria_infra/doctor.py +246 -0
  204. packages/aria_infra/product.py +37 -0
  205. packages/aria_mcp/__init__.py +25 -0
  206. packages/aria_mcp/bridge.py +38 -0
  207. packages/aria_mcp/config.py +97 -0
  208. packages/aria_mcp/tools.py +61 -0
  209. packages/aria_sdk/__init__.py +19 -0
  210. packages/aria_sdk/client.py +396 -0
  211. packages/aria_sdk/providers.py +70 -0
  212. packages/aria_sdk/streaming.py +73 -0
  213. packages/aria_sdk/types.py +86 -0
  214. packages/aria_services/__init__.py +55 -0
  215. packages/aria_services/context.py +258 -0
  216. packages/aria_services/data.py +11 -0
  217. packages/aria_services/provider_health.py +189 -0
  218. packages/aria_services/registry.py +213 -0
  219. packages/aria_services/usage.py +138 -0
  220. packages/aria_skills/__init__.py +5 -0
  221. packages/aria_skills/registry.py +59 -0
  222. packages/aria_tools/__init__.py +5 -0
  223. packages/aria_tools/registry.py +128 -0
  224. packages/quant_engine/__init__.py +6 -0
  225. packages/quant_engine/sports/__init__.py +72 -0
  226. packages/quant_engine/sports/calibrator.py +353 -0
  227. packages/quant_engine/sports/dixon_coles.py +234 -0
  228. packages/quant_engine/sports/elo.py +299 -0
  229. packages/quant_engine/sports/form.py +188 -0
  230. packages/quant_engine/sports/h2h.py +195 -0
  231. packages/quant_engine/sports/ml_model.py +354 -0
  232. packages/quant_engine/sports/predictor.py +311 -0
  233. packages/quant_engine/sports/tracker.py +664 -0
  234. packages/quant_engine/stochastic/__init__.py +27 -0
  235. packages/quant_engine/stochastic/gbm_enhanced.py +195 -0
  236. packages/quant_engine/stochastic/ito_calculus.py +477 -0
  237. packages/quant_engine/stochastic/kelly_criterion.py +181 -0
  238. packages/quant_engine/stochastic/monte_carlo_advanced.py +95 -0
  239. packages/quant_engine/stochastic/options_pricing.py +573 -0
  240. packages/quant_engine/stochastic/stochastic_processes.py +90 -0
  241. plan_utils.py +194 -0
  242. plugin_loader.py +328 -0
  243. portfolio_ledger.py +262 -0
  244. privacy/__init__.py +5 -0
  245. privacy/feedback.py +123 -0
  246. project_tools.py +525 -0
  247. providers/__init__.py +30 -0
  248. providers/llm/__init__.py +19 -0
  249. providers/llm/anthropic.py +184 -0
  250. providers/llm/base.py +139 -0
  251. providers/llm/ollama.py +128 -0
  252. providers/llm/openai_compat.py +282 -0
  253. providers/llm/registry.py +358 -0
  254. realty_data_tools.py +659 -0
  255. report_generator.py +1314 -0
  256. runtime/__init__.py +103 -0
  257. runtime/agent_loop.py +1183 -0
  258. runtime/approval.py +51 -0
  259. runtime/events.py +102 -0
  260. runtime/gateway.py +128 -0
  261. runtime/lsp.py +346 -0
  262. runtime/subagent.py +258 -0
  263. runtime/tool_executor.py +104 -0
  264. runtime/tool_policy.py +106 -0
  265. safety/__init__.py +21 -0
  266. safety/permissions.py +275 -0
  267. setup_wizard.py +653 -0
  268. strategy_vault.py +420 -0
  269. ui/__init__.py +100 -0
  270. ui/banner.py +310 -0
  271. ui/completer.py +391 -0
  272. ui/console.py +271 -0
  273. ui/image_render.py +243 -0
  274. ui/input_box.py +376 -0
  275. ui/picker.py +195 -0
  276. ui/render/__init__.py +11 -0
  277. ui/render/finance.py +1480 -0
  278. ui/render/market.py +225 -0
  279. ui/render/output.py +681 -0
  280. ui/render/team.py +346 -0
  281. ui/robot.py +235 -0
  282. workspace/__init__.py +6 -0
  283. workspace/files.py +170 -0
  284. workspace/verify.py +113 -0
local_llm_provider.py ADDED
@@ -0,0 +1,552 @@
1
+ """
2
+ local_llm_provider.py — Unified async streaming interface for local LLM backends.
3
+
4
+ Supported backends
5
+ ------------------
6
+ ollama → http://localhost:11434 (Ollama /api/chat)
7
+ lmstudio → http://localhost:1234 (LM Studio /v1/chat/completions)
8
+ vllm → http://localhost:8000 (vLLM /v1/chat/completions)
9
+ llamacpp → http://localhost:8080 (llama.cpp server /v1/chat/completions)
10
+ jan → http://localhost:1337 (Jan /v1/chat/completions)
11
+ openai → https://api.openai.com (OpenAI-compatible proxy)
12
+
13
+ All non-Ollama backends speak the OpenAI /v1/chat/completions SSE format.
14
+
15
+ Usage::
16
+
17
+ provider = LocalLLMProvider.from_config(config)
18
+ async for event in provider.stream(messages, tools=schemas):
19
+ if event["type"] == "token":
20
+ print(event["text"], end="", flush=True)
21
+ elif event["type"] == "tool_call":
22
+ handle_tool(event["name"], event["arguments"])
23
+ elif event["type"] == "done":
24
+ break
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import json
30
+ import os
31
+ import re
32
+ import urllib.request
33
+ from dataclasses import dataclass, field
34
+ from typing import Any, AsyncIterator, Dict, List, Optional
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Model resolution helpers
38
+ # ---------------------------------------------------------------------------
39
+
# Preferred model-name prefixes, highest priority first.
# _pick_model() walks this list in order and returns the first installed model
# whose name starts with the prefix, so the ordering here is significant
# (more specific prefixes must come before the broader ones they overlap).
_PREFERRED_PREFIXES = [
    "aria-",  # native Aria models (future)
    "qwen2.5:",  # full qwen2.5 series
    "qwen2.5-coder:",  # coder variant (great for finance code tasks)
    "qwen2.5",  # any qwen2.5 variant without explicit tag
    "qwen",  # any qwen
    "deepseek",  # DeepSeek family
    "gpt-oss",  # locally-hosted GPT-compatible
    "llama",  # Meta Llama family
    "mistral",  # Mistral family
    "gemma",  # Google Gemma family
    "phi",  # Microsoft Phi
    "command",  # Cohere Command
]

# Module-level resolution cache shared by resolve_model_sync() and
# resolve_model_async(). Both dicts use the same key: "<base_url>::<requested>".
_model_cache: Dict[str, str] = {}  # cache key → resolved model name
_cache_ts: Dict[str, float] = {}  # cache key → time.time() when the entry was stored
_CACHE_TTL = 60.0  # seconds a cached resolution stays valid
59
+
60
+
def resolve_model_sync(base_url: str, requested: str) -> str:
    """
    Pick a usable Ollama model name for *requested*, blocking on one HTTP call.

    The installed models are read from ``<base_url>/api/tags`` and handed to
    ``_pick_model``. A successful resolution is memoised per
    ``(base_url, requested)`` pair for ``_CACHE_TTL`` seconds.

    *requested* is returned unchanged (and nothing is cached) when the tags
    endpoint cannot be read or reports no models, so that Ollama itself
    surfaces the real error later.
    """
    import time

    cache_key = f"{base_url}::{requested}"
    stamp = time.time()

    # Serve from the cache while the entry is still fresh.
    if cache_key in _model_cache:
        age = stamp - _cache_ts.get(cache_key, 0)
        if age < _CACHE_TTL:
            return _model_cache[cache_key]

    endpoint = base_url.rstrip("/") + "/api/tags"
    try:
        with urllib.request.urlopen(endpoint, timeout=3) as response:
            listing = json.loads(response.read())
        installed: List[str] = [entry["name"] for entry in listing.get("models", [])]
    except Exception:
        # Ollama unreachable (or malformed reply) — pass the name through.
        return requested

    if not installed:
        return requested

    chosen = _pick_model(installed, requested)
    _model_cache[cache_key] = chosen
    _cache_ts[cache_key] = stamp
    return chosen
93
+
94
+
async def resolve_model_async(base_url: str, requested: str) -> str:
    """
    Coroutine counterpart of ``resolve_model_sync``.

    Uses aiohttp for the ``/api/tags`` request when it was importable and
    falls back to a blocking ``urllib`` call otherwise. Shares the same cache
    and the same pass-through behaviour on failure or an empty model list.
    """
    import time

    cache_key = f"{base_url}::{requested}"
    stamp = time.time()

    # Serve from the cache while the entry is still fresh.
    if cache_key in _model_cache:
        age = stamp - _cache_ts.get(cache_key, 0)
        if age < _CACHE_TTL:
            return _model_cache[cache_key]

    endpoint = base_url.rstrip("/") + "/api/tags"
    try:
        if not _HAS_AIOHTTP:
            with urllib.request.urlopen(endpoint, timeout=3) as response:
                listing = json.loads(response.read())
        else:
            async with aiohttp.ClientSession() as session:
                limit = aiohttp.ClientTimeout(total=3)
                async with session.get(endpoint, timeout=limit) as response:
                    listing = await response.json()
        installed: List[str] = [entry["name"] for entry in listing.get("models", [])]
    except Exception:
        # Endpoint unreachable or reply malformed — pass the name through.
        return requested

    if not installed:
        return requested

    chosen = _pick_model(installed, requested)
    _model_cache[cache_key] = chosen
    _cache_ts[cache_key] = stamp
    return chosen
123
+
124
+
125
+ def _pick_model(available: List[str], requested: str) -> str:
126
+ """Choose the best model from *available* given *requested*."""
127
+ # 1. Exact match
128
+ if requested in available:
129
+ return requested
130
+
131
+ # 2. Same family (strip tag, match prefix)
132
+ family = requested.split(":")[0]
133
+ hit = next((m for m in available if m.startswith(family)), None)
134
+ if hit:
135
+ return hit
136
+
137
+ # 3. Priority-prefix list
138
+ for prefix in _PREFERRED_PREFIXES:
139
+ hit = next((m for m in available if m.startswith(prefix)), None)
140
+ if hit:
141
+ return hit
142
+
143
+ # 4. First available
144
+ return available[0]
145
+
146
+
def list_ollama_models(base_url: str) -> List[str]:
    """
    Fetch the names of all models listed by Ollama's /api/tags (blocking).

    Any failure — bad URL, unreachable server, malformed reply — yields an
    empty list instead of an exception.
    """
    try:
        endpoint = base_url.rstrip("/") + "/api/tags"
        with urllib.request.urlopen(endpoint, timeout=3) as response:
            listing = json.loads(response.read())
            names = []
            for entry in listing.get("models", []):
                names.append(entry["name"])
            return names
    except Exception:
        return []
154
+
155
+ try:
156
+ import aiohttp
157
+ _HAS_AIOHTTP = True
158
+ except ImportError:
159
+ _HAS_AIOHTTP = False
160
+
161
+ from model_capability import (
162
+ ModelCapability,
163
+ get_model_capability,
164
+ build_ollama_tool_payload,
165
+ build_tool_system_prompt,
166
+ parse_tool_calls_from_response,
167
+ )
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Backend definitions
172
+ # ---------------------------------------------------------------------------
173
+
# Per-backend connection table, keyed by the backend name used in config
# ("local_provider") and in LocalLLMProvider.backend.
#
# Fields of each entry:
#   default_url — server root used when the provider has no explicit base_url.
#   api_path    — appended to the (right-stripped) root to form the chat URL.
#   protocol    — selects the streaming implementation in LocalLLMProvider.stream:
#                 "ollama" goes to _stream_ollama, every other value goes to
#                 _stream_openai.
#
# Note that some roots already end in a version segment (moonshot's default_url
# ends in "/v1") and pair with an api_path that has no "/v1" prefix.
# NOTE(review): the "anthropic" entry has protocol "anthropic", which stream()
# routes through the OpenAI-compatible path — confirm /v1/messages is meant to
# be driven that way.
BACKEND_DEFAULTS: Dict[str, Dict[str, Any]] = {
    "ollama": {"default_url": "http://localhost:11434", "api_path": "/api/chat", "protocol": "ollama"},
    "lmstudio": {"default_url": "http://localhost:1234", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "vllm": {"default_url": "http://localhost:8000", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "llamacpp": {"default_url": "http://localhost:8080", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "jan": {"default_url": "http://localhost:1337", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "openai": {"default_url": "https://api.openai.com", "api_path": "/v1/chat/completions", "protocol": "openai"},
    # Cloud providers (API key required)
    "deepseek": {"default_url": "https://api.deepseek.com", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "groq": {"default_url": "https://api.groq.com/openai", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "anthropic": {"default_url": "https://api.anthropic.com", "api_path": "/v1/messages", "protocol": "anthropic"},
    "together": {"default_url": "https://api.together.xyz", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "siliconflow": {"default_url": "https://api.siliconflow.cn", "api_path": "/v1/chat/completions", "protocol": "openai"},
    "moonshot": {"default_url": "https://api.moonshot.cn/v1", "api_path": "/chat/completions", "protocol": "openai"},
    # Custom user-defined OpenAI-compatible endpoint (root comes from config)
    "custom": {"default_url": "", "api_path": "/chat/completions", "protocol": "openai"},
}
191
+
192
+
@dataclass
class LocalLLMProvider:
    """
    Streaming chat client for one configured LLM backend.

    The backend name selects an entry of ``BACKEND_DEFAULTS`` (unknown names
    fall back to the "ollama" entry), which supplies the default server root,
    the chat path and the wire protocol. ``stream()`` is the main entry point;
    ``is_available()`` and ``list_models()`` are blocking helpers.
    """

    # Key into BACKEND_DEFAULTS.
    backend: str = "ollama"
    # Explicit server root; blank → use the backend's default_url.
    base_url: str = ""
    model: str = "qwen2.5-coder:7b"
    # Sent as "Authorization: Bearer …" on OpenAI-compatible requests when set.
    api_key: str = ""
    # Total per-request timeout in seconds (passed to aiohttp.ClientTimeout).
    timeout: int = 300

    # Filled lazily by the `capability` property; reset when the model changes.
    _capability: Optional[ModelCapability] = field(default=None, repr=False)

    # Environment variable consulted first for each cloud backend's API key.
    # (Deliberately un-annotated so the dataclass does not treat it as a field.)
    _CLOUD_ENV_VARS = {
        "deepseek": "DEEPSEEK_API_KEY",
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "groq": "GROQ_API_KEY",
        "together": "TOGETHER_API_KEY",
        "siliconflow": "SILICONFLOW_API_KEY",
        "moonshot": "MOONSHOT_API_KEY",
    }

    # ── constructor ────────────────────────────────────────────────────────

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "LocalLLMProvider":
        """
        Build a provider from an aria-code config dict.

        Keys read: ``local_provider``, ``model``, ``custom_endpoint``,
        ``custom_model``, ``local_api_key``, ``local_url``, ``ollama_url``.
        API keys fall back to environment variables. For the "ollama" backend
        the model name is resolved against the server immediately, which
        performs one blocking HTTP request (3 s timeout).
        """
        # `or` (not a .get default) so an explicit None/"" also means "ollama".
        backend = str(config.get("local_provider") or "ollama").lower()
        info = BACKEND_DEFAULTS.get(backend, BACKEND_DEFAULTS["ollama"])
        requested_model = config.get("model", "qwen2.5-coder:1.5b")
        # Generic key lookup shared by every branch: config first, then env.
        generic_key = config.get("local_api_key") or os.getenv("LOCAL_LLM_API_KEY", "")

        # Custom endpoint: user-specified base_url via /config set custom_endpoint=...
        if backend == "custom":
            url = config.get("custom_endpoint", "") or info["default_url"]
            model = config.get("custom_model", requested_model)
            return cls(backend=backend, base_url=url, model=model, api_key=generic_key)

        # Cloud provider backends: provider-specific env var wins over the
        # generic config/env key.
        env_var = cls._CLOUD_ENV_VARS.get(backend)
        if env_var is not None:
            api_key = os.getenv(env_var, "") or generic_key
            url = config.get("custom_endpoint") or info["default_url"]
            return cls(backend=backend, base_url=url, model=requested_model, api_key=api_key)

        url = config.get("local_url") or config.get("ollama_url") or info["default_url"]

        # Resolve model at construction time so later callers always get a
        # valid model name (Ollama-only: other backends manage their own catalog).
        if backend == "ollama":
            model = resolve_model_sync(url, requested_model)
        else:
            model = requested_model

        return cls(backend=backend, base_url=url, model=model, api_key=generic_key)

    # ── capability ─────────────────────────────────────────────────────────

    @property
    def capability(self) -> ModelCapability:
        """Capability record for ``self.model``, computed once and cached."""
        if self._capability is None:
            self._capability = get_model_capability(self.model)
        return self._capability

    # ── endpoint helpers ───────────────────────────────────────────────────

    def _backend_info(self) -> Dict[str, Any]:
        """Return this backend's BACKEND_DEFAULTS entry (ollama's if unknown)."""
        return BACKEND_DEFAULTS.get(self.backend, BACKEND_DEFAULTS["ollama"])

    @staticmethod
    def _models_path(api_path: str) -> str:
        """
        Derive the model-listing path that sits next to a chat *api_path*.

        Chat paths that start with "/v1/" list models at "/v1/models"; paths
        without that prefix belong to roots that already carry the version
        segment, so the listing lives at "/models".
        """
        return "/v1/models" if api_path.startswith("/v1/") else "/models"

    def _catalog_request(self) -> urllib.request.Request:
        """
        Build the GET request for this backend's model catalog.

        Raises ValueError when no usable root URL is configured; callers wrap
        the call in their own try/except.
        """
        info = self._backend_info()
        root = (self.base_url or info["default_url"]).rstrip("/")
        # Decide by protocol, the same way stream() picks its wire format.
        if info["protocol"] == "ollama":
            return urllib.request.Request(f"{root}/api/tags")
        request = urllib.request.Request(root + self._models_path(info["api_path"]))
        if self.api_key:
            request.add_header("Authorization", f"Bearer {self.api_key}")
        return request

    @staticmethod
    def _with_tool_prompt(messages: List[Dict[str, Any]], tool_sys: str) -> List[Dict[str, Any]]:
        """
        Return a copy of *messages* with *tool_sys* added to the system prompt.

        The text is appended to the first system message that has string
        content; if there is none, a new system message is put at the front.
        Neither the input list nor its dicts are modified, so calling
        ``stream()`` repeatedly with the same history does not keep growing
        the caller's system prompt.
        """
        history = list(messages)
        for pos, msg in enumerate(history):
            if msg.get("role") == "system" and isinstance(msg.get("content"), str):
                patched = dict(msg)
                patched["content"] = msg["content"] + tool_sys
                history[pos] = patched
                return history
        return [{"role": "system", "content": tool_sys.strip()}] + history

    # ── health check ───────────────────────────────────────────────────────

    def is_available(self) -> bool:
        """
        Quick synchronous check that the backend is reachable.

        Returns True only when the model-catalog endpoint answers HTTP 200
        within 3 seconds; every error is reported as False.
        """
        try:
            with urllib.request.urlopen(self._catalog_request(), timeout=3) as r:
                return r.status == 200
        except Exception:
            return False

    def list_models(self) -> List[str]:
        """Return available model names from the backend ([] on any failure)."""
        try:
            with urllib.request.urlopen(self._catalog_request(), timeout=3) as r:
                data = json.loads(r.read())
            if self._backend_info()["protocol"] == "ollama":
                return [m["name"] for m in data.get("models", [])]
            return [m["id"] for m in data.get("data", [])]
        except Exception:
            return []

    # ── streaming core ─────────────────────────────────────────────────────

    async def stream(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict]] = None,
        *,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        cancel_event=None,
    ) -> AsyncIterator[Dict[str, Any]]:
        """
        Unified async generator. Yields event dicts::

            {"type": "token",     "text": str}
            {"type": "thinking",  "text": str}          # for reasoning models
            {"type": "tool_call", "name": str, "arguments": dict, "id": str}
            {"type": "done",      "usage": dict, "stop_reason": str}
            {"type": "error",     "message": str}

        *temperature* defaults to the model capability's temperature and
        *max_tokens* to 4096. *cancel_event*, when given, is polled via
        ``is_set()`` between received lines; once set the stream ends with a
        "done" event whose stop_reason is "cancelled". Errors are reported as
        "error" events rather than raised.
        """
        if not _HAS_AIOHTTP:
            yield {"type": "error", "message": "aiohttp not installed: pip install aiohttp"}
            return

        info = self._backend_info()
        url = (self.base_url or info["default_url"]).rstrip("/") + info["api_path"]
        protocol = info["protocol"]
        cap = self.capability

        temp = temperature if temperature is not None else cap.temperature
        mtoks = max_tokens if max_tokens is not None else 4096

        # Inject tool system prompt for text-only models (on a copy — the
        # caller's message list is left untouched).
        if tools and not cap.tool_calls:
            tool_sys = build_tool_system_prompt(tools, self.model)
            if tool_sys:
                messages = self._with_tool_prompt(messages, tool_sys)

        # NOTE(review): any protocol other than "ollama" (including
        # "anthropic") is driven through the OpenAI-compatible path.
        if protocol == "ollama":
            runner = self._stream_ollama
        else:
            runner = self._stream_openai
        async for ev in runner(url, messages, tools, temp, mtoks, cap, cancel_event):
            yield ev

    # ── Ollama protocol ────────────────────────────────────────────────────

    async def _stream_ollama(self, url, messages, tools, temp, max_tokens, cap, cancel_event):
        """
        Stream one chat completion from Ollama's /api/chat (NDJSON lines).

        Yields the same event dicts as ``stream()``. Native tool calls are
        emitted as they arrive; if none arrive, the accumulated text is parsed
        for text-encoded tool calls once the stream ends.
        """
        # Re-resolve the model async before every call — handles the case where
        # the model was changed or wasn't available at construction time.
        base_url = url.rsplit("/api/chat", 1)[0]
        model = await resolve_model_async(base_url, self.model)
        if model != self.model:
            self.model = model          # keep in sync for future calls
            self._capability = None     # reset capability cache for new model
            # The cap passed in describes the previous model; refresh it so
            # num_ctx/top_p below match the model actually being called.
            cap = self.capability

        native_tools = build_ollama_tool_payload(tools or [], self.model) if tools else None
        payload: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "options": {
                "num_ctx": cap.context_window,
                "temperature": temp,
                "top_p": cap.top_p,
                "repeat_penalty": 1.15,
                "repeat_last_n": 128,
                "num_predict": max_tokens,
            },
        }
        if native_tools:
            payload["tools"] = native_tools

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        full_text = ""
        native_call_seen = False

        try:
            async with aiohttp.ClientSession() as sess:
                async with sess.post(url, json=payload,
                                     timeout=aiohttp.ClientTimeout(total=self.timeout)) as resp:
                    if resp.status != 200:
                        body = await resp.text()
                        yield {"type": "error", "message": f"Ollama HTTP {resp.status}: {body[:200]}"}
                        return

                    async for line in resp.content:
                        if cancel_event and cancel_event.is_set():
                            yield {"type": "done", "usage": usage, "stop_reason": "cancelled"}
                            return

                        text = line.decode("utf-8", errors="ignore").strip()
                        if not text:
                            continue
                        try:
                            data = json.loads(text)
                        except json.JSONDecodeError:
                            continue

                        msg = data.get("message") or {}

                        # Native tool calls (checked before "done" so calls
                        # carried by the final chunk are not dropped)
                        for tc in (msg.get("tool_calls") or []):
                            fn = tc.get("function") or {}
                            name = fn.get("name", "")
                            args = fn.get("arguments", {})
                            if isinstance(args, str):
                                try:
                                    args = json.loads(args)
                                except Exception:
                                    args = {}
                            if name:
                                native_call_seen = True
                                yield {"type": "tool_call", "name": name,
                                       "arguments": args, "id": tc.get("id", "")}

                        if data.get("done"):
                            usage["prompt_tokens"] += data.get("prompt_eval_count", 0)
                            usage["completion_tokens"] += data.get("eval_count", 0)
                            break

                        token = msg.get("content", "")
                        if token:
                            full_text += token
                            # Suppress tokens that are part of <tool_call> tags
                            if not full_text.lstrip().startswith("<tool_call"):
                                yield {"type": "token", "text": token}

        except Exception as exc:
            yield {"type": "error", "message": f"Ollama stream error: {exc}"}
            return

        # Fallback: parse text-based tool calls. Skipped when native calls were
        # already emitted, mirroring the OpenAI path, so a call is never
        # reported twice.
        if not native_call_seen:
            text_calls = parse_tool_calls_from_response(full_text, model_name=self.model)
            for tc in text_calls:
                yield {"type": "tool_call", "name": tc["tool"],
                       "arguments": tc["params"], "id": ""}

        yield {"type": "done", "usage": usage, "stop_reason": "stop"}

    # ── OpenAI-compatible protocol ─────────────────────────────────────────

    async def _stream_openai(self, url, messages, tools, temp, max_tokens, cap, cancel_event):
        """
        Stream one chat completion from an OpenAI-compatible SSE endpoint.

        Yields the same event dicts as ``stream()``. Tool-call fragments are
        accumulated per ``index`` and emitted once the stream ends; if the
        server sent none, the accumulated text is parsed for text-encoded
        tool calls instead.
        """
        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        payload: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "temperature": temp,
            "max_tokens": max_tokens,
        }
        if tools and cap.tool_calls:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"

        usage = {"prompt_tokens": 0, "completion_tokens": 0}
        full_text = ""
        tool_call_accumulator: Dict[int, Dict] = {}  # index → partial call

        try:
            async with aiohttp.ClientSession(headers=headers) as sess:
                async with sess.post(url, json=payload,
                                     timeout=aiohttp.ClientTimeout(total=self.timeout)) as resp:
                    if resp.status != 200:
                        body = await resp.text()
                        yield {"type": "error", "message": f"HTTP {resp.status}: {body[:200]}"}
                        return

                    async for line in resp.content:
                        if cancel_event and cancel_event.is_set():
                            yield {"type": "done", "usage": usage, "stop_reason": "cancelled"}
                            return

                        raw = line.decode("utf-8", errors="ignore").strip()
                        # SSE allows "data:" with or without a following space.
                        if raw.startswith("data:"):
                            raw = raw[5:].strip()
                        if not raw or raw == "[DONE]":
                            continue
                        try:
                            data = json.loads(raw)
                        except json.JSONDecodeError:
                            continue

                        # Usage from final chunk
                        if data.get("usage"):
                            u = data["usage"]
                            usage["prompt_tokens"] = u.get("prompt_tokens", 0)
                            usage["completion_tokens"] = u.get("completion_tokens", 0)

                        choices = data.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta") or {}

                        # Token
                        content = delta.get("content", "")
                        if content:
                            full_text += content
                            yield {"type": "token", "text": content}

                        # Tool calls (streamed fragments). Continuation chunks
                        # may carry explicit nulls for id/name/function, so
                        # every piece is coerced with `or` before it is
                        # concatenated.
                        for tc_delta in (delta.get("tool_calls") or []):
                            idx = tc_delta.get("index") or 0
                            acc = tool_call_accumulator.setdefault(
                                idx, {"id": "", "name": "", "arguments": ""})
                            acc["id"] += tc_delta.get("id") or ""
                            fn = tc_delta.get("function") or {}
                            acc["name"] += fn.get("name") or ""
                            piece = fn.get("arguments")
                            if isinstance(piece, str):
                                acc["arguments"] += piece
                            elif piece:
                                # Already-decoded arguments: re-encode so the
                                # final json.loads below still applies.
                                acc["arguments"] += json.dumps(piece)

                        fin_reason = choices[0].get("finish_reason")
                        if fin_reason in ("stop", "tool_calls", "length"):
                            break

        except Exception as exc:
            yield {"type": "error", "message": f"OpenAI-compat stream error: {exc}"}
            return

        # Emit accumulated tool calls
        for acc in tool_call_accumulator.values():
            try:
                args = json.loads(acc.get("arguments") or "{}")
            except Exception:
                args = {}
            if acc.get("name"):
                yield {"type": "tool_call", "name": acc["name"],
                       "arguments": args, "id": acc.get("id", "")}

        # Fallback text-based tool call parsing (for non-native models)
        if not tool_call_accumulator:
            text_calls = parse_tool_calls_from_response(full_text, model_name=self.model)
            for tc in text_calls:
                yield {"type": "tool_call", "name": tc["tool"],
                       "arguments": tc["params"], "id": ""}

        yield {"type": "done", "usage": usage, "stop_reason": "stop"}
535
+
536
+
537
+ # ---------------------------------------------------------------------------
538
+ # Quick availability probe (synchronous, for startup checks)
539
+ # ---------------------------------------------------------------------------
540
+
def probe_all_backends() -> Dict[str, bool]:
    """
    Return {backend: is_reachable} for all known backends.

    Each backend's *default* URL is probed with an unauthenticated GET
    (1 s timeout) against its model-listing endpoint; a backend counts as
    reachable only on HTTP 200. Backends that reject unauthenticated requests
    therefore report False, as do backends with no default URL ("custom").
    Probes run sequentially.
    """
    results: Dict[str, bool] = {}
    for name, info in BACKEND_DEFAULTS.items():
        root = (info.get("default_url") or "").rstrip("/")
        if not root:
            # Nothing to probe (user-supplied endpoint only).
            results[name] = False
            continue

        if name == "ollama":
            probe = f"{root}/api/tags"
        elif str(info.get("api_path", "")).startswith("/v1/"):
            probe = f"{root}/v1/models"
        else:
            # The root already carries the version segment (e.g. ".../v1"),
            # so adding another "/v1" would produce ".../v1/v1/models".
            probe = f"{root}/models"

        try:
            with urllib.request.urlopen(probe, timeout=1) as r:
                results[name] = r.status == 200
        except Exception:
            results[name] = False
    return results