isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/models/model_repo.py +343 -0
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/__init__.py +9 -0
  15. isa_model/deployment/cloud/modal/__init__.py +10 -0
  16. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
  17. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  18. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
  19. isa_model/deployment/cloud/modal/register_models.py +321 -0
  20. isa_model/deployment/runtime/deployed_service.py +338 -0
  21. isa_model/deployment/services/__init__.py +9 -0
  22. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  23. isa_model/deployment/services/model_service.py +332 -0
  24. isa_model/deployment/services/service_monitor.py +356 -0
  25. isa_model/deployment/services/service_registry.py +527 -0
  26. isa_model/eval/__init__.py +80 -44
  27. isa_model/eval/config/__init__.py +10 -0
  28. isa_model/eval/config/evaluation_config.py +108 -0
  29. isa_model/eval/evaluators/__init__.py +18 -0
  30. isa_model/eval/evaluators/base_evaluator.py +503 -0
  31. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  32. isa_model/eval/factory.py +417 -709
  33. isa_model/eval/infrastructure/__init__.py +24 -0
  34. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  35. isa_model/eval/metrics.py +191 -21
  36. isa_model/inference/ai_factory.py +187 -387
  37. isa_model/inference/providers/modal_provider.py +109 -0
  38. isa_model/inference/providers/yyds_provider.py +108 -0
  39. isa_model/inference/services/__init__.py +2 -1
  40. isa_model/inference/services/audio/base_stt_service.py +65 -1
  41. isa_model/inference/services/audio/base_tts_service.py +75 -1
  42. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  43. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  44. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  45. isa_model/inference/services/base_service.py +55 -55
  46. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  47. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  48. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  49. isa_model/inference/services/helpers/stacked_config.py +148 -0
  50. isa_model/inference/services/img/__init__.py +18 -0
  51. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
  52. isa_model/inference/services/img/flux_professional_service.py +603 -0
  53. isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
  54. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
  55. isa_model/inference/services/llm/__init__.py +3 -3
  56. isa_model/inference/services/llm/base_llm_service.py +519 -35
  57. isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
  58. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  59. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  60. isa_model/inference/services/llm/ollama_llm_service.py +150 -15
  61. isa_model/inference/services/llm/openai_llm_service.py +134 -31
  62. isa_model/inference/services/llm/yyds_llm_service.py +255 -0
  63. isa_model/inference/services/vision/__init__.py +38 -4
  64. isa_model/inference/services/vision/base_vision_service.py +241 -96
  65. isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
  66. isa_model/inference/services/vision/doc_analysis_service.py +640 -0
  67. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  68. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  69. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  70. isa_model/inference/services/vision/openai_vision_service.py +109 -170
  71. isa_model/inference/services/vision/replicate_vision_service.py +508 -0
  72. isa_model/inference/services/vision/ui_analysis_service.py +823 -0
  73. isa_model/scripts/register_models.py +370 -0
  74. isa_model/scripts/register_models_with_embeddings.py +510 -0
  75. isa_model/serving/__init__.py +19 -0
  76. isa_model/serving/api/__init__.py +10 -0
  77. isa_model/serving/api/fastapi_server.py +89 -0
  78. isa_model/serving/api/middleware/__init__.py +9 -0
  79. isa_model/serving/api/middleware/request_logger.py +88 -0
  80. isa_model/serving/api/routes/__init__.py +5 -0
  81. isa_model/serving/api/routes/health.py +82 -0
  82. isa_model/serving/api/routes/llm.py +19 -0
  83. isa_model/serving/api/routes/ui_analysis.py +223 -0
  84. isa_model/serving/api/routes/unified.py +202 -0
  85. isa_model/serving/api/routes/vision.py +19 -0
  86. isa_model/serving/api/schemas/__init__.py +17 -0
  87. isa_model/serving/api/schemas/common.py +33 -0
  88. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  89. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  90. isa_model-0.3.6.dist-info/RECORD +147 -0
  91. isa_model/core/model_manager.py +0 -208
  92. isa_model/core/model_registry.py +0 -342
  93. isa_model/inference/billing_tracker.py +0 -406
  94. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  95. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  96. isa_model-0.3.4.dist-info/RECORD +0 -91
  97. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  98. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  99. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  100. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
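
Several of the changes above are pure module moves (items 51, 54, and 57): the image-generation services leave the vision package for a new img package, and the LLM adapter moves into llm/helpers. A minimal migration sketch, assuming the moved modules keep their public names (not verified against the wheel contents):

    # 0.3.4 import path (removed):
    # from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService

    # 0.3.6 import path, per the rename in item 54:
    from isa_model.inference.services.img.replicate_image_gen_service import ReplicateImageGenService

    # 0.3.6 location of the LLM adapter module, per item 57:
    from isa_model.inference.services.llm.helpers import llm_adapter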
isa_model/inference/services/llm/yyds_llm_service.py (new file)
@@ -0,0 +1,255 @@
+ import logging
+ from typing import Dict, Any, List, Union, AsyncGenerator
+
+ # YYDS exposes an OpenAI-compatible API, accessed via the AsyncOpenAI client
+ from openai import AsyncOpenAI
+
+ from isa_model.inference.services.llm.base_llm_service import BaseLLMService
+
+ logger = logging.getLogger(__name__)
+
+ class YydsLLMService(BaseLLMService):
+     """YYDS LLM service implementation with unified invoke interface"""
+
+     def __init__(self, provider_name: str, model_name: str = "claude-sonnet-4-20250514", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)
+
+         # Get configuration from centralized config manager
+         provider_config = self.get_provider_config()
+
+         # Initialize AsyncOpenAI client with provider configuration
+         try:
+             if not provider_config.get("api_key"):
+                 raise ValueError("YYDS API key not found in provider configuration")
+
+             self.client = AsyncOpenAI(
+                 api_key=provider_config["api_key"],
+                 base_url=provider_config.get("base_url") or provider_config.get("api_base_url", "https://api.yyds.com/v1"),
+                 organization=provider_config.get("organization")
+             )
+
+             logger.info(f"Initialized YydsLLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize YYDS client: {e}")
+             raise ValueError(f"Failed to initialize YYDS client. Check your API key configuration: {e}") from e
+
+         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+         self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+
+     def _create_bound_copy(self) -> 'YydsLLMService':
+         """Create a copy of this service for tool binding"""
+         bound_service = YydsLLMService(self.provider_name, self.model_name)
+         bound_service._bound_tools = self._bound_tools.copy()
+         return bound_service
+
+     def bind_tools(self, tools: List[Any], **kwargs) -> 'YydsLLMService':
+         """
+         Bind tools to this LLM service for function calling
+
+         Args:
+             tools: List of tools (functions, dicts, or LangChain tools)
+             **kwargs: Additional arguments for tool binding
+
+         Returns:
+             New LLM service instance with tools bound
+         """
+         # Create a copy of this service
+         bound_service = self._create_bound_copy()
+
+         # Use base class method to bind tools
+         bound_service._bound_tools = tools
+
+         return bound_service
+
+     async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+         """
+         True streaming method - yields tokens one by one as they arrive
+
+         Args:
+             input_data: Same as ainvoke
+
+         Yields:
+             Individual tokens as they arrive from the API
+         """
+         try:
+             # Use adapter manager to prepare messages
+             messages = self._prepare_messages(input_data)
+
+             # Prepare request kwargs
+             provider_config = self.get_provider_config()
+             kwargs = {
+                 "model": self.model_name,
+                 "messages": messages,
+                 "temperature": provider_config.get("temperature", 0.7),
+                 "max_tokens": provider_config.get("max_tokens", 1024),
+                 "stream": True
+             }
+
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 kwargs["tools"] = tool_schemas
+                 kwargs["tool_choice"] = "auto"
+
+             # Stream tokens one by one
+             content_chunks = []
+             try:
+                 stream = await self.client.chat.completions.create(**kwargs)
+                 async for chunk in stream:
+                     content = chunk.choices[0].delta.content
+                     if content:
+                         content_chunks.append(content)
+                         yield content
+
+                 # Track usage after streaming is complete
+                 full_content = "".join(content_chunks)
+                 await self._track_streaming_usage(messages, full_content)
+
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+
+         except Exception as e:
+             logger.error(f"Error in astream: {e}")
+             raise
+
+     async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+         """Unified invoke method for all input types"""
+         try:
+             # Use adapter manager to prepare messages
+             messages = self._prepare_messages(input_data)
+
+             # Prepare request kwargs
+             provider_config = self.get_provider_config()
+             kwargs = {
+                 "model": self.model_name,
+                 "messages": messages,
+                 "temperature": provider_config.get("temperature", 0.7),
+                 "max_tokens": provider_config.get("max_tokens", 1024)
+             }
+
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 kwargs["tools"] = tool_schemas
+                 kwargs["tool_choice"] = "auto"
+
+             # Handle streaming vs non-streaming
+             if self.streaming:
+                 # TRUE STREAMING MODE - collect all chunks from the stream
+                 content_chunks = []
+                 async for token in self.astream(input_data):
+                     content_chunks.append(token)
+                 content = "".join(content_chunks)
+
+                 return self._format_response(content, input_data)
+             else:
+                 # Non-streaming mode
+                 response = await self.client.chat.completions.create(**kwargs)
+                 message = response.choices[0].message
+
+                 # Update usage tracking
+                 if response.usage:
+                     self._update_token_usage(response.usage)
+                     await self._track_billing(response.usage)
+
+                 # Handle tool calls if present - let adapter process the complete message
+                 if message.tool_calls:
+                     # Pass the complete message object to adapter for proper tool_calls handling
+                     return self._format_response(message, input_data)
+
+                 # Return appropriate format based on input type
+                 return self._format_response(message.content or "", input_data)
+
+         except Exception as e:
+             logger.error(f"Error in ainvoke: {e}")
+             raise
+
+     async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+         """Track usage for streaming requests (estimated)"""
+         # Create a mock usage object for tracking
+         class MockUsage:
+             def __init__(self):
+                 self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                 self.completion_tokens = len(content) // 4  # Rough estimate
+                 self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+         usage = MockUsage()
+         self._update_token_usage(usage)
+         await self._track_billing(usage)
+
+     async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+         """Handle streaming responses - DEPRECATED: Use astream() instead"""
+         kwargs["stream"] = True
+
+         async def stream_generator():
+             try:
+                 stream = await self.client.chat.completions.create(**kwargs)
+                 async for chunk in stream:
+                     content = chunk.choices[0].delta.content
+                     if content:
+                         yield content
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+
+         return stream_generator()
+
+
+     def _update_token_usage(self, usage):
+         """Update token usage statistics"""
+         self.last_token_usage = {
+             "prompt_tokens": usage.prompt_tokens,
+             "completion_tokens": usage.completion_tokens,
+             "total_tokens": usage.total_tokens
+         }
+
+         # Update total usage
+         self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+         self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+         self.total_token_usage["requests_count"] += 1
+
+     async def _track_billing(self, usage):
+         """Track billing information using unified billing system"""
+         provider_config = self.get_provider_config()
+         await self._track_llm_usage(
+             operation="chat",
+             input_tokens=usage.prompt_tokens,
+             output_tokens=usage.completion_tokens,
+             metadata={
+                 "temperature": provider_config.get("temperature", 0.7),
+                 "max_tokens": provider_config.get("max_tokens", 1024)
+             }
+         )
+
+     def get_token_usage(self) -> Dict[str, Any]:
+         """Get total token usage statistics"""
+         return self.total_token_usage
+
+     def get_last_token_usage(self) -> Dict[str, int]:
+         """Get token usage from last request"""
+         return self.last_token_usage
+
+     def get_model_info(self) -> Dict[str, Any]:
+         """Get information about the current model"""
+         provider_config = self.get_provider_config()
+         return {
+             "name": self.model_name,
+             "max_tokens": provider_config.get("max_tokens", 1024),
+             "supports_streaming": True,
+             "supports_functions": True,
+             "provider": "yyds",
+             "pricing": {
+                 "input_tokens_per_1k": 0.0045,
+                 "output_tokens_per_1k": 0.0225,
+                 "currency": "USD"
+             }
+         }
+
+
+     async def close(self):
+         """Close the backend client"""
+         await self.client.close()
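
The new service above is self-contained enough to sketch basic usage. A minimal sketch, assuming the centralized config manager can resolve a "yyds" provider entry with an api_key (base_url otherwise defaults to https://api.yyds.com/v1, as in __init__):

    import asyncio
    from isa_model.inference.services.llm.yyds_llm_service import YydsLLMService

    async def main():
        # Model defaults to claude-sonnet-4-20250514 per the constructor signature
        service = YydsLLMService(provider_name="yyds")
        try:
            # Non-streaming call; result shape depends on _format_response in the base class
            reply = await service.ainvoke("Say hello in one sentence.")
            print(reply)

            # True streaming; tokens are yielded as they arrive
            async for token in service.astream("Count to five."):
                print(token, end="", flush=True)
        finally:
            await service.close()

    asyncio.run(main())

Note that streaming usage is only estimated (MockUsage counts roughly four characters per token), so billed totals for astream calls are approximate.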
isa_model/inference/services/vision/__init__.py
@@ -3,10 +3,44 @@
 
  """
  Vision service package
- Contains all vision-related service modules
+ Contains all vision-related service modules, including stacked services
  """
 
- # Export ReplicateImageGenService
- from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
+ # Vision understanding services
+ from .base_vision_service import BaseVisionService
+ from .openai_vision_service import OpenAIVisionService
+ from .replicate_vision_service import ReplicateVisionService
 
- __all__ = ["ReplicateImageGenService"]
+ # Stacked Vision Services
+ from .doc_analysis_service import DocAnalysisStackedService
+ from .ui_analysis_service import UIAnalysisService
+
+ # ISA Vision service
+ try:
+     from .isA_vision_service import ISAVisionService
+     ISA_VISION_AVAILABLE = True
+ except ImportError:
+     ISAVisionService = None
+     ISA_VISION_AVAILABLE = False
+
+ # Optional services - import only if available
+ try:
+     from .ollama_vision_service import OllamaVisionService
+     OLLAMA_VISION_AVAILABLE = True
+ except ImportError:
+     OllamaVisionService = None
+     OLLAMA_VISION_AVAILABLE = False
+
+ __all__ = [
+     "BaseVisionService",
+     "OpenAIVisionService",
+     "ReplicateVisionService",
+     "DocAnalysisStackedService",
+     "UIAnalysisService"
+ ]
+
+ if ISA_VISION_AVAILABLE:
+     __all__.append("ISAVisionService")
+
+ if OLLAMA_VISION_AVAILABLE:
+     __all__.append("OllamaVisionService")
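
Downstream code can branch on the new availability flags rather than catching ImportError itself. A short sketch (illustrative only); note that in 0.3.6 both guarded imports are expected to fail, since ollama_vision_service.py is deleted (item 95) and isA_vision_service.py now lives under vision/disabled/ (item 65), leaving both flags False:

    from isa_model.inference.services import vision

    if vision.OLLAMA_VISION_AVAILABLE:
        service_cls = vision.OllamaVisionService  # optional backend present
    else:
        service_cls = vision.OpenAIVisionService  # always exported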