isa-model 0.3.5 → 0.3.7 (py3-none-any.whl)
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
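Several modules in the list above moved without content changes (the `{old → new}` entries), so imports that resolved against 0.3.5 need their paths updated for 0.3.7. A hedged sketch: the module paths below come straight from the rename list, while the imported symbol names are illustrative assumptions.

# Hypothetical symbol names; only the module paths are confirmed by the rename list above.
# 0.3.5
from isa_model.inference.services.llm.llm_adapter import LLMAdapter
from isa_model.inference.services.stacked.ui_analysis_service import UIAnalysisService
# 0.3.7
from isa_model.inference.services.llm.helpers.llm_adapter import LLMAdapter
from isa_model.inference.services.vision.ui_analysis_service import UIAnalysisService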
isa_model/inference/services/stacked/config.py (deleted; 426 lines removed)
@@ -1,426 +0,0 @@
-"""
-Configuration system for stacked services
-"""
-
-from typing import Dict, Any, List
-from dataclasses import dataclass, field
-from enum import Enum
-
-from .base_stacked_service import LayerConfig, LayerType
-
-class WorkflowType(Enum):
-    """Predefined workflow types"""
-    UI_ANALYSIS_FAST = "ui_analysis_fast"
-    UI_ANALYSIS_ACCURATE = "ui_analysis_accurate"
-    UI_ANALYSIS_COMPREHENSIVE = "ui_analysis_comprehensive"
-    SEARCH_PAGE_ANALYSIS = "search_page_analysis"
-    CONTENT_EXTRACTION = "content_extraction"
-    FORM_INTERACTION = "form_interaction"
-    NAVIGATION_ANALYSIS = "navigation_analysis"
-    CUSTOM = "custom"
-
-@dataclass
-class StackedServiceConfig:
-    """Configuration for a stacked service workflow"""
-    name: str
-    workflow_type: WorkflowType
-    layers: List[LayerConfig] = field(default_factory=list)
-    global_timeout: float = 120.0
-    parallel_execution: bool = False
-    fail_fast: bool = False
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-class ConfigManager:
-    """Manager for stacked service configurations"""
-
-    PREDEFINED_CONFIGS = {
-        WorkflowType.UI_ANALYSIS_FAST: {
-            "name": "Fast UI Analysis",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=LayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="gpt-4.1-nano",
-                    parameters={"max_tokens": 300},
-                    depends_on=[],
-                    timeout=10.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="omniparser",
-                    parameters={
-                        "imgsz": 480,
-                        "box_threshold": 0.08,
-                        "iou_threshold": 0.2
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=15.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_classification",
-                    layer_type=LayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="gpt-4.1-nano",
-                    parameters={"max_tokens": 200},
-                    depends_on=["page_intelligence", "element_detection"],
-                    timeout=20.0,
-                    fallback_enabled=False
-                )
-            ],
-            "global_timeout": 60.0,
-            "parallel_execution": False,
-            "fail_fast": False,
-            "metadata": {
-                "description": "Fast UI analysis optimized for speed",
-                "expected_time": "30-45 seconds",
-                "accuracy": "medium"
-            }
-        },
-
-        WorkflowType.UI_ANALYSIS_ACCURATE: {
-            "name": "Accurate UI Analysis",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=LayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="gpt-4-vision-preview",
-                    parameters={"max_tokens": 800},
-                    depends_on=[],
-                    timeout=20.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="omniparser",
-                    parameters={
-                        "imgsz": 640,
-                        "box_threshold": 0.05,
-                        "iou_threshold": 0.1
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=25.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_classification",
-                    layer_type=LayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="gpt-4-vision-preview",
-                    parameters={"max_tokens": 500},
-                    depends_on=["page_intelligence", "element_detection"],
-                    timeout=30.0,
-                    fallback_enabled=False
-                )
-            ],
-            "global_timeout": 90.0,
-            "parallel_execution": False,
-            "fail_fast": False,
-            "metadata": {
-                "description": "Balanced UI analysis for production use",
-                "expected_time": "60-75 seconds",
-                "accuracy": "high"
-            }
-        },
-
-        WorkflowType.SEARCH_PAGE_ANALYSIS: {
-            "name": "Search Page Analysis",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=LayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="default",
-                    parameters={
-                        "task": "search_page_intelligence",
-                        "max_tokens": 400
-                    },
-                    depends_on=[],
-                    timeout=15.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="omniparser",
-                    parameters={
-                        "task": "element_detection",
-                        "imgsz": 640,
-                        "box_threshold": 0.05,
-                        "iou_threshold": 0.1
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=20.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_classification",
-                    layer_type=LayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="default",
-                    parameters={
-                        "task": "search_element_classification",
-                        "max_tokens": 300
-                    },
-                    depends_on=["page_intelligence", "element_detection"],
-                    timeout=25.0,
-                    fallback_enabled=False
-                )
-            ],
-            "global_timeout": 80.0,
-            "parallel_execution": False,
-            "fail_fast": False,
-            "metadata": {
-                "description": "Analysis for search pages (Google, Bing, etc.)",
-                "expected_time": "45-60 seconds",
-                "accuracy": "high",
-                "page_types": ["search", "query", "results"]
-            }
-        },
-
-        WorkflowType.CONTENT_EXTRACTION: {
-            "name": "Content Extraction",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=LayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="default",
-                    parameters={
-                        "task": "content_page_intelligence",
-                        "max_tokens": 500
-                    },
-                    depends_on=[],
-                    timeout=15.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="content_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="florence-2",
-                    parameters={
-                        "task": "<OPEN_VOCABULARY_DETECTION>",
-                        "text_input": "article content, text blocks, headings, paragraphs, links"
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=25.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="content_classification",
-                    layer_type=LayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="default",
-                    parameters={
-                        "task": "content_classification",
-                        "max_tokens": 400
-                    },
-                    depends_on=["page_intelligence", "content_detection"],
-                    timeout=30.0,
-                    fallback_enabled=False
-                )
-            ],
-            "global_timeout": 90.0,
-            "parallel_execution": False,
-            "fail_fast": False,
-            "metadata": {
-                "description": "Extract and analyze content from web pages",
-                "expected_time": "60-75 seconds",
-                "accuracy": "high",
-                "page_types": ["article", "blog", "news", "documentation"]
-            }
-        },
-
-        WorkflowType.UI_ANALYSIS_COMPREHENSIVE: {
-            "name": "Comprehensive UI Analysis",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=LayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="gpt-4-vision-preview",
-                    parameters={"max_tokens": 1000},
-                    depends_on=[],
-                    timeout=25.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="primary_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="omniparser",
-                    parameters={
-                        "imgsz": 1024,
-                        "box_threshold": 0.03,
-                        "iou_threshold": 0.1
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=30.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="secondary_detection",
-                    layer_type=LayerType.DETECTION,
-                    service_type="vision",
-                    model_name="florence-2",
-                    parameters={
-                        "task": "<OPEN_VOCABULARY_DETECTION>",
-                        "text_input": "login form elements, input fields, buttons, checkboxes"
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=25.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="detection_fusion",
-                    layer_type=LayerType.TRANSFORMATION,
-                    service_type="custom",
-                    model_name="fusion_algorithm",
-                    parameters={"fusion_method": "confidence_weighted"},
-                    depends_on=["primary_detection", "secondary_detection"],
-                    timeout=5.0,
-                    fallback_enabled=False
-                ),
-                LayerConfig(
-                    name="element_classification",
-                    layer_type=LayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="gpt-4-vision-preview",
-                    parameters={"max_tokens": 600},
-                    depends_on=["page_intelligence", "detection_fusion"],
-                    timeout=40.0,
-                    fallback_enabled=False
-                ),
-                LayerConfig(
-                    name="result_validation",
-                    layer_type=LayerType.VALIDATION,
-                    service_type="vision",
-                    model_name="gpt-4.1-nano",
-                    parameters={"validation_criteria": ["completeness", "consistency", "accuracy"]},
-                    depends_on=["element_classification"],
-                    timeout=15.0,
-                    fallback_enabled=True
-                )
-            ],
-            "global_timeout": 180.0,
-            "parallel_execution": True,  # Enable parallel execution for detection layers
-            "fail_fast": False,
-            "metadata": {
-                "description": "Most comprehensive UI analysis with multi-model fusion",
-                "expected_time": "120-150 seconds",
-                "accuracy": "very high"
-            }
-        }
-    }
-
-    @classmethod
-    def get_config(cls, workflow_type: WorkflowType) -> StackedServiceConfig:
-        """Get predefined configuration for a workflow type"""
-        if workflow_type not in cls.PREDEFINED_CONFIGS:
-            raise ValueError(f"Unknown workflow type: {workflow_type}")
-
-        config_data = cls.PREDEFINED_CONFIGS[workflow_type]
-
-        return StackedServiceConfig(
-            name=config_data["name"],
-            workflow_type=workflow_type,
-            layers=config_data["layers"],
-            global_timeout=config_data["global_timeout"],
-            parallel_execution=config_data["parallel_execution"],
-            fail_fast=config_data["fail_fast"],
-            metadata=config_data["metadata"]
-        )
-
-    @classmethod
-    def create_custom_config(
-        cls,
-        name: str,
-        layers: List[LayerConfig],
-        global_timeout: float = 120.0,
-        parallel_execution: bool = False,
-        fail_fast: bool = False,
-        metadata: Dict[str, Any] = None
-    ) -> StackedServiceConfig:
-        """Create a custom configuration"""
-        return StackedServiceConfig(
-            name=name,
-            workflow_type=WorkflowType.CUSTOM,
-            layers=layers,
-            global_timeout=global_timeout,
-            parallel_execution=parallel_execution,
-            fail_fast=fail_fast,
-            metadata=metadata or {}
-        )
-
-    @classmethod
-    def modify_config(
-        cls,
-        base_config: StackedServiceConfig,
-        modifications: Dict[str, Any]
-    ) -> StackedServiceConfig:
-        """Modify an existing configuration"""
-        # Create a copy
-        new_config = StackedServiceConfig(
-            name=base_config.name,
-            workflow_type=base_config.workflow_type,
-            layers=base_config.layers.copy(),
-            global_timeout=base_config.global_timeout,
-            parallel_execution=base_config.parallel_execution,
-            fail_fast=base_config.fail_fast,
-            metadata=base_config.metadata.copy()
-        )
-
-        # Apply modifications
-        for key, value in modifications.items():
-            if hasattr(new_config, key):
-                setattr(new_config, key, value)
-            elif key == "layer_modifications":
-                # Modify specific layers
-                for layer_name, layer_mods in value.items():
-                    for layer in new_config.layers:
-                        if layer.name == layer_name:
-                            for mod_key, mod_value in layer_mods.items():
-                                if hasattr(layer, mod_key):
-                                    setattr(layer, mod_key, mod_value)
-                                elif mod_key == "parameters":
-                                    layer.parameters.update(mod_value)
-
-        return new_config
-
-    @classmethod
-    def get_available_workflows(cls) -> Dict[WorkflowType, Dict[str, Any]]:
-        """Get information about all available workflows"""
-        workflows = {}
-
-        for workflow_type in cls.PREDEFINED_CONFIGS:
-            config_data = cls.PREDEFINED_CONFIGS[workflow_type]
-            workflows[workflow_type] = {
-                "name": config_data["name"],
-                "layer_count": len(config_data["layers"]),
-                "expected_time": config_data["metadata"].get("expected_time", "unknown"),
-                "accuracy": config_data["metadata"].get("accuracy", "unknown"),
-                "description": config_data["metadata"].get("description", "")
-            }
-
-        return workflows
-
-# Convenience function for quick access
-def get_ui_analysis_config(speed: str = "accurate") -> StackedServiceConfig:
-    """Get UI analysis configuration by speed preference"""
-    speed_mapping = {
-        "fast": WorkflowType.UI_ANALYSIS_FAST,
-        "accurate": WorkflowType.UI_ANALYSIS_ACCURATE,
-        "comprehensive": WorkflowType.UI_ANALYSIS_COMPREHENSIVE
-    }
-
-    workflow_type = speed_mapping.get(speed.lower(), WorkflowType.UI_ANALYSIS_ACCURATE)
-    return ConfigManager.get_config(workflow_type)
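For reference, a minimal sketch of how the deleted configuration API above was consumed in 0.3.5. Every call is taken from the module shown in the hunk; none of these imports resolve in 0.3.7.

# Deleted API; works only against isa-model 0.3.5.
from isa_model.inference.services.stacked.config import (
    ConfigManager,
    get_ui_analysis_config,
)

config = get_ui_analysis_config(speed="fast")  # resolves to WorkflowType.UI_ANALYSIS_FAST
print(config.name, config.global_timeout)      # Fast UI Analysis 60.0

# Override the global timeout and one layer's parameters. Note that
# modify_config shallow-copies the layer list, so the tuned LayerConfig
# objects are shared with (and mutated in) the base config.
tuned = ConfigManager.modify_config(config, {
    "global_timeout": 45.0,
    "layer_modifications": {
        "element_detection": {"timeout": 10.0, "parameters": {"imgsz": 320}},
    },
})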
isa_model/inference/services/vision/ollama_vision_service.py (deleted; 194 lines removed)
@@ -1,194 +0,0 @@
-import os
-import json
-import base64
-import ollama
-from typing import Dict, Any, Union, List, Optional, BinaryIO
-from tenacity import retry, stop_after_attempt, wait_exponential
-from isa_model.inference.services.vision.base_vision_service import BaseVisionService
-from isa_model.inference.providers.base_provider import BaseProvider
-import logging
-import requests
-
-logger = logging.getLogger(__name__)
-
-class OllamaVisionService(BaseVisionService):
-    """Vision model service wrapper for Ollama using base64 encoded images"""
-
-    def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
-        super().__init__(provider, model_name)
-        self.max_tokens = self.config.get('max_tokens', 1000)
-        self.temperature = self.config.get('temperature', 0.7)
-
-    def _get_image_data(self, image: Union[str, BinaryIO]) -> bytes:
-        """Fetch image data from a local file or a URL"""
-        if isinstance(image, str):
-            # Check if it's a URL
-            if image.startswith(('http://', 'https://')):
-                response = requests.get(image)
-                response.raise_for_status()
-                return response.content
-            else:
-                # Local file path
-                with open(image, 'rb') as f:
-                    return f.read()
-        else:
-            return image.read()
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_exponential(multiplier=1, min=4, max=10),
-        reraise=True
-    )
-    async def analyze_image(
-        self,
-        image: Union[str, BinaryIO],
-        prompt: Optional[str] = None,
-        max_tokens: int = 1000
-    ) -> Dict[str, Any]:
-        """
-        Analyze image and provide description or answer questions
-        """
-        try:
-            # Fetch the image data
-            image_data = self._get_image_data(image)
-
-            # Convert to base64
-            image_base64 = base64.b64encode(image_data).decode('utf-8')
-
-            # Use a default prompt if none was provided
-            query = prompt or "请描述这张图片的内容。"
-
-            # Call the ollama library directly
-            response = ollama.chat(
-                model=self.model_name,
-                messages=[{
-                    'role': 'user',
-                    'content': query,
-                    'images': [image_base64]
-                }]
-            )
-
-            content = response['message']['content']
-
-            return {
-                "text": content,
-                "confidence": 1.0,  # Ollama doesn't provide confidence scores
-                "detected_objects": [],  # Basic implementation
-                "metadata": {
-                    "model": self.model_name,
-                    "prompt": query
-                }
-            }
-
-        except Exception as e:
-            logger.error(f"Error in image analysis: {e}")
-            raise
-
-    async def analyze_images(
-        self,
-        images: List[Union[str, BinaryIO]],
-        prompt: Optional[str] = None,
-        max_tokens: int = 1000
-    ) -> List[Dict[str, Any]]:
-        """Analyze multiple images"""
-        results = []
-        for image in images:
-            result = await self.analyze_image(image, prompt, max_tokens)
-            results.append(result)
-        return results
-
-    async def describe_image(
-        self,
-        image: Union[str, BinaryIO],
-        detail_level: str = "medium"
-    ) -> Dict[str, Any]:
-        """Generate detailed description of image"""
-        prompts = {
-            "low": "简单描述这张图片。",
-            "medium": "详细描述这张图片的内容、颜色、物体和场景。",
-            "high": "非常详细地描述这张图片,包括所有可见的物体、颜色、纹理、场景、情感和任何其他细节。"
-        }
-
-        prompt = prompts.get(detail_level, prompts["medium"])
-        result = await self.analyze_image(image, prompt)
-
-        return {
-            "description": result["text"],
-            "objects": [],  # Basic implementation
-            "scene": "未知",  # Basic implementation
-            "colors": []  # Basic implementation
-        }
-
-    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-        """Extract text from image (OCR)"""
-        result = await self.analyze_image(image, "提取图片中的所有文字内容。")
-
-        return {
-            "text": result["text"],
-            "confidence": 1.0,
-            "bounding_boxes": [],  # Basic implementation
-            "language": "未知"  # Basic implementation
-        }
-
-    async def detect_objects(
-        self,
-        image: Union[str, BinaryIO],
-        confidence_threshold: float = 0.5
-    ) -> Dict[str, Any]:
-        """Detect objects in image"""
-        result = await self.analyze_image(image, "识别并列出图片中的所有物体。")
-
-        return {
-            "objects": [],  # Basic implementation - would need parsing
-            "count": 0,
-            "bounding_boxes": []
-        }
-
-    async def classify_image(
-        self,
-        image: Union[str, BinaryIO],
-        categories: Optional[List[str]] = None
-    ) -> Dict[str, Any]:
-        """Classify image into categories"""
-        if categories:
-            category_str = "、".join(categories)
-            prompt = f"将这张图片分类到以下类别之一:{category_str}"
-        else:
-            prompt = "这张图片属于什么类别?"
-
-        result = await self.analyze_image(image, prompt)
-
-        return {
-            "category": result["text"],
-            "confidence": 1.0,
-            "all_predictions": [{"category": result["text"], "confidence": 1.0}]
-        }
-
-    async def compare_images(
-        self,
-        image1: Union[str, BinaryIO],
-        image2: Union[str, BinaryIO]
-    ) -> Dict[str, Any]:
-        """Compare two images for similarity"""
-        # For now, analyze each image separately and compare descriptions
-        result1 = await self.analyze_image(image1, "描述这张图片。")
-        result2 = await self.analyze_image(image2, "描述这张图片。")
-
-        return {
-            "similarity_score": 0.5,  # Basic implementation
-            "differences": "需要进一步分析",
-            "common_elements": "需要进一步分析"
-        }
-
-    def get_supported_formats(self) -> List[str]:
-        """Get list of supported image formats"""
-        return ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp']
-
-    def get_max_image_size(self) -> Dict[str, int]:
-        """Get maximum supported image dimensions"""
-        return {"width": 4096, "height": 4096}
-
-    async def close(self):
-        """Cleanup resources"""
-        pass
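The deleted service was a thin wrapper over the `ollama` client, and the underlying pattern it used is easy to reproduce directly. A minimal standalone sketch of that pattern, assuming the `ollama` Python package, a running Ollama server with the `gemma3:4b` model pulled, and a local `screenshot.png`:

import base64
import ollama

# Read and base64-encode the image, exactly as the deleted
# analyze_image method did before handing it to ollama.chat.
with open("screenshot.png", "rb") as f:
    image_base64 = base64.b64encode(f.read()).decode("utf-8")

response = ollama.chat(
    model="gemma3:4b",
    messages=[{
        "role": "user",
        "content": "Describe the contents of this image.",
        "images": [image_base64],
    }],
)
print(response["message"]["content"])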