isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,248 +0,0 @@
1
- import os
2
- import json
3
- import logging
4
- from typing import Dict, List, Any, Optional, Union
5
- from fastapi import FastAPI, HTTPException, Depends, Request
6
- from pydantic import BaseModel, Field
7
-
8
- from isa_model.inference.ai_factory import AIFactory
9
-
10
- # Configure logging
11
- logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger("unified_api")
13
-
14
- # Create FastAPI app
15
- app = FastAPI(
16
- title="Unified AI Model API",
17
- description="API for inference with Llama3-8B, Gemma3-4B, Whisper, and BGE-M3 models",
18
- version="1.0.0"
19
- )
20
-
21
- # Models
22
- class ChatMessage(BaseModel):
23
- role: str = Field(..., description="Role of the message sender (system, user, assistant)")
24
- content: str = Field(..., description="Content of the message")
25
-
26
- class ChatCompletionRequest(BaseModel):
27
- model: str = Field(..., description="Model ID to use (llama, gemma)")
28
- messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
29
- temperature: Optional[float] = Field(0.7, description="Sampling temperature")
30
- max_tokens: Optional[int] = Field(512, description="Maximum number of tokens to generate")
31
- top_p: Optional[float] = Field(0.9, description="Top-p sampling parameter")
32
- top_k: Optional[int] = Field(50, description="Top-k sampling parameter")
33
-
34
- class ChatCompletionResponse(BaseModel):
35
- model: str = Field(..., description="Model used for completion")
36
- choices: List[Dict[str, Any]] = Field(..., description="Generated completions")
37
- usage: Dict[str, int] = Field(..., description="Token usage statistics")
38
-
39
- class EmbeddingRequest(BaseModel):
40
- model: str = Field(..., description="Model ID to use (bge_embed)")
41
- input: Union[str, List[str]] = Field(..., description="Text to embed")
42
- normalize: Optional[bool] = Field(True, description="Whether to normalize embeddings")
43
-
44
- class TranscriptionRequest(BaseModel):
45
- model: str = Field(..., description="Model ID to use (whisper)")
46
- audio: str = Field(..., description="Base64-encoded audio data or URL")
47
- language: Optional[str] = Field("en", description="Language code")
48
-
49
- # Factory for creating services
50
- ai_factory = AIFactory()
51
-
52
- # Dependency to get LLM service
53
- async def get_llm_service(model: str):
54
- if model == "llama":
55
- return await ai_factory.get_llm_service("llama")
56
- elif model == "gemma":
57
- return await ai_factory.get_llm_service("gemma")
58
- else:
59
- raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
60
-
61
- # Dependency to get embedding service
62
- async def get_embedding_service(model: str):
63
- if model == "bge_embed":
64
- return await ai_factory.get_embedding_service("bge_embed")
65
- else:
66
- raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
67
-
68
- # Dependency to get speech service
69
- async def get_speech_service(model: str):
70
- if model == "whisper":
71
- return await ai_factory.get_speech_service("whisper")
72
- else:
73
- raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
74
-
75
- # Endpoints
76
- @app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
77
- async def chat_completion(request: ChatCompletionRequest):
78
- """Generate chat completion"""
79
- try:
80
- # Get the appropriate service
81
- service = await get_llm_service(request.model)
82
-
83
- # Format messages
84
- formatted_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
85
-
86
- # Extract system prompt if present
87
- system_prompt = None
88
- if formatted_messages and formatted_messages[0]["role"] == "system":
89
- system_prompt = formatted_messages[0]["content"]
90
- formatted_messages = formatted_messages[1:]
91
-
92
- # Get user prompt (last user message)
93
- user_prompt = ""
94
- for msg in reversed(formatted_messages):
95
- if msg["role"] == "user":
96
- user_prompt = msg["content"]
97
- break
98
-
99
- if not user_prompt:
100
- raise HTTPException(status_code=400, detail="No user message found")
101
-
102
- # Set generation config
103
- generation_config = {
104
- "temperature": request.temperature,
105
- "max_new_tokens": request.max_tokens,
106
- "top_p": request.top_p,
107
- "top_k": request.top_k
108
- }
109
-
110
- # Generate completion
111
- completion = await service.generate(
112
- prompt=user_prompt,
113
- system_prompt=system_prompt,
114
- generation_config=generation_config
115
- )
116
-
117
- # Format response
118
- response = {
119
- "model": request.model,
120
- "choices": [
121
- {
122
- "message": {
123
- "role": "assistant",
124
- "content": completion
125
- },
126
- "finish_reason": "stop",
127
- "index": 0
128
- }
129
- ],
130
- "usage": {
131
- "prompt_tokens": len(user_prompt.split()),
132
- "completion_tokens": len(completion.split()),
133
- "total_tokens": len(user_prompt.split()) + len(completion.split())
134
- }
135
- }
136
-
137
- return response
138
-
139
- except Exception as e:
140
- logger.error(f"Error in chat completion: {str(e)}")
141
- raise HTTPException(status_code=500, detail=str(e))
142
-
143
- @app.post("/v1/embeddings")
144
- async def create_embedding(request: EmbeddingRequest):
145
- """Generate embeddings for text"""
146
- try:
147
- # Get the embedding service
148
- service = await get_embedding_service("bge_embed")
149
-
150
- # Generate embeddings
151
- if isinstance(request.input, str):
152
- embeddings = await service.embed(request.input, normalize=request.normalize)
153
- data = [{"embedding": embeddings[0].tolist(), "index": 0}]
154
- else:
155
- embeddings = await service.embed(request.input, normalize=request.normalize)
156
- data = [{"embedding": emb.tolist(), "index": i} for i, emb in enumerate(embeddings)]
157
-
158
- # Format response
159
- response = {
160
- "model": request.model,
161
- "data": data,
162
- "usage": {
163
- "prompt_tokens": sum(len(text.split()) for text in (request.input if isinstance(request.input, list) else [request.input])),
164
- "total_tokens": sum(len(text.split()) for text in (request.input if isinstance(request.input, list) else [request.input]))
165
- }
166
- }
167
-
168
- return response
169
-
170
- except Exception as e:
171
- logger.error(f"Error in embedding generation: {str(e)}")
172
- raise HTTPException(status_code=500, detail=str(e))
173
-
174
- @app.post("/v1/audio/transcriptions")
175
- async def transcribe_audio(request: TranscriptionRequest):
176
- """Transcribe audio to text"""
177
- try:
178
- import base64
179
-
180
- # Get the speech service
181
- service = await get_speech_service("whisper")
182
-
183
- # Process audio
184
- if request.audio.startswith(("http://", "https://")):
185
- # URL - download audio
186
- import requests
187
- audio_data = requests.get(request.audio).content
188
- else:
189
- # Base64 - decode
190
- audio_data = base64.b64decode(request.audio)
191
-
192
- # Transcribe
193
- transcription = await service.transcribe(
194
- audio=audio_data,
195
- language=request.language
196
- )
197
-
198
- # Format response
199
- response = {
200
- "model": request.model,
201
- "text": transcription
202
- }
203
-
204
- return response
205
-
206
- except Exception as e:
207
- logger.error(f"Error in audio transcription: {str(e)}")
208
- raise HTTPException(status_code=500, detail=str(e))
209
-
210
- # Health check endpoint
211
- @app.get("/health")
212
- async def health_check():
213
- """Health check endpoint"""
214
- return {"status": "healthy"}
215
-
216
- # Model info endpoint
217
- @app.get("/v1/models")
218
- async def list_models():
219
- """List available models"""
220
- models = [
221
- {
222
- "id": "llama",
223
- "type": "llm",
224
- "description": "Llama3-8B language model"
225
- },
226
- {
227
- "id": "gemma",
228
- "type": "llm",
229
- "description": "Gemma3-4B language model"
230
- },
231
- {
232
- "id": "whisper",
233
- "type": "speech",
234
- "description": "Whisper-tiny speech-to-text model"
235
- },
236
- {
237
- "id": "bge_embed",
238
- "type": "embedding",
239
- "description": "BGE-M3 text embedding model"
240
- }
241
- ]
242
-
243
- return {"data": models}
244
-
245
- # Main entry point
246
- if __name__ == "__main__":
247
- import uvicorn
248
- uvicorn.run(app, host="0.0.0.0", port=8080)
@@ -1,148 +0,0 @@
1
- """
2
- Configuration system for stacked services
3
- """
4
-
5
- from typing import Dict, Any, List, Optional
6
- from dataclasses import dataclass, field
7
- from enum import Enum
8
-
9
- # Define stacked service specific layer types
10
- class StackedLayerType(Enum):
11
- """Types of processing layers for stacked services"""
12
- INTELLIGENCE = "intelligence" # High-level understanding
13
- DETECTION = "detection" # Element/object detection
14
- CLASSIFICATION = "classification" # Detailed classification
15
- VALIDATION = "validation" # Result validation
16
- TRANSFORMATION = "transformation" # Data transformation
17
- GENERATION = "generation" # Content generation
18
- ENHANCEMENT = "enhancement" # Quality enhancement
19
- CONTROL = "control" # Precise control/refinement
20
- UPSCALING = "upscaling" # Resolution enhancement
21
-
22
- @dataclass
23
- class LayerConfig:
24
- """Configuration for a processing layer"""
25
- name: str
26
- layer_type: StackedLayerType
27
- service_type: str # e.g., 'vision', 'llm'
28
- model_name: str
29
- parameters: Dict[str, Any]
30
- depends_on: List[str] # Layer dependencies
31
- timeout: float = 30.0
32
- retry_count: int = 1
33
- fallback_enabled: bool = True
34
-
35
- @dataclass
36
- class LayerResult:
37
- """Result from a processing layer"""
38
- layer_name: str
39
- success: bool
40
- data: Any
41
- metadata: Dict[str, Any]
42
- execution_time: float
43
- error: Optional[str] = None
44
-
45
- class WorkflowType(Enum):
46
- """Predefined workflow types"""
47
- UI_ANALYSIS_FAST = "ui_analysis_fast"
48
- UI_ANALYSIS_ACCURATE = "ui_analysis_accurate"
49
- UI_ANALYSIS_COMPREHENSIVE = "ui_analysis_comprehensive"
50
- SEARCH_PAGE_ANALYSIS = "search_page_analysis"
51
- CONTENT_EXTRACTION = "content_extraction"
52
- FORM_INTERACTION = "form_interaction"
53
- NAVIGATION_ANALYSIS = "navigation_analysis"
54
- CUSTOM = "custom"
55
-
56
- @dataclass
57
- class StackedServiceConfig:
58
- """Configuration for a stacked service workflow"""
59
- name: str
60
- workflow_type: WorkflowType
61
- layers: List[LayerConfig] = field(default_factory=list)
62
- global_timeout: float = 120.0
63
- parallel_execution: bool = False
64
- fail_fast: bool = False
65
- metadata: Dict[str, Any] = field(default_factory=dict)
66
-
67
- class ConfigManager:
68
- """Manager for stacked service configurations"""
69
-
70
- PREDEFINED_CONFIGS = {
71
- WorkflowType.UI_ANALYSIS_FAST: {
72
- "name": "Fast UI Analysis",
73
- "layers": [
74
- LayerConfig(
75
- name="page_intelligence",
76
- layer_type=StackedLayerType.INTELLIGENCE,
77
- service_type="vision",
78
- model_name="gpt-4.1-nano",
79
- parameters={"max_tokens": 300},
80
- depends_on=[],
81
- timeout=10.0,
82
- fallback_enabled=True
83
- ),
84
- LayerConfig(
85
- name="element_detection",
86
- layer_type=StackedLayerType.DETECTION,
87
- service_type="vision",
88
- model_name="omniparser",
89
- parameters={
90
- "imgsz": 480,
91
- "box_threshold": 0.08,
92
- "iou_threshold": 0.2
93
- },
94
- depends_on=["page_intelligence"],
95
- timeout=15.0,
96
- fallback_enabled=True
97
- ),
98
- LayerConfig(
99
- name="element_classification",
100
- layer_type=StackedLayerType.CLASSIFICATION,
101
- service_type="vision",
102
- model_name="gpt-4.1-nano",
103
- parameters={"max_tokens": 200},
104
- depends_on=["page_intelligence", "element_detection"],
105
- timeout=20.0,
106
- fallback_enabled=False
107
- )
108
- ],
109
- "global_timeout": 60.0,
110
- "parallel_execution": False,
111
- "fail_fast": False,
112
- "metadata": {
113
- "description": "Fast UI analysis optimized for speed",
114
- "expected_time": "30-45 seconds",
115
- "accuracy": "medium"
116
- }
117
- }
118
- }
119
-
120
- @classmethod
121
- def get_config(cls, workflow_type: WorkflowType) -> StackedServiceConfig:
122
- """Get predefined configuration for a workflow type"""
123
- if workflow_type not in cls.PREDEFINED_CONFIGS:
124
- raise ValueError(f"Unknown workflow type: {workflow_type}")
125
-
126
- config_data = cls.PREDEFINED_CONFIGS[workflow_type]
127
-
128
- return StackedServiceConfig(
129
- name=config_data["name"],
130
- workflow_type=workflow_type,
131
- layers=config_data["layers"],
132
- global_timeout=config_data["global_timeout"],
133
- parallel_execution=config_data["parallel_execution"],
134
- fail_fast=config_data["fail_fast"],
135
- metadata=config_data["metadata"]
136
- )
137
-
138
- # Convenience function for quick access
139
- def get_ui_analysis_config(speed: str = "accurate") -> StackedServiceConfig:
140
- """Get UI analysis configuration by speed preference"""
141
- speed_mapping = {
142
- "fast": WorkflowType.UI_ANALYSIS_FAST,
143
- "accurate": WorkflowType.UI_ANALYSIS_ACCURATE,
144
- "comprehensive": WorkflowType.UI_ANALYSIS_COMPREHENSIVE
145
- }
146
-
147
- workflow_type = speed_mapping.get(speed.lower(), WorkflowType.UI_ANALYSIS_ACCURATE)
148
- return ConfigManager.get_config(workflow_type)