isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/adapter/unified_api.py (deleted)

```diff
@@ -1,248 +0,0 @@
-import os
-import json
-import logging
-from typing import Dict, List, Any, Optional, Union
-from fastapi import FastAPI, HTTPException, Depends, Request
-from pydantic import BaseModel, Field
-
-from isa_model.inference.ai_factory import AIFactory
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("unified_api")
-
-# Create FastAPI app
-app = FastAPI(
-    title="Unified AI Model API",
-    description="API for inference with Llama3-8B, Gemma3-4B, Whisper, and BGE-M3 models",
-    version="1.0.0"
-)
-
-# Models
-class ChatMessage(BaseModel):
-    role: str = Field(..., description="Role of the message sender (system, user, assistant)")
-    content: str = Field(..., description="Content of the message")
-
-class ChatCompletionRequest(BaseModel):
-    model: str = Field(..., description="Model ID to use (llama, gemma)")
-    messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
-    temperature: Optional[float] = Field(0.7, description="Sampling temperature")
-    max_tokens: Optional[int] = Field(512, description="Maximum number of tokens to generate")
-    top_p: Optional[float] = Field(0.9, description="Top-p sampling parameter")
-    top_k: Optional[int] = Field(50, description="Top-k sampling parameter")
-
-class ChatCompletionResponse(BaseModel):
-    model: str = Field(..., description="Model used for completion")
-    choices: List[Dict[str, Any]] = Field(..., description="Generated completions")
-    usage: Dict[str, int] = Field(..., description="Token usage statistics")
-
-class EmbeddingRequest(BaseModel):
-    model: str = Field(..., description="Model ID to use (bge_embed)")
-    input: Union[str, List[str]] = Field(..., description="Text to embed")
-    normalize: Optional[bool] = Field(True, description="Whether to normalize embeddings")
-
-class TranscriptionRequest(BaseModel):
-    model: str = Field(..., description="Model ID to use (whisper)")
-    audio: str = Field(..., description="Base64-encoded audio data or URL")
-    language: Optional[str] = Field("en", description="Language code")
-
-# Factory for creating services
-ai_factory = AIFactory()
-
-# Dependency to get LLM service
-async def get_llm_service(model: str):
-    if model == "llama":
-        return await ai_factory.get_llm_service("llama")
-    elif model == "gemma":
-        return await ai_factory.get_llm_service("gemma")
-    else:
-        raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
-
-# Dependency to get embedding service
-async def get_embedding_service(model: str):
-    if model == "bge_embed":
-        return await ai_factory.get_embedding_service("bge_embed")
-    else:
-        raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
-
-# Dependency to get speech service
-async def get_speech_service(model: str):
-    if model == "whisper":
-        return await ai_factory.get_speech_service("whisper")
-    else:
-        raise HTTPException(status_code=400, detail=f"Unsupported model: {model}")
-
-# Endpoints
-@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
-async def chat_completion(request: ChatCompletionRequest):
-    """Generate chat completion"""
-    try:
-        # Get the appropriate service
-        service = await get_llm_service(request.model)
-
-        # Format messages
-        formatted_messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
-
-        # Extract system prompt if present
-        system_prompt = None
-        if formatted_messages and formatted_messages[0]["role"] == "system":
-            system_prompt = formatted_messages[0]["content"]
-            formatted_messages = formatted_messages[1:]
-
-        # Get user prompt (last user message)
-        user_prompt = ""
-        for msg in reversed(formatted_messages):
-            if msg["role"] == "user":
-                user_prompt = msg["content"]
-                break
-
-        if not user_prompt:
-            raise HTTPException(status_code=400, detail="No user message found")
-
-        # Set generation config
-        generation_config = {
-            "temperature": request.temperature,
-            "max_new_tokens": request.max_tokens,
-            "top_p": request.top_p,
-            "top_k": request.top_k
-        }
-
-        # Generate completion
-        completion = await service.generate(
-            prompt=user_prompt,
-            system_prompt=system_prompt,
-            generation_config=generation_config
-        )
-
-        # Format response
-        response = {
-            "model": request.model,
-            "choices": [
-                {
-                    "message": {
-                        "role": "assistant",
-                        "content": completion
-                    },
-                    "finish_reason": "stop",
-                    "index": 0
-                }
-            ],
-            "usage": {
-                "prompt_tokens": len(user_prompt.split()),
-                "completion_tokens": len(completion.split()),
-                "total_tokens": len(user_prompt.split()) + len(completion.split())
-            }
-        }
-
-        return response
-
-    except Exception as e:
-        logger.error(f"Error in chat completion: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/v1/embeddings")
-async def create_embedding(request: EmbeddingRequest):
-    """Generate embeddings for text"""
-    try:
-        # Get the embedding service
-        service = await get_embedding_service("bge_embed")
-
-        # Generate embeddings
-        if isinstance(request.input, str):
-            embeddings = await service.embed(request.input, normalize=request.normalize)
-            data = [{"embedding": embeddings[0].tolist(), "index": 0}]
-        else:
-            embeddings = await service.embed(request.input, normalize=request.normalize)
-            data = [{"embedding": emb.tolist(), "index": i} for i, emb in enumerate(embeddings)]
-
-        # Format response
-        response = {
-            "model": request.model,
-            "data": data,
-            "usage": {
-                "prompt_tokens": sum(len(text.split()) for text in (request.input if isinstance(request.input, list) else [request.input])),
-                "total_tokens": sum(len(text.split()) for text in (request.input if isinstance(request.input, list) else [request.input]))
-            }
-        }
-
-        return response
-
-    except Exception as e:
-        logger.error(f"Error in embedding generation: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/v1/audio/transcriptions")
-async def transcribe_audio(request: TranscriptionRequest):
-    """Transcribe audio to text"""
-    try:
-        import base64
-
-        # Get the speech service
-        service = await get_speech_service("whisper")
-
-        # Process audio
-        if request.audio.startswith(("http://", "https://")):
-            # URL - download audio
-            import requests
-            audio_data = requests.get(request.audio).content
-        else:
-            # Base64 - decode
-            audio_data = base64.b64decode(request.audio)
-
-        # Transcribe
-        transcription = await service.transcribe(
-            audio=audio_data,
-            language=request.language
-        )
-
-        # Format response
-        response = {
-            "model": request.model,
-            "text": transcription
-        }
-
-        return response
-
-    except Exception as e:
-        logger.error(f"Error in audio transcription: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-# Health check endpoint
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy"}
-
-# Model info endpoint
-@app.get("/v1/models")
-async def list_models():
-    """List available models"""
-    models = [
-        {
-            "id": "llama",
-            "type": "llm",
-            "description": "Llama3-8B language model"
-        },
-        {
-            "id": "gemma",
-            "type": "llm",
-            "description": "Gemma3-4B language model"
-        },
-        {
-            "id": "whisper",
-            "type": "speech",
-            "description": "Whisper-tiny speech-to-text model"
-        },
-        {
-            "id": "bge_embed",
-            "type": "embedding",
-            "description": "BGE-M3 text embedding model"
-        }
-    ]
-
-    return {"data": models}
-
-# Main entry point
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
```
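The hunk above removes the legacy `isa_model/inference/adapter/unified_api.py` adapter (presumably superseded by the much-expanded `isa_model/serving/api/routes/unified.py`). For reference, a minimal client sketch against the removed endpoints; the base URL is an assumption taken from the server's own `uvicorn.run(..., port=8080)` default, and the payload fields mirror the Pydantic models in the hunk:

```python
# Hypothetical client for the removed unified_api.py adapter.
# BASE_URL is an assumption (the server bound 0.0.0.0:8080 by default);
# endpoint paths and payload fields mirror the Pydantic models above.
import requests

BASE_URL = "http://localhost:8080"

# Chat completion against the "llama" model
chat = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "llama",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Summarize this API in one line."},
        ],
        "temperature": 0.7,
        "max_tokens": 128,
    },
)
print(chat.json()["choices"][0]["message"]["content"])

# Embeddings via the BGE-M3 model (input may be a string or list of strings)
emb = requests.post(
    f"{BASE_URL}/v1/embeddings",
    json={"model": "bge_embed", "input": ["hello", "world"], "normalize": True},
)
print(len(emb.json()["data"]))  # one vector per input string
```

Note that the adapter computed its `usage` numbers by whitespace-splitting the prompt and completion, so they were rough approximations rather than true tokenizer counts.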
isa_model/inference/services/helpers/stacked_config.py (deleted)

```diff
@@ -1,148 +0,0 @@
-"""
-Configuration system for stacked services
-"""
-
-from typing import Dict, Any, List, Optional
-from dataclasses import dataclass, field
-from enum import Enum
-
-# Define stacked service specific layer types
-class StackedLayerType(Enum):
-    """Types of processing layers for stacked services"""
-    INTELLIGENCE = "intelligence"      # High-level understanding
-    DETECTION = "detection"            # Element/object detection
-    CLASSIFICATION = "classification"  # Detailed classification
-    VALIDATION = "validation"          # Result validation
-    TRANSFORMATION = "transformation"  # Data transformation
-    GENERATION = "generation"          # Content generation
-    ENHANCEMENT = "enhancement"        # Quality enhancement
-    CONTROL = "control"                # Precise control/refinement
-    UPSCALING = "upscaling"            # Resolution enhancement
-
-@dataclass
-class LayerConfig:
-    """Configuration for a processing layer"""
-    name: str
-    layer_type: StackedLayerType
-    service_type: str  # e.g., 'vision', 'llm'
-    model_name: str
-    parameters: Dict[str, Any]
-    depends_on: List[str]  # Layer dependencies
-    timeout: float = 30.0
-    retry_count: int = 1
-    fallback_enabled: bool = True
-
-@dataclass
-class LayerResult:
-    """Result from a processing layer"""
-    layer_name: str
-    success: bool
-    data: Any
-    metadata: Dict[str, Any]
-    execution_time: float
-    error: Optional[str] = None
-
-class WorkflowType(Enum):
-    """Predefined workflow types"""
-    UI_ANALYSIS_FAST = "ui_analysis_fast"
-    UI_ANALYSIS_ACCURATE = "ui_analysis_accurate"
-    UI_ANALYSIS_COMPREHENSIVE = "ui_analysis_comprehensive"
-    SEARCH_PAGE_ANALYSIS = "search_page_analysis"
-    CONTENT_EXTRACTION = "content_extraction"
-    FORM_INTERACTION = "form_interaction"
-    NAVIGATION_ANALYSIS = "navigation_analysis"
-    CUSTOM = "custom"
-
-@dataclass
-class StackedServiceConfig:
-    """Configuration for a stacked service workflow"""
-    name: str
-    workflow_type: WorkflowType
-    layers: List[LayerConfig] = field(default_factory=list)
-    global_timeout: float = 120.0
-    parallel_execution: bool = False
-    fail_fast: bool = False
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-class ConfigManager:
-    """Manager for stacked service configurations"""
-
-    PREDEFINED_CONFIGS = {
-        WorkflowType.UI_ANALYSIS_FAST: {
-            "name": "Fast UI Analysis",
-            "layers": [
-                LayerConfig(
-                    name="page_intelligence",
-                    layer_type=StackedLayerType.INTELLIGENCE,
-                    service_type="vision",
-                    model_name="gpt-4.1-nano",
-                    parameters={"max_tokens": 300},
-                    depends_on=[],
-                    timeout=10.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_detection",
-                    layer_type=StackedLayerType.DETECTION,
-                    service_type="vision",
-                    model_name="omniparser",
-                    parameters={
-                        "imgsz": 480,
-                        "box_threshold": 0.08,
-                        "iou_threshold": 0.2
-                    },
-                    depends_on=["page_intelligence"],
-                    timeout=15.0,
-                    fallback_enabled=True
-                ),
-                LayerConfig(
-                    name="element_classification",
-                    layer_type=StackedLayerType.CLASSIFICATION,
-                    service_type="vision",
-                    model_name="gpt-4.1-nano",
-                    parameters={"max_tokens": 200},
-                    depends_on=["page_intelligence", "element_detection"],
-                    timeout=20.0,
-                    fallback_enabled=False
-                )
-            ],
-            "global_timeout": 60.0,
-            "parallel_execution": False,
-            "fail_fast": False,
-            "metadata": {
-                "description": "Fast UI analysis optimized for speed",
-                "expected_time": "30-45 seconds",
-                "accuracy": "medium"
-            }
-        }
-    }
-
-    @classmethod
-    def get_config(cls, workflow_type: WorkflowType) -> StackedServiceConfig:
-        """Get predefined configuration for a workflow type"""
-        if workflow_type not in cls.PREDEFINED_CONFIGS:
-            raise ValueError(f"Unknown workflow type: {workflow_type}")
-
-        config_data = cls.PREDEFINED_CONFIGS[workflow_type]
-
-        return StackedServiceConfig(
-            name=config_data["name"],
-            workflow_type=workflow_type,
-            layers=config_data["layers"],
-            global_timeout=config_data["global_timeout"],
-            parallel_execution=config_data["parallel_execution"],
-            fail_fast=config_data["fail_fast"],
-            metadata=config_data["metadata"]
-        )
-
-# Convenience function for quick access
-def get_ui_analysis_config(speed: str = "accurate") -> StackedServiceConfig:
-    """Get UI analysis configuration by speed preference"""
-    speed_mapping = {
-        "fast": WorkflowType.UI_ANALYSIS_FAST,
-        "accurate": WorkflowType.UI_ANALYSIS_ACCURATE,
-        "comprehensive": WorkflowType.UI_ANALYSIS_COMPREHENSIVE
-    }
-
-    workflow_type = speed_mapping.get(speed.lower(), WorkflowType.UI_ANALYSIS_ACCURATE)
-    return ConfigManager.get_config(workflow_type)
```
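For reference, the removed configuration module was driven through `ConfigManager` and the `get_ui_analysis_config` helper. A short usage sketch, using only names defined in the file above; note that within this file `PREDEFINED_CONFIGS` contains an entry only for `WorkflowType.UI_ANALYSIS_FAST`, so `get_ui_analysis_config` with its default `speed="accurate"` would raise `ValueError` in `get_config`:

```python
# Sketch of the removed stacked_config.py API (names taken verbatim from
# the file above). Only the "fast" workflow has a predefined config here;
# other speeds map to workflow types that get_config rejects with ValueError.
from isa_model.inference.services.helpers.stacked_config import (
    get_ui_analysis_config,
)

config = get_ui_analysis_config(speed="fast")
print(config.name)            # "Fast UI Analysis"
print(config.global_timeout)  # 60.0

# Walk the three-layer pipeline in declaration order; depends_on encodes
# the dependency graph (detection waits on intelligence, and so on).
for layer in config.layers:
    print(f"{layer.name} ({layer.layer_type.value}): "
          f"model={layer.model_name}, depends_on={layer.depends_on}")
```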