isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import logging
|
2
|
+
import os
|
2
3
|
import aiohttp
|
3
4
|
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
4
5
|
from openai import AsyncOpenAI
|
@@ -72,13 +73,38 @@ class OpenAISTTService(BaseSTTService):
|
|
72
73
|
if prompt:
|
73
74
|
transcription_params["prompt"] = prompt
|
74
75
|
|
75
|
-
# Handle file input
|
76
|
+
# Handle file input - support base64 strings, file paths, and file objects
|
76
77
|
if isinstance(audio_file, str):
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
78
|
+
# Check if it's a base64 string or file path
|
79
|
+
if len(audio_file) > 100 and not os.path.exists(audio_file):
|
80
|
+
# Likely a base64 string
|
81
|
+
try:
|
82
|
+
import base64
|
83
|
+
from io import BytesIO
|
84
|
+
logger.info(f"Attempting to decode base64 audio data (length: {len(audio_file)})")
|
85
|
+
audio_data = base64.b64decode(audio_file)
|
86
|
+
audio_buffer = BytesIO(audio_data)
|
87
|
+
audio_buffer.name = "audio.wav" # OpenAI needs a filename hint
|
88
|
+
logger.info(f"Successfully decoded base64 to {len(audio_data)} bytes")
|
89
|
+
transcription = await self.client.audio.transcriptions.create(
|
90
|
+
file=audio_buffer,
|
91
|
+
**transcription_params
|
92
|
+
)
|
93
|
+
except Exception as e:
|
94
|
+
# If base64 decoding or the transcription request fails, fall back to treating the input as a file path
|
95
|
+
logger.error(f"Base64 decoding failed: {e}, treating as file path")
|
96
|
+
with open(audio_file, "rb") as f:
|
97
|
+
transcription = await self.client.audio.transcriptions.create(
|
98
|
+
file=f,
|
99
|
+
**transcription_params
|
100
|
+
)
|
101
|
+
else:
|
102
|
+
# Regular file path
|
103
|
+
with open(audio_file, "rb") as f:
|
104
|
+
transcription = await self.client.audio.transcriptions.create(
|
105
|
+
file=f,
|
106
|
+
**transcription_params
|
107
|
+
)
|
82
108
|
else:
|
83
109
|
transcription = await self.client.audio.transcriptions.create(
|
84
110
|
file=audio_file,
|
@@ -66,7 +66,8 @@ class BaseService(ABC):
|
|
66
66
|
output_tokens=output_tokens
|
67
67
|
)
|
68
68
|
|
69
|
-
# Track usage through
|
69
|
+
# Track usage through both systems (legacy and new)
|
70
|
+
# Legacy detailed tracking (will be phased out)
|
70
71
|
self.model_manager.billing_tracker.track_model_usage(
|
71
72
|
model_id=self.model_name,
|
72
73
|
operation_type="inference",
|
@@ -80,6 +81,21 @@ class BaseService(ABC):
|
|
80
81
|
cost_usd=cost_usd,
|
81
82
|
metadata=metadata
|
82
83
|
)
|
84
|
+
|
85
|
+
# New aggregated statistics tracking
|
86
|
+
self.model_manager.statistics_tracker.track_usage(
|
87
|
+
model_id=self.model_name,
|
88
|
+
provider=self.provider_name,
|
89
|
+
service_type=service_type if isinstance(service_type, str) else service_type.value,
|
90
|
+
operation_type="inference",
|
91
|
+
operation=operation,
|
92
|
+
input_tokens=input_tokens,
|
93
|
+
output_tokens=output_tokens,
|
94
|
+
input_units=input_units,
|
95
|
+
output_units=output_units,
|
96
|
+
cost_usd=cost_usd or 0.0,
|
97
|
+
metadata=metadata
|
98
|
+
)
|
83
99
|
except Exception as e:
|
84
100
|
# Don't let billing tracking break the service
|
85
101
|
import logging
|
@@ -0,0 +1,13 @@
|
|
1
|
+
"""
|
2
|
+
Embedding Services - Text and Document Embedding Services
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .base_embed_service import BaseEmbedService
|
6
|
+
from .openai_embed_service import OpenAIEmbedService
|
7
|
+
from .ollama_embed_service import OllamaEmbedService
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
'BaseEmbedService',
|
11
|
+
'OpenAIEmbedService',
|
12
|
+
'OllamaEmbedService'
|
13
|
+
]
|
@@ -36,17 +36,29 @@ class BaseEmbedService(BaseService):
|
|
36
36
|
if not isinstance(input_data, list):
|
37
37
|
input_data = [input_data]
|
38
38
|
return await self.create_text_embeddings(input_data)
|
39
|
-
elif task
|
39
|
+
elif task in ["chunk", "chunk_and_embed"]:
|
40
40
|
if isinstance(input_data, list):
|
41
|
-
raise ValueError("
|
42
|
-
return await self.create_chunks(input_data, kwargs
|
41
|
+
raise ValueError("chunk task requires single text input")
|
42
|
+
return await self.create_chunks(input_data, **kwargs)
|
43
43
|
elif task == "similarity":
|
44
|
+
# Support both text-based and embedding-based similarity
|
45
|
+
candidates = kwargs.get("candidates")
|
44
46
|
embedding1 = kwargs.get("embedding1")
|
45
47
|
embedding2 = kwargs.get("embedding2")
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
|
49
|
+
if candidates:
|
50
|
+
# Text-based similarity - compute embeddings first
|
51
|
+
if isinstance(input_data, list):
|
52
|
+
raise ValueError("similarity task with candidates requires single query text")
|
53
|
+
# Remove candidates from kwargs to avoid duplicate parameter
|
54
|
+
similarity_kwargs = {k: v for k, v in kwargs.items() if k != 'candidates'}
|
55
|
+
return await self._text_similarity_search(input_data, candidates, **similarity_kwargs)
|
56
|
+
elif embedding1 and embedding2:
|
57
|
+
# Direct embedding similarity
|
58
|
+
similarity = await self.compute_similarity(embedding1, embedding2)
|
59
|
+
return {"similarity": similarity}
|
60
|
+
else:
|
61
|
+
raise ValueError("similarity task requires either 'candidates' parameter or both 'embedding1' and 'embedding2' parameters")
|
50
62
|
elif task == "find_similar":
|
51
63
|
query_embedding = kwargs.get("query_embedding")
|
52
64
|
candidate_embeddings = kwargs.get("candidate_embeddings")
|
@@ -57,6 +69,21 @@ class BaseEmbedService(BaseService):
|
|
57
69
|
candidate_embeddings,
|
58
70
|
kwargs.get("top_k", 5)
|
59
71
|
)
|
72
|
+
|
73
|
+
# ==================== Reranking tasks ====================
|
74
|
+
elif task in ["rerank", "rerank_documents", "document_ranking"]:
|
75
|
+
query = kwargs.get("query") or input_data
|
76
|
+
documents = kwargs.get("documents")
|
77
|
+
if not documents:
|
78
|
+
raise ValueError("rerank task requires documents parameter")
|
79
|
+
if isinstance(query, list):
|
80
|
+
raise ValueError("rerank task requires single query string")
|
81
|
+
return await self.rerank_documents(
|
82
|
+
query=query,
|
83
|
+
documents=documents,
|
84
|
+
top_k=kwargs.get("top_k"),
|
85
|
+
return_documents=kwargs.get("return_documents", True)
|
86
|
+
)
|
60
87
|
else:
|
61
88
|
raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
|
62
89
|
|
@@ -67,7 +94,51 @@ class BaseEmbedService(BaseService):
|
|
67
94
|
Returns:
|
68
95
|
List of supported task names
|
69
96
|
"""
|
70
|
-
return ["embed", "embed_batch", "chunk_and_embed", "similarity", "find_similar"]
|
97
|
+
return ["embed", "embed_batch", "chunk", "chunk_and_embed", "similarity", "find_similar", "rerank", "rerank_documents", "document_ranking"]
|
98
|
+
|
99
|
+
async def _text_similarity_search(self, query_text: str, candidates: List[str], **kwargs) -> Dict[str, Any]:
|
100
|
+
"""
|
101
|
+
Helper method for text-based similarity search
|
102
|
+
|
103
|
+
Args:
|
104
|
+
query_text: Query text
|
105
|
+
candidates: List of candidate texts
|
106
|
+
**kwargs: Additional parameters (top_k, threshold, etc.)
|
107
|
+
|
108
|
+
Returns:
|
109
|
+
Dictionary containing similar documents with scores
|
110
|
+
"""
|
111
|
+
# Get embeddings for query and candidates
|
112
|
+
query_embedding = await self.create_text_embedding(query_text)
|
113
|
+
candidate_embeddings = await self.create_text_embeddings(candidates)
|
114
|
+
|
115
|
+
# Find similar texts
|
116
|
+
similar_results = await self.find_similar_texts(
|
117
|
+
query_embedding,
|
118
|
+
candidate_embeddings,
|
119
|
+
kwargs.get("top_k", len(candidates))
|
120
|
+
)
|
121
|
+
|
122
|
+
# Apply threshold if specified
|
123
|
+
threshold = kwargs.get("threshold")
|
124
|
+
if threshold is not None:
|
125
|
+
similar_results = [r for r in similar_results if r["similarity"] >= threshold]
|
126
|
+
|
127
|
+
# Convert to expected format with text content
|
128
|
+
similar_documents = []
|
129
|
+
for result in similar_results:
|
130
|
+
similar_documents.append({
|
131
|
+
"text": candidates[result["index"]],
|
132
|
+
"similarity": result["similarity"],
|
133
|
+
"index": result["index"]
|
134
|
+
})
|
135
|
+
|
136
|
+
return {
|
137
|
+
"similar_documents": similar_documents,
|
138
|
+
"query": query_text,
|
139
|
+
"total_candidates": len(candidates),
|
140
|
+
"returned_count": len(similar_documents)
|
141
|
+
}
|
71
142
|
|
72
143
|
@abstractmethod
|
73
144
|
async def create_text_embedding(self, text: str) -> List[float]:
|
@@ -170,6 +241,38 @@ class BaseEmbedService(BaseService):
|
|
170
241
|
"""
|
171
242
|
pass
|
172
243
|
|
244
|
+
async def rerank_documents(
|
245
|
+
self,
|
246
|
+
query: str,
|
247
|
+
documents: List[str],
|
248
|
+
top_k: Optional[int] = None,
|
249
|
+
return_documents: bool = True
|
250
|
+
) -> Dict[str, Any]:
|
251
|
+
"""
|
252
|
+
Rerank documents based on relevance to query
|
253
|
+
|
254
|
+
Default implementation returns an error dictionary (success=False) instead of raising.
|
255
|
+
Override in subclasses that support reranking.
|
256
|
+
|
257
|
+
Args:
|
258
|
+
query: Search query string
|
259
|
+
documents: List of documents to rerank
|
260
|
+
top_k: Number of top results to return (None = all)
|
261
|
+
return_documents: Whether to include document text in results
|
262
|
+
|
263
|
+
Returns:
|
264
|
+
Dictionary containing:
|
265
|
+
- success: Boolean success status
|
266
|
+
- results: List of ranked documents with scores
|
267
|
+
- metadata: Additional information (model, timing, etc.)
|
268
|
+
"""
|
269
|
+
return {
|
270
|
+
'success': False,
|
271
|
+
'error': f'Reranking not supported by {self.__class__.__name__}',
|
272
|
+
'provider': getattr(self, 'provider_name', 'unknown'),
|
273
|
+
'service': getattr(self, 'model_name', 'unknown')
|
274
|
+
}
|
275
|
+
|
173
276
|
@abstractmethod
|
174
277
|
async def close(self):
|
175
278
|
"""Cleanup resources"""
|
@@ -0,0 +1,305 @@
|
|
1
|
+
"""
|
2
|
+
ISA Embedding Service
|
3
|
+
|
4
|
+
ISA reranking service using deployed Jina Reranker v2 via Modal
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Dict, Any, List, Optional
|
9
|
+
|
10
|
+
try:
|
11
|
+
import modal
|
12
|
+
MODAL_AVAILABLE = True
|
13
|
+
except ImportError:
|
14
|
+
MODAL_AVAILABLE = False
|
15
|
+
modal = None
|
16
|
+
|
17
|
+
from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
class ISAEmbedService(BaseEmbedService):
|
22
|
+
"""
|
23
|
+
ISA Embedding Service - calls ISA deployed reranking models
|
24
|
+
|
25
|
+
Supported features:
|
26
|
+
- Document reranking (Jina Reranker v2 via Modal)
|
27
|
+
- Future: embedding generation
|
28
|
+
- Future: semantic similarity computation
|
29
|
+
"""
|
30
|
+
|
31
|
+
def __init__(self,
|
32
|
+
rerank_modal_app_name: str = "isa-embed-rerank",
|
33
|
+
timeout: int = 30):
|
34
|
+
"""
|
35
|
+
Initialize ISA Embedding service
|
36
|
+
|
37
|
+
Args:
|
38
|
+
rerank_modal_app_name: Modal reranking app name
|
39
|
+
timeout: Request timeout in seconds
|
40
|
+
"""
|
41
|
+
# For now, skip BaseService initialization to avoid config validation
|
42
|
+
# TODO: Properly configure ISA provider in config system
|
43
|
+
self.provider_name = "isa"
|
44
|
+
self.model_name = "isa-jina-reranker-v2-service"
|
45
|
+
self.rerank_modal_app_name = rerank_modal_app_name
|
46
|
+
self.timeout = timeout
|
47
|
+
|
48
|
+
# Initialize Modal client
|
49
|
+
if MODAL_AVAILABLE:
|
50
|
+
try:
|
51
|
+
# Get deployed Modal application
|
52
|
+
self.modal_app = modal.App.lookup(rerank_modal_app_name)
|
53
|
+
logger.info(f"Connected to Modal rerank app: {rerank_modal_app_name}")
|
54
|
+
|
55
|
+
self.modal_service = True # Mark service as available
|
56
|
+
logger.info("Modal rerank app connection established")
|
57
|
+
|
58
|
+
except Exception as e:
|
59
|
+
logger.warning(f"Failed to connect to Modal rerank app: {e}")
|
60
|
+
self.modal_app = None
|
61
|
+
self.modal_service = None
|
62
|
+
else:
|
63
|
+
logger.warning("Modal SDK not available")
|
64
|
+
self.modal_app = None
|
65
|
+
self.modal_service = None
|
66
|
+
|
67
|
+
# Service statistics
|
68
|
+
self.request_count = 0
|
69
|
+
self.total_cost = 0.0
|
70
|
+
|
71
|
+
async def rerank_documents(
|
72
|
+
self,
|
73
|
+
query: str,
|
74
|
+
documents: List[str],
|
75
|
+
top_k: Optional[int] = None,
|
76
|
+
return_documents: bool = True
|
77
|
+
) -> Dict[str, Any]:
|
78
|
+
"""
|
79
|
+
Rerank documents using Jina Reranker v2
|
80
|
+
|
81
|
+
Args:
|
82
|
+
query: Query string
|
83
|
+
documents: List of documents to rerank
|
84
|
+
top_k: Return top k results (None = all)
|
85
|
+
return_documents: Whether to include document content in results
|
86
|
+
|
87
|
+
Returns:
|
88
|
+
Reranking results
|
89
|
+
"""
|
90
|
+
try:
|
91
|
+
if not self.modal_app or not self.modal_service:
|
92
|
+
return {
|
93
|
+
'success': False,
|
94
|
+
'provider': 'ISA',
|
95
|
+
'service': 'isa-embed-rerank',
|
96
|
+
'error': 'Modal rerank app or service not available'
|
97
|
+
}
|
98
|
+
|
99
|
+
# Call reranking service directly via Modal SDK
|
100
|
+
result = await self._call_rerank_service(query, documents, top_k, return_documents)
|
101
|
+
|
102
|
+
if result and result.get('success', False):
|
103
|
+
self.request_count += 1
|
104
|
+
|
105
|
+
# Record cost
|
106
|
+
if 'billing' in result:
|
107
|
+
cost = result['billing'].get('estimated_cost_usd', 0)
|
108
|
+
self.total_cost += cost
|
109
|
+
|
110
|
+
# Format response to match expected structure
|
111
|
+
formatted_result = {
|
112
|
+
'success': True,
|
113
|
+
'provider': 'ISA',
|
114
|
+
'service': 'isa-embed-rerank',
|
115
|
+
'result': {
|
116
|
+
'results': result.get('results', []),
|
117
|
+
'processing_time': result.get('processing_time'),
|
118
|
+
'billing': result.get('billing', {}),
|
119
|
+
'query': result.get('query'),
|
120
|
+
'num_documents': result.get('num_documents'),
|
121
|
+
'returned_count': result.get('returned_count')
|
122
|
+
},
|
123
|
+
'metadata': {
|
124
|
+
'model_used': result.get('model'),
|
125
|
+
'provider': result.get('provider', 'ISA'),
|
126
|
+
'billing': result.get('billing', {})
|
127
|
+
}
|
128
|
+
}
|
129
|
+
return formatted_result
|
130
|
+
else:
|
131
|
+
return {
|
132
|
+
'success': False,
|
133
|
+
'provider': 'ISA',
|
134
|
+
'service': 'isa-embed-rerank',
|
135
|
+
'error': f'Rerank service returned error: {result.get("error", "Unknown error") if result else "No response"}',
|
136
|
+
'details': result
|
137
|
+
}
|
138
|
+
|
139
|
+
except Exception as e:
|
140
|
+
logger.error(f"ISA document reranking failed: {e}")
|
141
|
+
import traceback
|
142
|
+
traceback.print_exc()
|
143
|
+
return {
|
144
|
+
'success': False,
|
145
|
+
'provider': 'ISA',
|
146
|
+
'service': 'isa-embed-rerank',
|
147
|
+
'error': str(e)
|
148
|
+
}
|
149
|
+
|
150
|
+
async def _call_rerank_service(
|
151
|
+
self,
|
152
|
+
query: str,
|
153
|
+
documents: List[str],
|
154
|
+
top_k: Optional[int],
|
155
|
+
return_documents: bool
|
156
|
+
) -> Dict[str, Any]:
|
157
|
+
"""
|
158
|
+
Call reranking service via Modal SDK
|
159
|
+
"""
|
160
|
+
try:
|
161
|
+
import modal
|
162
|
+
|
163
|
+
logger.info("Calling Jina Reranker v2 service via Modal SDK...")
|
164
|
+
|
165
|
+
# Correct Modal SDK usage: call deployed class method
|
166
|
+
ISAEmbedRerankService = modal.Cls.from_name(
|
167
|
+
app_name=self.rerank_modal_app_name,
|
168
|
+
name="ISAEmbedRerankService"
|
169
|
+
)
|
170
|
+
|
171
|
+
# Create instance and call method
|
172
|
+
instance = ISAEmbedRerankService()
|
173
|
+
result = instance.rerank_documents.remote(
|
174
|
+
query=query,
|
175
|
+
documents=documents,
|
176
|
+
top_k=top_k,
|
177
|
+
return_documents=return_documents
|
178
|
+
)
|
179
|
+
|
180
|
+
logger.info("Modal rerank SDK call successful")
|
181
|
+
return result
|
182
|
+
|
183
|
+
except Exception as e:
|
184
|
+
logger.error(f"Modal rerank SDK call failed: {e}")
|
185
|
+
return {
|
186
|
+
'success': False,
|
187
|
+
'error': f'Modal rerank SDK error: {str(e)}'
|
188
|
+
}
|
189
|
+
|
190
|
+
# ==================== Embedding methods (future implementation) ====================
|
191
|
+
|
192
|
+
async def create_text_embedding(self, text: str) -> List[float]:
|
193
|
+
"""Create single text embedding - not yet implemented"""
|
194
|
+
raise NotImplementedError("Text embedding not yet implemented in ISA service")
|
195
|
+
|
196
|
+
async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
|
197
|
+
"""Create multiple text embeddings - not yet implemented"""
|
198
|
+
raise NotImplementedError("Text embeddings not yet implemented in ISA service")
|
199
|
+
|
200
|
+
async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
|
201
|
+
"""Create text chunks with embeddings - not yet implemented"""
|
202
|
+
raise NotImplementedError("Text chunking not yet implemented in ISA service")
|
203
|
+
|
204
|
+
async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
|
205
|
+
"""Compute embedding similarity - not yet implemented"""
|
206
|
+
raise NotImplementedError("Similarity computation not yet implemented in ISA service")
|
207
|
+
|
208
|
+
async def find_similar_texts(
|
209
|
+
self,
|
210
|
+
query_embedding: List[float],
|
211
|
+
candidate_embeddings: List[List[float]],
|
212
|
+
top_k: int = 5
|
213
|
+
) -> List[Dict[str, Any]]:
|
214
|
+
"""Find similar texts - not yet implemented"""
|
215
|
+
raise NotImplementedError("Similar text search not yet implemented in ISA service")
|
216
|
+
|
217
|
+
def get_embedding_dimension(self) -> int:
|
218
|
+
"""Get embedding dimension - not applicable for rerank-only service"""
|
219
|
+
raise NotImplementedError("Embedding dimension not available for rerank-only service")
|
220
|
+
|
221
|
+
def get_max_input_length(self) -> int:
|
222
|
+
"""Get maximum input length"""
|
223
|
+
return 1024 # Jina Reranker v2 max length
|
224
|
+
|
225
|
+
# ==================== Service management methods ====================
|
226
|
+
|
227
|
+
async def health_check(self) -> Dict[str, Any]:
|
228
|
+
"""Check ISA reranking service health"""
|
229
|
+
try:
|
230
|
+
# Simple health check: call reranking service
|
231
|
+
test_result = await self.rerank_documents(
|
232
|
+
query="test",
|
233
|
+
documents=["test document"],
|
234
|
+
top_k=1,
|
235
|
+
return_documents=False
|
236
|
+
)
|
237
|
+
|
238
|
+
return {
|
239
|
+
'success': True,
|
240
|
+
'provider': 'ISA',
|
241
|
+
'service': 'isa-embed-rerank',
|
242
|
+
'status': 'healthy' if test_result.get('success') else 'error',
|
243
|
+
'rerank_service': test_result.get('success', False),
|
244
|
+
'usage_stats': {
|
245
|
+
'total_requests': self.request_count,
|
246
|
+
'total_cost_usd': round(self.total_cost, 6)
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
except Exception as e:
|
251
|
+
return {
|
252
|
+
'success': False,
|
253
|
+
'provider': 'ISA',
|
254
|
+
'service': 'isa-embed-rerank',
|
255
|
+
'status': 'error',
|
256
|
+
'error': str(e)
|
257
|
+
}
|
258
|
+
|
259
|
+
async def get_usage_stats(self) -> Dict[str, Any]:
|
260
|
+
"""Get usage statistics"""
|
261
|
+
try:
|
262
|
+
modal_stats = {}
|
263
|
+
|
264
|
+
# Try to get Modal service statistics
|
265
|
+
if self.modal_app:
|
266
|
+
try:
|
267
|
+
# Can extend to get Modal service stats
|
268
|
+
pass
|
269
|
+
except Exception as e:
|
270
|
+
logger.warning(f"Failed to get Modal stats: {e}")
|
271
|
+
|
272
|
+
return {
|
273
|
+
'provider': 'ISA',
|
274
|
+
'service': 'isa-embed-rerank',
|
275
|
+
'client_stats': {
|
276
|
+
'total_requests': self.request_count,
|
277
|
+
'total_cost_usd': round(self.total_cost, 6)
|
278
|
+
},
|
279
|
+
'modal_stats': modal_stats,
|
280
|
+
'combined_cost': round(self.total_cost, 6)
|
281
|
+
}
|
282
|
+
|
283
|
+
except Exception as e:
|
284
|
+
return {
|
285
|
+
'provider': 'ISA',
|
286
|
+
'service': 'isa-embed-rerank',
|
287
|
+
'error': str(e)
|
288
|
+
}
|
289
|
+
|
290
|
+
def get_supported_tasks(self) -> List[str]:
|
291
|
+
"""Get supported task list"""
|
292
|
+
return [
|
293
|
+
'rerank', # Document reranking
|
294
|
+
'rerank_documents', # Document reranking (alias)
|
295
|
+
'document_ranking' # Document ranking (alias)
|
296
|
+
]
|
297
|
+
|
298
|
+
def get_supported_formats(self) -> List[str]:
|
299
|
+
"""Get supported formats"""
|
300
|
+
return ['text'] # Text only
|
301
|
+
|
302
|
+
async def close(self):
|
303
|
+
"""Cleanup resources"""
|
304
|
+
# Modal client doesn't need explicit closure
|
305
|
+
pass
|
@@ -129,11 +129,9 @@ class OpenAIEmbedService(BaseEmbedService):
|
|
129
129
|
logger.error(f"Error creating text embeddings: {e}")
|
130
130
|
raise
|
131
131
|
|
132
|
-
async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
|
132
|
+
async def create_chunks(self, text: str, metadata: Optional[Dict] = None, chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
|
133
133
|
"""Create text chunks with embeddings"""
|
134
|
-
#
|
135
|
-
chunk_size = 400 # words
|
136
|
-
overlap = 50 # word overlap between chunks
|
134
|
+
# Use provided chunk_size and overlap, or defaults optimized for OpenAI models
|
137
135
|
|
138
136
|
words = text.split()
|
139
137
|
if not words:
|