isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
isa_model/inference/services/audio/openai_stt_service.py
@@ -1,4 +1,5 @@
  import logging
+ import os
  import aiohttp
  from typing import Dict, Any, List, Union, Optional, BinaryIO
  from openai import AsyncOpenAI
@@ -72,13 +73,38 @@ class OpenAISTTService(BaseSTTService):
          if prompt:
              transcription_params["prompt"] = prompt
 
-         # Handle file input
+         # Handle file input - support base64 strings, file paths, and file objects
          if isinstance(audio_file, str):
-             with open(audio_file, "rb") as f:
-                 transcription = await self.client.audio.transcriptions.create(
-                     file=f,
-                     **transcription_params
-                 )
+             # Check if it's a base64 string or file path
+             if len(audio_file) > 100 and not os.path.exists(audio_file):
+                 # Likely a base64 string
+                 try:
+                     import base64
+                     from io import BytesIO
+                     logger.info(f"Attempting to decode base64 audio data (length: {len(audio_file)})")
+                     audio_data = base64.b64decode(audio_file)
+                     audio_buffer = BytesIO(audio_data)
+                     audio_buffer.name = "audio.wav"  # OpenAI needs a filename hint
+                     logger.info(f"Successfully decoded base64 to {len(audio_data)} bytes")
+                     transcription = await self.client.audio.transcriptions.create(
+                         file=audio_buffer,
+                         **transcription_params
+                     )
+                 except Exception as e:
+                     # If base64 decoding fails, treat as file path
+                     logger.error(f"Base64 decoding failed: {e}, treating as file path")
+                     with open(audio_file, "rb") as f:
+                         transcription = await self.client.audio.transcriptions.create(
+                             file=f,
+                             **transcription_params
+                         )
+             else:
+                 # Regular file path
+                 with open(audio_file, "rb") as f:
+                     transcription = await self.client.audio.transcriptions.create(
+                         file=f,
+                         **transcription_params
+                     )
          else:
              transcription = await self.client.audio.transcriptions.create(
                  file=audio_file,
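
The detection heuristic is purely string-based: a string longer than 100 characters that does not resolve to an existing path is treated as base64 first, with a fallback to the file-path branch if decoding fails. A minimal sketch of the same check in isolation (the helper name is illustrative, not part of the package):

    import base64
    import os

    def looks_like_base64_audio(audio_file: str) -> bool:
        # Mirrors the branch above: long string that is not an existing path
        return len(audio_file) > 100 and not os.path.exists(audio_file)

    sample = base64.b64encode(b"\x00" * 200).decode()   # stand-in for real audio bytes
    print(looks_like_base64_audio(sample))              # True  -> decoded into a BytesIO buffer
    print(looks_like_base64_audio("meeting.wav"))       # False -> opened as a regular file
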
isa_model/inference/services/base_service.py
@@ -66,7 +66,8 @@ class BaseService(ABC):
                  output_tokens=output_tokens
              )
 
-             # Track usage through model manager
+             # Track usage through both systems (legacy and new)
+             # Legacy detailed tracking (will be phased out)
              self.model_manager.billing_tracker.track_model_usage(
                  model_id=self.model_name,
                  operation_type="inference",
@@ -80,6 +81,21 @@ class BaseService(ABC):
                  cost_usd=cost_usd,
                  metadata=metadata
              )
+ 
+             # New aggregated statistics tracking
+             self.model_manager.statistics_tracker.track_usage(
+                 model_id=self.model_name,
+                 provider=self.provider_name,
+                 service_type=service_type if isinstance(service_type, str) else service_type.value,
+                 operation_type="inference",
+                 operation=operation,
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 input_units=input_units,
+                 output_units=output_units,
+                 cost_usd=cost_usd or 0.0,
+                 metadata=metadata
+             )
          except Exception as e:
              # Don't let billing tracking break the service
              import logging
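
The new call site implies the interface the statistics tracker (presumably the model_statistics_tracker.py module listed above) must expose: a track_usage method accepting exactly the keyword arguments shown. A duck-typed stub illustrating that implied signature; the stub is hypothetical and only echoes what the call above passes:

    from typing import Any, Dict, Optional

    class StatsTrackerStub:
        """Hypothetical stand-in matching the keywords used by BaseService above."""
        def track_usage(self, *, model_id: str, provider: str, service_type: str,
                        operation_type: str, operation: str,
                        input_tokens: Optional[int] = None, output_tokens: Optional[int] = None,
                        input_units: Optional[float] = None, output_units: Optional[float] = None,
                        cost_usd: float = 0.0, metadata: Optional[Dict[str, Any]] = None) -> None:
            # The real tracker presumably aggregates and persists these counters; the stub just prints
            print(f"{provider}/{model_id} {operation}: {input_tokens} in / {output_tokens} out, ${cost_usd:.6f}")

    StatsTrackerStub().track_usage(model_id="gpt-4o-mini", provider="openai",
                                   service_type="text", operation_type="inference",
                                   operation="chat", input_tokens=120, output_tokens=48,
                                   cost_usd=0.00021)
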
isa_model/inference/services/embedding/__init__.py
@@ -0,0 +1,13 @@
+ """
+ Embedding Services - Text and Document Embedding Services
+ """
+ 
+ from .base_embed_service import BaseEmbedService
+ from .openai_embed_service import OpenAIEmbedService
+ from .ollama_embed_service import OllamaEmbedService
+ 
+ __all__ = [
+     'BaseEmbedService',
+     'OpenAIEmbedService',
+     'OllamaEmbedService'
+ ]
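
With the new package __init__, the embedding services can be imported from the package root rather than their defining modules. A short sketch of what that enables:

    # Both names resolve through the package after this change
    from isa_model.inference.services.embedding import BaseEmbedService, OpenAIEmbedService

    assert issubclass(OpenAIEmbedService, BaseEmbedService)
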
isa_model/inference/services/embedding/base_embed_service.py
@@ -36,17 +36,29 @@ class BaseEmbedService(BaseService):
              if not isinstance(input_data, list):
                  input_data = [input_data]
              return await self.create_text_embeddings(input_data)
-         elif task == "chunk_and_embed":
+         elif task in ["chunk", "chunk_and_embed"]:
              if isinstance(input_data, list):
-                 raise ValueError("chunk_and_embed task requires single text input")
-             return await self.create_chunks(input_data, kwargs.get("metadata"))
+                 raise ValueError("chunk task requires single text input")
+             return await self.create_chunks(input_data, **kwargs)
          elif task == "similarity":
+             # Support both text-based and embedding-based similarity
+             candidates = kwargs.get("candidates")
              embedding1 = kwargs.get("embedding1")
              embedding2 = kwargs.get("embedding2")
-             if not embedding1 or not embedding2:
-                 raise ValueError("similarity task requires embedding1 and embedding2 parameters")
-             similarity = await self.compute_similarity(embedding1, embedding2)
-             return {"similarity": similarity}
+ 
+             if candidates:
+                 # Text-based similarity - compute embeddings first
+                 if isinstance(input_data, list):
+                     raise ValueError("similarity task with candidates requires single query text")
+                 # Remove candidates from kwargs to avoid duplicate parameter
+                 similarity_kwargs = {k: v for k, v in kwargs.items() if k != 'candidates'}
+                 return await self._text_similarity_search(input_data, candidates, **similarity_kwargs)
+             elif embedding1 and embedding2:
+                 # Direct embedding similarity
+                 similarity = await self.compute_similarity(embedding1, embedding2)
+                 return {"similarity": similarity}
+             else:
+                 raise ValueError("similarity task requires either 'candidates' parameter or both 'embedding1' and 'embedding2' parameters")
          elif task == "find_similar":
              query_embedding = kwargs.get("query_embedding")
              candidate_embeddings = kwargs.get("candidate_embeddings")
@@ -57,6 +69,21 @@ class BaseEmbedService(BaseService):
                  candidate_embeddings,
                  kwargs.get("top_k", 5)
              )
+ 
+         # ==================== Reranking tasks ====================
+         elif task in ["rerank", "rerank_documents", "document_ranking"]:
+             query = kwargs.get("query") or input_data
+             documents = kwargs.get("documents")
+             if not documents:
+                 raise ValueError("rerank task requires documents parameter")
+             if isinstance(query, list):
+                 raise ValueError("rerank task requires single query string")
+             return await self.rerank_documents(
+                 query=query,
+                 documents=documents,
+                 top_k=kwargs.get("top_k"),
+                 return_documents=kwargs.get("return_documents", True)
+             )
          else:
              raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
 
@@ -67,7 +94,51 @@ class BaseEmbedService(BaseService):
          Returns:
              List of supported task names
          """
-         return ["embed", "embed_batch", "chunk_and_embed", "similarity", "find_similar"]
+         return ["embed", "embed_batch", "chunk", "chunk_and_embed", "similarity", "find_similar", "rerank", "rerank_documents", "document_ranking"]
+ 
+     async def _text_similarity_search(self, query_text: str, candidates: List[str], **kwargs) -> Dict[str, Any]:
+         """
+         Helper method for text-based similarity search
+ 
+         Args:
+             query_text: Query text
+             candidates: List of candidate texts
+             **kwargs: Additional parameters (top_k, threshold, etc.)
+ 
+         Returns:
+             Dictionary containing similar documents with scores
+         """
+         # Get embeddings for query and candidates
+         query_embedding = await self.create_text_embedding(query_text)
+         candidate_embeddings = await self.create_text_embeddings(candidates)
+ 
+         # Find similar texts
+         similar_results = await self.find_similar_texts(
+             query_embedding,
+             candidate_embeddings,
+             kwargs.get("top_k", len(candidates))
+         )
+ 
+         # Apply threshold if specified
+         threshold = kwargs.get("threshold")
+         if threshold is not None:
+             similar_results = [r for r in similar_results if r["similarity"] >= threshold]
+ 
+         # Convert to expected format with text content
+         similar_documents = []
+         for result in similar_results:
+             similar_documents.append({
+                 "text": candidates[result["index"]],
+                 "similarity": result["similarity"],
+                 "index": result["index"]
+             })
+ 
+         return {
+             "similar_documents": similar_documents,
+             "query": query_text,
+             "total_candidates": len(candidates),
+             "returned_count": len(similar_documents)
+         }
 
      @abstractmethod
      async def create_text_embedding(self, text: str) -> List[float]:
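
Taken together, the new branches let a single task dispatcher cover text-to-text similarity (via candidates, which routes through _text_similarity_search above) and document reranking. The method containing the elif chain is not named in these hunks; a usage sketch, run inside an async context, assuming it is exposed as an invoke(input_data, task=..., **kwargs) coroutine on a concrete embedding service (both the method name and the embed_service instance are assumptions):

    # Text-based similarity: embeddings are computed internally for query and candidates
    hits = await embed_service.invoke(
        "quarterly revenue report",
        task="similarity",
        candidates=["annual budget", "Q3 earnings summary", "holiday schedule"],
        top_k=2,
        threshold=0.3,
    )

    # Reranking: dispatched to rerank_documents(), which subclasses may override
    ranked = await embed_service.invoke(
        "what is the refund policy?",
        task="rerank",
        documents=["Refunds are issued within 30 days.", "Shipping takes 5 working days."],
        top_k=1,
        return_documents=True,
    )
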
@@ -170,6 +241,38 @@ class BaseEmbedService(BaseService):
          """
          pass
 
+     async def rerank_documents(
+         self,
+         query: str,
+         documents: List[str],
+         top_k: Optional[int] = None,
+         return_documents: bool = True
+     ) -> Dict[str, Any]:
+         """
+         Rerank documents based on relevance to query
+ 
+         The default implementation returns an error response;
+         override in subclasses that support reranking.
+ 
+         Args:
+             query: Search query string
+             documents: List of documents to rerank
+             top_k: Number of top results to return (None = all)
+             return_documents: Whether to include document text in results
+ 
+         Returns:
+             Dictionary containing:
+             - success: Boolean success status
+             - results: List of ranked documents with scores
+             - metadata: Additional information (model, timing, etc.)
+         """
+         return {
+             'success': False,
+             'error': f'Reranking not supported by {self.__class__.__name__}',
+             'provider': getattr(self, 'provider_name', 'unknown'),
+             'service': getattr(self, 'model_name', 'unknown')
+         }
+ 
      @abstractmethod
      async def close(self):
          """Cleanup resources"""
isa_model/inference/services/embedding/isa_embed_service.py
@@ -0,0 +1,305 @@
+ """
+ ISA Embedding Service
+ 
+ ISA reranking service using deployed Jina Reranker v2 via Modal
+ """
+ 
+ import logging
+ from typing import Dict, Any, List, Optional
+ 
+ try:
+     import modal
+     MODAL_AVAILABLE = True
+ except ImportError:
+     MODAL_AVAILABLE = False
+     modal = None
+ 
+ from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
+ 
+ logger = logging.getLogger(__name__)
+ 
+ class ISAEmbedService(BaseEmbedService):
+     """
+     ISA Embedding Service - calls ISA deployed reranking models
+ 
+     Supported features:
+     - Document reranking (Jina Reranker v2 via Modal)
+     - Future: embedding generation
+     - Future: semantic similarity computation
+     """
+ 
+     def __init__(self,
+                  rerank_modal_app_name: str = "isa-embed-rerank",
+                  timeout: int = 30):
+         """
+         Initialize ISA Embedding service
+ 
+         Args:
+             rerank_modal_app_name: Modal reranking app name
+             timeout: Request timeout in seconds
+         """
+         # For now, skip BaseService initialization to avoid config validation
+         # TODO: Properly configure ISA provider in config system
+         self.provider_name = "isa"
+         self.model_name = "isa-jina-reranker-v2-service"
+         self.rerank_modal_app_name = rerank_modal_app_name
+         self.timeout = timeout
+ 
+         # Initialize Modal client
+         if MODAL_AVAILABLE:
+             try:
+                 # Get deployed Modal application
+                 self.modal_app = modal.App.lookup(rerank_modal_app_name)
+                 logger.info(f"Connected to Modal rerank app: {rerank_modal_app_name}")
+ 
+                 self.modal_service = True  # Mark service as available
+                 logger.info("Modal rerank app connection established")
+ 
+             except Exception as e:
+                 logger.warning(f"Failed to connect to Modal rerank app: {e}")
+                 self.modal_app = None
+                 self.modal_service = None
+         else:
+             logger.warning("Modal SDK not available")
+             self.modal_app = None
+             self.modal_service = None
+ 
+         # Service statistics
+         self.request_count = 0
+         self.total_cost = 0.0
+ 
+     async def rerank_documents(
+         self,
+         query: str,
+         documents: List[str],
+         top_k: Optional[int] = None,
+         return_documents: bool = True
+     ) -> Dict[str, Any]:
+         """
+         Rerank documents using Jina Reranker v2
+ 
+         Args:
+             query: Query string
+             documents: List of documents to rerank
+             top_k: Return top k results (None = all)
+             return_documents: Whether to include document content in results
+ 
+         Returns:
+             Reranking results
+         """
+         try:
+             if not self.modal_app or not self.modal_service:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-embed-rerank',
+                     'error': 'Modal rerank app or service not available'
+                 }
+ 
+             # Call reranking service directly via Modal SDK
+             result = await self._call_rerank_service(query, documents, top_k, return_documents)
+ 
+             if result and result.get('success', False):
+                 self.request_count += 1
+ 
+                 # Record cost
+                 if 'billing' in result:
+                     cost = result['billing'].get('estimated_cost_usd', 0)
+                     self.total_cost += cost
+ 
+                 # Format response to match expected structure
+                 formatted_result = {
+                     'success': True,
+                     'provider': 'ISA',
+                     'service': 'isa-embed-rerank',
+                     'result': {
+                         'results': result.get('results', []),
+                         'processing_time': result.get('processing_time'),
+                         'billing': result.get('billing', {}),
+                         'query': result.get('query'),
+                         'num_documents': result.get('num_documents'),
+                         'returned_count': result.get('returned_count')
+                     },
+                     'metadata': {
+                         'model_used': result.get('model'),
+                         'provider': result.get('provider', 'ISA'),
+                         'billing': result.get('billing', {})
+                     }
+                 }
+                 return formatted_result
+             else:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-embed-rerank',
+                     'error': f'Rerank service returned error: {result.get("error", "Unknown error") if result else "No response"}',
+                     'details': result
+                 }
+ 
+         except Exception as e:
+             logger.error(f"ISA document reranking failed: {e}")
+             import traceback
+             traceback.print_exc()
+             return {
+                 'success': False,
+                 'provider': 'ISA',
+                 'service': 'isa-embed-rerank',
+                 'error': str(e)
+             }
+ 
+     async def _call_rerank_service(
+         self,
+         query: str,
+         documents: List[str],
+         top_k: Optional[int],
+         return_documents: bool
+     ) -> Dict[str, Any]:
+         """
+         Call reranking service via Modal SDK
+         """
+         try:
+             import modal
+ 
+             logger.info("Calling Jina Reranker v2 service via Modal SDK...")
+ 
+             # Correct Modal SDK usage: call deployed class method
+             ISAEmbedRerankService = modal.Cls.from_name(
+                 app_name=self.rerank_modal_app_name,
+                 name="ISAEmbedRerankService"
+             )
+ 
+             # Create instance and call method
+             instance = ISAEmbedRerankService()
+             result = instance.rerank_documents.remote(
+                 query=query,
+                 documents=documents,
+                 top_k=top_k,
+                 return_documents=return_documents
+             )
+ 
+             logger.info("Modal rerank SDK call successful")
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Modal rerank SDK call failed: {e}")
+             return {
+                 'success': False,
+                 'error': f'Modal rerank SDK error: {str(e)}'
+             }
+ 
+     # ==================== Embedding methods (future implementation) ====================
+ 
+     async def create_text_embedding(self, text: str) -> List[float]:
+         """Create single text embedding - not yet implemented"""
+         raise NotImplementedError("Text embedding not yet implemented in ISA service")
+ 
+     async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+         """Create multiple text embeddings - not yet implemented"""
+         raise NotImplementedError("Text embeddings not yet implemented in ISA service")
+ 
+     async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
+         """Create text chunks with embeddings - not yet implemented"""
+         raise NotImplementedError("Text chunking not yet implemented in ISA service")
+ 
+     async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+         """Compute embedding similarity - not yet implemented"""
+         raise NotImplementedError("Similarity computation not yet implemented in ISA service")
+ 
+     async def find_similar_texts(
+         self,
+         query_embedding: List[float],
+         candidate_embeddings: List[List[float]],
+         top_k: int = 5
+     ) -> List[Dict[str, Any]]:
+         """Find similar texts - not yet implemented"""
+         raise NotImplementedError("Similar text search not yet implemented in ISA service")
+ 
+     def get_embedding_dimension(self) -> int:
+         """Get embedding dimension - not applicable for rerank-only service"""
+         raise NotImplementedError("Embedding dimension not available for rerank-only service")
+ 
+     def get_max_input_length(self) -> int:
+         """Get maximum input length"""
+         return 1024  # Jina Reranker v2 max length
+ 
+     # ==================== Service management methods ====================
+ 
+     async def health_check(self) -> Dict[str, Any]:
+         """Check ISA reranking service health"""
+         try:
+             # Simple health check: call reranking service
+             test_result = await self.rerank_documents(
+                 query="test",
+                 documents=["test document"],
+                 top_k=1,
+                 return_documents=False
+             )
+ 
+             return {
+                 'success': True,
+                 'provider': 'ISA',
+                 'service': 'isa-embed-rerank',
+                 'status': 'healthy' if test_result.get('success') else 'error',
+                 'rerank_service': test_result.get('success', False),
+                 'usage_stats': {
+                     'total_requests': self.request_count,
+                     'total_cost_usd': round(self.total_cost, 6)
+                 }
+             }
+ 
+         except Exception as e:
+             return {
+                 'success': False,
+                 'provider': 'ISA',
+                 'service': 'isa-embed-rerank',
+                 'status': 'error',
+                 'error': str(e)
+             }
+ 
+     async def get_usage_stats(self) -> Dict[str, Any]:
+         """Get usage statistics"""
+         try:
+             modal_stats = {}
+ 
+             # Try to get Modal service statistics
+             if self.modal_app:
+                 try:
+                     # Can extend to get Modal service stats
+                     pass
+                 except Exception as e:
+                     logger.warning(f"Failed to get Modal stats: {e}")
+ 
+             return {
+                 'provider': 'ISA',
+                 'service': 'isa-embed-rerank',
+                 'client_stats': {
+                     'total_requests': self.request_count,
+                     'total_cost_usd': round(self.total_cost, 6)
+                 },
+                 'modal_stats': modal_stats,
+                 'combined_cost': round(self.total_cost, 6)
+             }
+ 
+         except Exception as e:
+             return {
+                 'provider': 'ISA',
+                 'service': 'isa-embed-rerank',
+                 'error': str(e)
+             }
+ 
+     def get_supported_tasks(self) -> List[str]:
+         """Get supported task list"""
+         return [
+             'rerank',            # Document reranking
+             'rerank_documents',  # Document reranking (alias)
+             'document_ranking'   # Document ranking (alias)
+         ]
+ 
+     def get_supported_formats(self) -> List[str]:
+         """Get supported formats"""
+         return ['text']  # Text only
+ 
+     async def close(self):
+         """Cleanup resources"""
+         # Modal client doesn't need explicit closure
+         pass
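
The new service can be exercised end to end. The sketch below only uses the constructor and rerank_documents signature shown above and assumes the isa-embed-rerank Modal app is deployed and reachable:

    import asyncio
    from isa_model.inference.services.embedding.isa_embed_service import ISAEmbedService

    async def main():
        service = ISAEmbedService(rerank_modal_app_name="isa-embed-rerank", timeout=30)
        response = await service.rerank_documents(
            query="what is the refund policy?",
            documents=["Refunds are issued within 30 days.", "Shipping takes 5 working days."],
            top_k=1,
            return_documents=True,
        )
        if response["success"]:
            print(response["result"]["results"])   # ranked documents with scores
        else:
            print("rerank failed:", response["error"])
        await service.close()

    asyncio.run(main())
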
isa_model/inference/services/embedding/openai_embed_service.py
@@ -129,11 +129,9 @@ class OpenAIEmbedService(BaseEmbedService):
              logger.error(f"Error creating text embeddings: {e}")
              raise
 
-     async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
+     async def create_chunks(self, text: str, metadata: Optional[Dict] = None, chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
          """Create text chunks with embeddings"""
-         # Chunk size optimized for OpenAI models (roughly 512 tokens)
-         chunk_size = 400  # words
-         overlap = 50  # word overlap between chunks
+         # Use provided chunk_size and overlap, or defaults optimized for OpenAI models
 
          words = text.split()
          if not words:
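
Chunking parameters are now caller-tunable rather than hard-coded. A short sketch, run inside an async context, assuming an already-constructed OpenAIEmbedService instance embed_service and a long_document_text string (both assumptions; only the keyword names come from the signature above):

    # Defaults remain 400 words per chunk with 50 words of overlap;
    # the smaller values here are purely illustrative.
    chunks = await embed_service.create_chunks(
        long_document_text,
        metadata={"source": "handbook.pdf"},
        chunk_size=200,
        overlap=25,
    )
    print(len(chunks), "chunks")
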