isa-model 0.3.91__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. isa_model/client.py +732 -573
  2. isa_model/core/cache/redis_cache.py +401 -0
  3. isa_model/core/config/config_manager.py +53 -10
  4. isa_model/core/config.py +1 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/migrations.py +277 -0
  7. isa_model/core/database/supabase_client.py +123 -0
  8. isa_model/core/models/__init__.py +37 -0
  9. isa_model/core/models/model_billing_tracker.py +60 -88
  10. isa_model/core/models/model_manager.py +36 -18
  11. isa_model/core/models/model_repo.py +44 -38
  12. isa_model/core/models/model_statistics_tracker.py +234 -0
  13. isa_model/core/models/model_storage.py +0 -1
  14. isa_model/core/models/model_version_manager.py +959 -0
  15. isa_model/core/pricing_manager.py +2 -249
  16. isa_model/core/resilience/circuit_breaker.py +366 -0
  17. isa_model/core/security/secrets.py +358 -0
  18. isa_model/core/services/__init__.py +2 -4
  19. isa_model/core/services/intelligent_model_selector.py +101 -370
  20. isa_model/core/storage/hf_storage.py +1 -1
  21. isa_model/core/types.py +7 -0
  22. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  23. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  24. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  25. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  26. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  27. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  28. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  29. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  30. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  31. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  33. isa_model/deployment/core/deployment_manager.py +6 -4
  34. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  35. isa_model/eval/benchmarks/__init__.py +27 -0
  36. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  37. isa_model/eval/benchmarks.py +244 -12
  38. isa_model/eval/evaluators/__init__.py +8 -2
  39. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  40. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  41. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  42. isa_model/eval/example_evaluation.py +395 -0
  43. isa_model/eval/factory.py +272 -5
  44. isa_model/eval/isa_benchmarks.py +700 -0
  45. isa_model/eval/isa_integration.py +582 -0
  46. isa_model/eval/metrics.py +159 -6
  47. isa_model/eval/tests/unit/test_basic.py +396 -0
  48. isa_model/inference/ai_factory.py +44 -8
  49. isa_model/inference/services/audio/__init__.py +21 -0
  50. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  51. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  52. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  53. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  54. isa_model/inference/services/base_service.py +17 -1
  55. isa_model/inference/services/embedding/__init__.py +13 -0
  56. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  57. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  58. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  59. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  60. isa_model/inference/services/img/__init__.py +2 -2
  61. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  62. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  63. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  64. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  65. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  66. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  67. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  68. isa_model/inference/services/llm/base_llm_service.py +30 -6
  69. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  70. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  71. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  72. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  73. isa_model/inference/services/vision/__init__.py +5 -5
  74. isa_model/inference/services/vision/base_vision_service.py +118 -185
  75. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  76. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  77. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  78. isa_model/serving/api/fastapi_server.py +88 -16
  79. isa_model/serving/api/middleware/auth.py +311 -0
  80. isa_model/serving/api/middleware/security.py +278 -0
  81. isa_model/serving/api/routes/analytics.py +486 -0
  82. isa_model/serving/api/routes/deployments.py +339 -0
  83. isa_model/serving/api/routes/evaluations.py +579 -0
  84. isa_model/serving/api/routes/logs.py +430 -0
  85. isa_model/serving/api/routes/settings.py +582 -0
  86. isa_model/serving/api/routes/unified.py +324 -165
  87. isa_model/serving/api/startup.py +304 -0
  88. isa_model/serving/modal_proxy_server.py +249 -0
  89. isa_model/training/__init__.py +100 -6
  90. isa_model/training/core/__init__.py +4 -1
  91. isa_model/training/examples/intelligent_training_example.py +281 -0
  92. isa_model/training/intelligent/__init__.py +25 -0
  93. isa_model/training/intelligent/decision_engine.py +643 -0
  94. isa_model/training/intelligent/intelligent_factory.py +888 -0
  95. isa_model/training/intelligent/knowledge_base.py +751 -0
  96. isa_model/training/intelligent/resource_optimizer.py +839 -0
  97. isa_model/training/intelligent/task_classifier.py +576 -0
  98. isa_model/training/storage/__init__.py +24 -0
  99. isa_model/training/storage/core_integration.py +439 -0
  100. isa_model/training/storage/training_repository.py +552 -0
  101. isa_model/training/storage/training_storage.py +628 -0
  102. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  103. isa_model-0.4.0.dist-info/RECORD +182 -0
  104. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  105. isa_model/deployment/cloud/modal/register_models.py +0 -321
  106. isa_model/inference/adapter/unified_api.py +0 -248
  107. isa_model/inference/services/helpers/stacked_config.py +0 -148
  108. isa_model/inference/services/img/flux_professional_service.py +0 -603
  109. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  110. isa_model/inference/services/others/table_transformer_service.py +0 -61
  111. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  112. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  113. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  114. isa_model/scripts/inference_tracker.py +0 -283
  115. isa_model/scripts/mlflow_manager.py +0 -379
  116. isa_model/scripts/model_registry.py +0 -465
  117. isa_model/scripts/register_models.py +0 -370
  118. isa_model/scripts/register_models_with_embeddings.py +0 -510
  119. isa_model/scripts/start_mlflow.py +0 -95
  120. isa_model/scripts/training_tracker.py +0 -257
  121. isa_model-0.3.91.dist-info/RECORD +0 -138
  122. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  123. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
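The most consequential change for callers is in isa_model/client.py: the 0.3.91 constructor's mode="local"/"api" switch and api_url parameter are replaced by a single optional service_endpoint, and the API key can now also come from the ISA_API_KEY environment variable. A minimal migration sketch follows; the 0.4.0 signature is taken from the diff below, while the endpoint URL and key are placeholders, not values from this release:

```python
from isa_model.client import ISAModelClient

# 0.3.91: remote calls required mode="api" plus api_url
# client = ISAModelClient(mode="api", api_url="https://models.example.com", api_key="sk-...")

# 0.4.0: passing a service_endpoint implies remote HTTP mode; omitting it uses
# the local AIFactory. api_key may be omitted if ISA_API_KEY is set.
client = ISAModelClient(service_endpoint="https://models.example.com", api_key="sk-...")
```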
isa_model/client.py CHANGED
@@ -2,15 +2,78 @@
  # -*- coding: utf-8 -*-

  """
- ISA Model Client - Unified interface for all AI services
- Provides intelligent model selection and simplified API
+ ISA Model Client - Unified AI Service Interface
+ ===============================================
+
+ Overview:
+ The unified client interface for the ISA Model platform, providing intelligent model selection and a simplified API.
+
+ Key features:
+ - Unified interface for multimodal AI services: text, vision, audio, image generation, embeddings
+ - Automatic model selection: chooses the best-suited model from the task type and input data
+ - Streaming support: real-time streaming text generation for a better user experience
+ - Remote/local operation: supports both local service calls and remote API calls
+ - Cost tracking: automatically computes and tracks API call costs
+ - Tool support: LangChain tool integration to extend model capabilities
+ - Caching: service instances are cached to improve performance
+
+ Inputs:
+ - input_data: input data of several types (text, image paths, audio files, raw bytes, etc.)
+ - task: task type (chat, analyze, generate_speech, transcribe, etc.)
+ - service_type: service type (text, vision, audio, image, embedding)
+ - model: optional model name (selected intelligently if omitted)
+ - provider: optional provider name (openai, ollama, replicate, etc.)
+
+ Outputs:
+ - A unified response dictionary containing result and metadata
+ - Streaming responses: contain a "stream" async generator
+ - Non-streaming responses: contain the "result" data
+ - metadata: model info, billing info, selection reason, etc.
+
+ Core dependencies:
+ - isa_model.inference.ai_factory: AI service factory
+ - isa_model.core.services.intelligent_model_selector: intelligent model selector
+ - aiohttp: HTTP client (remote API mode)
+ - asyncio: async programming support
+
+ Usage examples:
+ ```python
+ # Create a client
+ client = ISAModelClient()
+
+ # Streaming text generation
+ result = await client.invoke("Write a story", "chat", "text")
+ async for token in result["stream"]:
+     print(token, end="", flush=True)
+
+ # Image analysis
+ result = await client.invoke("image.jpg", "analyze", "vision")
+ print(result["result"])
+
+ # Speech synthesis
+ result = await client.invoke("Hello world", "generate_speech", "audio")
+ print(result["result"])
+ ```
+
+ Architecture notes:
+ - Singleton pattern: ensures configuration consistency
+ - Async support: all operations are asynchronous
+ - Error handling: unified error handling and response format
+ - Extensibility: supports new service providers and models
+
+ Suggested improvements:
+ 1. Add request retries: handle unstable network conditions
+ 2. Add rate limiting: avoid exceeding API limits
+ 3. Improve the caching strategy: support LRU caching and TTL expiry
+ 4. Add monitoring metrics: record latency, success rate, and similar indicators
+ 5. Support batching: improve throughput for large request volumes
+ 6. Add configuration validation: verify API keys and configuration at startup
  """

  import logging
  import asyncio
  from typing import Any, Dict, Optional, List, Union
  from pathlib import Path
- import aiohttp

  from isa_model.inference.ai_factory import AIFactory

@@ -36,41 +99,104 @@ class ISAModelClient:
          response = await client.invoke("audio.mp3", "transcribe", "audio")
      """

+     # Consolidated task mappings for all service types
+     TASK_MAPPINGS = {
+         "vision": {
+             # Core tasks (direct mapping)
+             "analyze": "analyze",
+             "describe": "describe",
+             "extract": "extract",
+             "detect": "detect",
+             "classify": "classify",
+             "compare": "compare",
+
+             # Common aliases (backward compatibility)
+             "analyze_image": "analyze",
+             "describe_image": "describe",
+             "extract_text": "extract",
+             "extract_table": "extract",
+             "detect_objects": "detect",
+             "detect_ui": "detect",
+             "detect_ui_elements": "detect",
+             "get_coordinates": "detect",
+             "ocr": "extract",
+             "ui_analysis": "analyze",
+             "navigation": "analyze"
+         },
+         "audio": {
+             "generate_speech": "synthesize",
+             "text_to_speech": "synthesize",
+             "tts": "synthesize",
+             "transcribe": "transcribe",
+             "speech_to_text": "transcribe",
+             "stt": "transcribe",
+             "translate": "translate",
+             "detect_language": "detect_language"
+         },
+         "text": {
+             "chat": "chat",
+             "generate": "generate",
+             "complete": "complete",
+             "translate": "translate",
+             "summarize": "summarize",
+             "analyze": "analyze",
+             "extract": "extract",
+             "classify": "classify"
+         },
+         "image": {
+             "generate_image": "generate",
+             "generate": "generate",
+             "img2img": "img2img",
+             "image_to_image": "img2img",
+             "generate_batch": "generate_batch"
+         },
+         "embedding": {
+             "create_embedding": "embed",
+             "embed": "embed",
+             "embed_batch": "embed_batch",
+             "chunk_and_embed": "chunk_and_embed",
+             "similarity": "similarity",
+             "find_similar": "find_similar",
+             "rerank": "rerank",
+             "rerank_documents": "rerank_documents",
+             "document_ranking": "document_ranking"
+         }
+     }
+
+     # Service type configuration
+     SUPPORTED_SERVICE_TYPES = {"vision", "audio", "text", "image", "embedding"}
+
      def __init__(self,
                   config: Optional[Dict[str, Any]] = None,
-                  mode: str = "local",
-                  api_url: Optional[str] = None,
+                  service_endpoint: Optional[str] = None,
                   api_key: Optional[str] = None):
          """Initialize ISA Model Client

          Args:
              config: Optional configuration override
-             mode: "local" for direct AI Factory, "api" for HTTP API calls
-             api_url: API base URL (required if mode="api")
-             api_key: API key for authentication (optional)
+             service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+             api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)
          """
          self.config = config or {}
-         self.mode = mode
-         self.api_url = api_url.rstrip('/') if api_url else None
-         self.api_key = api_key
+         self.service_endpoint = service_endpoint

-         # Setup HTTP headers for API mode
-         if self.mode == "api":
-             if not self.api_url:
-                 raise ValueError("api_url is required when mode='api'")
-
-             self.headers = {
-                 "Content-Type": "application/json",
-                 "User-Agent": "ISA-Model-Client/1.0.0"
-             }
-             if self.api_key:
-                 self.headers["Authorization"] = f"Bearer {self.api_key}"
+         # Handle API key authentication
+         import os
+         self.api_key = api_key or os.getenv("ISA_API_KEY")
+         if self.api_key:
+             logger.info("API key provided for authentication")
+         else:
+             logger.debug("No API key provided - using anonymous access")

-         # Initialize AI Factory for local mode
-         if self.mode == "local":
+         # Initialize AI Factory for direct service access (when service_endpoint is None)
+         if not self.service_endpoint:
              self.ai_factory = AIFactory.get_instance()
          else:
              self.ai_factory = None
+             logger.info(f"Using remote service endpoint: {self.service_endpoint}")
+
+         # HTTP client for remote API calls
+         self._http_session = None

          # Initialize intelligent model selector
          self.model_selector = None
@@ -89,166 +215,352 @@ class ISAModelClient:

          logger.info("ISA Model Client initialized")

-     async def stream(
+     async def _get_http_session(self):
+         """Get or create HTTP session for remote API calls"""
+         if self._http_session is None:
+             import aiohttp
+             headers = {}
+
+             # Add API key authentication if available
+             if self.api_key:
+                 headers["Authorization"] = f"Bearer {self.api_key}"
+                 headers["X-API-Key"] = self.api_key
+
+             self._http_session = aiohttp.ClientSession(headers=headers)
+
+         return self._http_session
+
+     async def _make_api_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+         """Make HTTP request to remote API endpoint"""
+         if not self.service_endpoint:
+             raise ValueError("Service endpoint not configured for remote API calls")
+
+         session = await self._get_http_session()
+         url = f"{self.service_endpoint.rstrip('/')}/{endpoint.lstrip('/')}"
+
+         try:
+             async with session.post(url, json=data) as response:
+                 if response.status == 401:
+                     raise Exception("Authentication required or invalid API key")
+                 elif response.status == 403:
+                     raise Exception("Insufficient permissions")
+                 elif not response.ok:
+                     error_detail = await response.text()
+                     raise Exception(f"API request failed ({response.status}): {error_detail}")
+
+                 return await response.json()
+
+         except Exception as e:
+             logger.error(f"Remote API request failed: {e}")
+             raise
+
+     async def close(self):
+         """Close HTTP session and cleanup resources"""
+         if self._http_session:
+             await self._http_session.close()
+             self._http_session = None
+
+     async def _invoke_remote_api(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+         task: str,
          service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         stream: Optional[bool] = None,
          **kwargs
-     ):
-         """
-         Streaming invoke method that yields tokens in real-time
-
-         Args:
-             input_data: Input data (text for LLM streaming)
-             task: Task to perform
-             service_type: Type of service (only "text" supports streaming)
-             model_hint: Optional model preference
-             provider_hint: Optional provider preference
-             **kwargs: Additional parameters
-
-         Yields:
-             Individual tokens as they arrive from the model
-
-         Example:
-             async for token in client.stream("Hello world", "chat", "text"):
-                 print(token, end="", flush=True)
-         """
-         if service_type != "text":
-             raise ValueError("Streaming is only supported for text/LLM services")
-
+     ) -> Dict[str, Any]:
+         """Invoke remote API endpoint"""
          try:
-             if self.mode == "api":
-                 async for token in self._stream_api(input_data, task, service_type, model_hint, provider_hint, **kwargs):
-                     yield token
+             # Prepare request data for unified API
+             request_data = {
+                 "task": task,
+                 "service_type": service_type,
+                 **kwargs
+             }
+
+             # Add model and provider if specified
+             if model:
+                 request_data["model"] = model
+             if provider:
+                 request_data["provider"] = provider
+             # For remote API, disable streaming to get JSON response
+             request_data["stream"] = False
+
+             # Handle different input data types
+             if isinstance(input_data, (str, Path)):
+                 request_data["input_data"] = str(input_data)
+             elif isinstance(input_data, (dict, list)):
+                 request_data["input_data"] = input_data
              else:
-                 async for token in self._stream_local(input_data, task, service_type, model_hint, provider_hint, **kwargs):
-                     yield token
+                 # For binary data, convert to base64
+                 import base64
+                 if isinstance(input_data, bytes):
+                     request_data["input_data"] = base64.b64encode(input_data).decode()
+                     request_data["data_type"] = "base64"
+                 else:
+                     request_data["input_data"] = str(input_data)
+
+             # Make API request
+             response = await self._make_api_request("api/v1/invoke", request_data)
+
+             return response
+
          except Exception as e:
-             logger.error(f"Failed to stream {task} on {service_type}: {e}")
-             raise
-
+             logger.error(f"Remote API invocation failed: {e}")
+             return {
+                 "success": False,
+                 "error": str(e),
+                 "metadata": {
+                     "task": task,
+                     "service_type": service_type,
+                     "endpoint": "remote"
+                 }
+             }
+
      async def invoke(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         stream: bool = False,
-         tools: Optional[List[Any]] = None,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         stream: Optional[bool] = None,
+         show_reasoning: Optional[bool] = False,
          **kwargs
-     ) -> Union[Dict[str, Any], object]:
+     ) -> Dict[str, Any]:
          """
          Unified invoke method with intelligent model selection

          Args:
-             input_data: Input data (image path, text, audio, etc.)
-             task: Task to perform (analyze_image, generate_speech, transcribe, etc.)
-             service_type: Type of service (vision, audio, text, image, embedding)
-             model_hint: Optional model preference
-             provider_hint: Optional provider preference
-             stream: Enable streaming for text services (returns AsyncGenerator)
-             tools: Optional list of tools for function calling (only for text services)
-             **kwargs: Additional task-specific parameters
+             input_data: Input data (str, LangChain messages, image path, audio, etc.)
+             task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+             service_type: Type of service (text, vision, audio, image, embedding)
+             model: Model name (if None, uses intelligent selection)
+             provider: Provider name (if None, uses intelligent selection)
+             stream: Enable streaming for text tasks (default True for chat/generate tasks, supports tools)
+             show_reasoning: Show reasoning process for O4 models (uses Responses API)
+             **kwargs: Additional task-specific parameters (including tools for LangChain)

          Returns:
-             If stream=False: Unified response dictionary with result and metadata
-             If stream=True: AsyncGenerator yielding tokens (only for text services)
+             Unified response dictionary with result and metadata
+             For streaming: result["stream"] contains async generator
+             For non-streaming: result["result"] contains the response

          Examples:
-             # Vision tasks
-             await client.invoke("image.jpg", "analyze_image", "vision")
-             await client.invoke("screenshot.png", "detect_ui_elements", "vision")
-             await client.invoke("document.pdf", "extract_table", "vision")
-
-             # Audio tasks
-             await client.invoke("Hello world", "generate_speech", "audio")
-             await client.invoke("audio.mp3", "transcribe", "audio")
-
-             # Text tasks
-             await client.invoke("Translate this text", "translate", "text")
-             await client.invoke("What is AI?", "chat", "text")
+             # Text tasks with streaming (default for chat)
+             result = await client.invoke("Write a story", "chat", "text")
+             if "stream" in result:
+                 async for chunk in result["stream"]:
+                     print(chunk, end="", flush=True)
+             else:
+                 print(result["result"])

-             # Streaming text
-             async for token in await client.invoke("Hello", "chat", "text", stream=True):
-                 print(token, end="", flush=True)
+             # Text tasks with tools (also supports streaming)
+             result = await client.invoke("What's the weather?", "chat", "text", tools=[get_weather])
+             if "stream" in result:
+                 async for chunk in result["stream"]:
+                     print(chunk, end="", flush=True)
+             else:
+                 print(result["result"])

-             # Text with tools
-             await client.invoke("What's 5+3?", "chat", "text", tools=[calculator_function])
+             # Vision tasks (always non-streaming)
+             result = await client.invoke("image.jpg", "analyze", "vision")
+             print(result["result"])

-             # Streaming with tools
-             async for token in await client.invoke("What's 5+3?", "chat", "text", stream=True, tools=[calculator_function]):
-                 print(token, end="")
+             # Audio tasks
+             result = await client.invoke("Hello world", "generate_speech", "audio")
+             print(result["result"])

              # Image generation
-             await client.invoke("A beautiful sunset", "generate_image", "image")
+             result = await client.invoke("A beautiful sunset", "generate_image", "image")
+             print(result["result"])

              # Embedding
-             await client.invoke("Text to embed", "create_embedding", "embedding")
+             result = await client.invoke("Text to embed", "create_embedding", "embedding")
+             print(result["result"])
          """
          try:
-             # Handle streaming case
-             if stream:
-                 if service_type != "text":
-                     raise ValueError("Streaming is only supported for text services")
-
-                 if self.mode == "api":
-                     return self._stream_api(
-                         input_data=input_data,
-                         task=task,
-                         service_type=service_type,
-                         model_hint=model_hint,
-                         provider_hint=provider_hint,
-                         tools=tools,
-                         **kwargs
-                     )
+             # If using remote service endpoint, make API call
+             if self.service_endpoint:
+                 return await self._invoke_remote_api(
+                     input_data=input_data,
+                     task=task,
+                     service_type=service_type,
+                     model=model,
+                     provider=provider,
+                     stream=stream,
+                     **kwargs
+                 )
+
+             # Set default streaming for text tasks
+             if stream is None and service_type == "text":
+                 if task in ["chat", "generate"]:
+                     stream = True  # Enable streaming for chat and generate tasks
                  else:
-                     return self._stream_local(
-                         input_data=input_data,
-                         task=task,
-                         service_type=service_type,
-                         model_hint=model_hint,
-                         provider_hint=provider_hint,
-                         tools=tools,
-                         **kwargs
-                     )
+                     stream = False  # Disable for other text tasks

-             # Route to appropriate mode for non-streaming
-             if self.mode == "api":
-                 return await self._invoke_api(
+             # If streaming is enabled for text tasks, return streaming response
+             if stream and service_type == "text":
+                 return await self._invoke_service_streaming(
                      input_data=input_data,
                      task=task,
                      service_type=service_type,
-                     model_hint=model_hint,
-                     provider_hint=provider_hint,
-                     tools=tools,
+                     model_hint=model,
+                     provider_hint=provider,
+                     show_reasoning=show_reasoning,  # Explicitly pass show_reasoning
                      **kwargs
                  )
              else:
-                 return await self._invoke_local(
+                 # Use regular non-streaming service
+                 return await self._invoke_service(
                      input_data=input_data,
                      task=task,
                      service_type=service_type,
-                     model_hint=model_hint,
-                     provider_hint=provider_hint,
-                     tools=tools,
+                     model_hint=model,
+                     provider_hint=provider,
+                     stream=False,  # Force non-streaming
                      **kwargs
                  )

          except Exception as e:
-             logger.error(f"Failed to invoke {task} on {service_type}: {e}")
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "metadata": {
-                     "task": task,
-                     "service_type": service_type,
-                     "input_type": type(input_data).__name__
-                 }
-             }
+             return self._handle_error(e, {
+                 "operation": "invoke",
+                 "task": task,
+                 "service_type": service_type,
+                 "input_type": type(input_data).__name__
+             })
+
+     async def invoke_stream(
+         self,
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+         task: str,
+         service_type: str,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         return_metadata: bool = False,
+         **kwargs
+     ):
+         """
+         Unified streaming invoke method - returns async generator for real-time token streaming
+
+         Args:
+             input_data: Input data (str, LangChain messages, image path, audio, etc.)
+             task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+             service_type: Type of service (text, vision, audio, image, embedding)
+             model: Model name (if None, uses intelligent selection)
+             provider: Provider name (if None, uses intelligent selection)
+             return_metadata: If True, yields ('metadata', metadata_dict) as final item
+             **kwargs: Additional task-specific parameters (including tools for LangChain)
+
+         Returns:
+             For text services: AsyncGenerator[Union[str, Tuple[str, Dict]], None] - yields tokens as they arrive
+             - Normal items: token strings
+             - Final item (if return_metadata=True): ('metadata', metadata_dict) with billing info
+             For other services: Raises ValueError (streaming not supported)
+
+         Examples:
+             # Simple streaming
+             async for token in client.invoke_stream("Hello!", "chat", "text"):
+                 print(token, end='', flush=True)
+
+             # Streaming with metadata
+             async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
+                 if isinstance(item, tuple) and item[0] == 'metadata':
+                     print(f"\nBilling: {item[1]['billing']}")
+                 else:
+                     print(item, end='', flush=True)
+         """
+         try:
+             # Only text services support streaming
+             if service_type != "text":
+                 raise ValueError(f"Streaming not supported for service type: {service_type}")
+
+             # Tools are supported with streaming
+
+             # Step 1: Select best model for this task
+             selected_model = await self._select_model(
+                 input_data=input_data,
+                 task=task,
+                 service_type=service_type,
+                 model_hint=model,
+                 provider_hint=provider
+             )
+
+             # Step 2: Get appropriate service
+             service = await self._get_service(
+                 service_type=service_type,
+                 model_name=selected_model["model_id"],
+                 provider=selected_model["provider"],
+                 task=task,
+                 use_cache=False  # Don't cache for streaming to avoid state issues
+             )
+
+             # Step 3: Ensure service supports streaming
+             if not hasattr(service, 'astream'):
+                 raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+             # Step 4: Enable streaming on the service
+             if hasattr(service, 'streaming'):
+                 service.streaming = True
+
+             # Step 5: Stream tokens and collect for billing
+             content_chunks = []
+             async for token in service.astream(input_data):
+                 content_chunks.append(token)
+                 yield token
+
+             # Step 6: After streaming is complete, calculate billing info and optionally return metadata
+             try:
+                 await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+
+                 # Get billing info (similar to _invoke_service)
+                 billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+                 # Log billing info for tracking
+                 logger.info(f"Streaming completed - Model: {selected_model['model_id']}, "
+                             f"Tokens: {billing_info.get('total_tokens', 'N/A')}, "
+                             f"Cost: ${billing_info.get('cost_usd', 0):.4f}")
+
+                 # Return metadata if requested
+                 if return_metadata:
+                     metadata = {
+                         "model_used": selected_model["model_id"],
+                         "provider": selected_model["provider"],
+                         "task": task,
+                         "service_type": service_type,
+                         "selection_reason": selected_model.get("reason", "Default selection"),
+                         "billing": billing_info,
+                         "streaming": True,
+                         "tokens_streamed": len(content_chunks),
+                         "content_length": len("".join(content_chunks))
+                     }
+                     yield ('metadata', metadata)
+
+             except Exception as billing_error:
+                 logger.warning(f"Failed to track billing for streaming: {billing_error}")
+                 if return_metadata:
+                     # Return fallback metadata even if billing fails
+                     fallback_metadata = {
+                         "model_used": selected_model["model_id"],
+                         "provider": selected_model["provider"],
+                         "task": task,
+                         "service_type": service_type,
+                         "selection_reason": selected_model.get("reason", "Default selection"),
+                         "billing": {
+                             "cost_usd": 0.0,
+                             "error": str(billing_error),
+                             "currency": "USD"
+                         },
+                         "streaming": True,
+                         "tokens_streamed": len(content_chunks),
+                         "content_length": len("".join(content_chunks))
+                     }
+                     yield ('metadata', fallback_metadata)
+
+         except Exception as e:
+             logger.error(f"Streaming invoke failed: {e}")
+             raise

      async def _select_model(
          self,
@@ -268,8 +580,26 @@
                  "reason": "User specified"
              }

+         # If model_hint provided but no provider_hint, handle special cases
+         if model_hint:
+             # Special handling for hybrid service
+             if model_hint == "hybrid":
+                 return {
+                     "model_id": model_hint,
+                     "provider": "hybrid",
+                     "reason": "Hybrid service requested"
+                 }
+             # If only model_hint provided, use default provider for that service type
+             elif provider_hint is None:
+                 default_provider = self._get_default_provider(service_type)
+                 return {
+                     "model_id": model_hint,
+                     "provider": default_provider,
+                     "reason": "Model specified with default provider"
+                 }
+
          # Use intelligent model selector if available
-         if INTELLIGENT_SELECTOR_AVAILABLE:
+         if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
              try:
                  # Initialize model selector if not already done
                  if self.model_selector is None:
@@ -304,6 +634,17 @@
          # Fallback to default model selection
          return self._get_default_model(service_type, task, provider_hint)

+     def _get_default_provider(self, service_type: str) -> str:
+         """Get default provider for service type"""
+         defaults = {
+             "vision": "openai",
+             "audio": "openai",
+             "text": "openai",
+             "image": "replicate",
+             "embedding": "openai"
+         }
+         return defaults.get(service_type, "openai")
+
      def _get_default_model(
          self,
          service_type: str,
@@ -314,7 +655,7 @@

          defaults = {
              "vision": {
-                 "model_id": "gpt-4o-mini",
+                 "model_id": "gpt-4.1-nano",
                  "provider": "openai"
              },
              "audio": {
@@ -323,7 +664,7 @@
                  "default": {"model_id": "whisper-1", "provider": "openai"}
              },
              "text": {
-                 "model_id": "gpt-4.1-mini",
+                 "model_id": "gpt-4.1-nano",
                  "provider": "openai"
              },
              "image": {
@@ -331,8 +672,9 @@
                  "provider": "replicate"
              },
              "embedding": {
-                 "model_id": "text-embedding-3-small",
-                 "provider": "openai"
+                 "embed": {"model_id": "text-embedding-3-small", "provider": "openai"},
+                 "rerank": {"model_id": "isa-jina-reranker-v2-service", "provider": "isa"},
+                 "default": {"model_id": "text-embedding-3-small", "provider": "openai"}
              }
          }

@@ -344,6 +686,14 @@
                  default = defaults["audio"]["stt"]
              else:
                  default = defaults["audio"]["default"]
+         # Handle embedding service type with task-specific models
+         elif service_type == "embedding":
+             if "rerank" in task:
+                 default = defaults["embedding"]["rerank"]
+             elif "embed" in task:
+                 default = defaults["embedding"]["embed"]
+             else:
+                 default = defaults["embedding"]["default"]
          else:
              default = defaults.get(service_type, defaults["vision"])

@@ -363,59 +713,56 @@
          model_name: str,
          provider: str,
          task: str,
-         tools: Optional[List[Any]] = None
+         use_cache: bool = True
      ) -> Any:
          """Get appropriate service instance"""

          cache_key = f"{service_type}_{provider}_{model_name}"

-         # Check cache first
-         if cache_key in self._service_cache:
-             service = self._service_cache[cache_key]
-             # If tools are needed, bind them to the service
-             if tools and service_type == "text":
-                 return service.bind_tools(tools)
-             return service
+         # Check cache first (if caching is enabled)
+         if use_cache and cache_key in self._service_cache:
+             return self._service_cache[cache_key]

          try:
+             # Validate service type
+             self._validate_service_type(service_type)
+
              # Route to appropriate AIFactory method
              if service_type == "vision":
                  service = self.ai_factory.get_vision(model_name, provider)
-
              elif service_type == "audio":
                  if "speech" in task or "tts" in task:
                      service = self.ai_factory.get_tts(model_name, provider)
                  elif "transcribe" in task or "stt" in task:
                      service = self.ai_factory.get_stt(model_name, provider)
                  else:
-                     # Default to STT for unknown audio tasks
                      service = self.ai_factory.get_stt(model_name, provider)
-
              elif service_type == "text":
                  service = self.ai_factory.get_llm(model_name, provider)
-
              elif service_type == "image":
                  service = self.ai_factory.get_img("t2i", model_name, provider)
-
              elif service_type == "embedding":
                  service = self.ai_factory.get_embed(model_name, provider)

-             else:
-                 raise ValueError(f"Unsupported service type: {service_type}")
-
-             # Cache the service
-             self._service_cache[cache_key] = service
-
-             # If tools are needed, bind them to the service
-             if tools and service_type == "text":
-                 return service.bind_tools(tools)
-
+             # Cache the service (if caching is enabled)
+             if use_cache:
+                 self._service_cache[cache_key] = service
              return service

          except Exception as e:
              logger.error(f"Failed to get service {service_type}/{provider}/{model_name}: {e}")
              raise

+     def _validate_service_type(self, service_type: str) -> None:
+         """Validate service type is supported"""
+         if service_type not in self.SUPPORTED_SERVICE_TYPES:
+             raise ValueError(f"Unsupported service type: {service_type}")
+
+     def _map_task(self, task: str, service_type: str) -> str:
+         """Map common task names to unified task names"""
+         task_mapping = self.TASK_MAPPINGS.get(service_type, {})
+         return task_mapping.get(task, task)
+
      async def _execute_task(
          self,
          service: Any,
@@ -427,166 +774,69 @@
          """Execute the task using the appropriate service"""

          try:
+             self._validate_service_type(service_type)
+             unified_task = self._map_task(task, service_type)
+
              if service_type == "vision":
-                 return await self._execute_vision_task(service, input_data, task, **kwargs)
+                 return await service.invoke(
+                     image=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

              elif service_type == "audio":
-                 return await self._execute_audio_task(service, input_data, task, **kwargs)
+                 if unified_task in ["synthesize", "text_to_speech", "tts"]:
+                     return await service.invoke(
+                         text=input_data,
+                         task=unified_task,
+                         **kwargs
+                     )
+                 else:
+                     return await service.invoke(
+                         audio_input=input_data,
+                         task=unified_task,
+                         **kwargs
+                     )

              elif service_type == "text":
-                 return await self._execute_text_task(service, input_data, task, **kwargs)
+                 # Extract show_reasoning from kwargs if present
+                 show_reasoning = kwargs.pop('show_reasoning', False)
+                 result = await service.invoke(
+                     input_data=input_data,
+                     task=unified_task,
+                     show_reasoning=show_reasoning,
+                     **kwargs
+                 )
+
+                 logger.debug(f"Service result type: {type(result)}")
+                 logger.debug(f"Service result: {result}")
+
+                 if isinstance(result, dict) and 'message' in result:
+                     message = result['message']
+                     logger.debug(f"Extracted message type: {type(message)}")
+                     logger.debug(f"Extracted message: {message}")
+                     return message
+                 else:
+                     return result

              elif service_type == "image":
-                 return await self._execute_image_task(service, input_data, task, **kwargs)
+                 return await service.invoke(
+                     prompt=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

              elif service_type == "embedding":
-                 return await self._execute_embedding_task(service, input_data, task, **kwargs)
-
-             else:
-                 raise ValueError(f"Unsupported service type: {service_type}")
+                 return await service.invoke(
+                     input_data=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

          except Exception as e:
              logger.error(f"Task execution failed: {e}")
              raise

-     async def _execute_vision_task(self, service, input_data, task, **kwargs):
-         """Execute vision-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "analyze_image": "analyze_image",
-             "detect_ui_elements": "detect_ui",
-             "extract_table": "extract_table",
-             "extract_text": "extract_text",
-             "ocr": "extract_text",
-             "describe": "analyze_image"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method with proper parameters
-         return await service.invoke(
-             image=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-     async def _execute_audio_task(self, service, input_data, task, **kwargs):
-         """Execute audio-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "generate_speech": "synthesize",
-             "text_to_speech": "synthesize",
-             "tts": "synthesize",
-             "transcribe": "transcribe",
-             "speech_to_text": "transcribe",
-             "stt": "transcribe",
-             "translate": "translate",
-             "detect_language": "detect_language"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method with correct parameter name based on task type
-         if unified_task in ["synthesize", "text_to_speech", "tts"]:
-             # TTS services expect 'text' parameter
-             return await service.invoke(
-                 text=input_data,
-                 task=unified_task,
-                 **kwargs
-             )
-         else:
-             # STT services expect 'audio_input' parameter
-             return await service.invoke(
-                 audio_input=input_data,
-                 task=unified_task,
-                 **kwargs
-             )
-
-     async def _execute_text_task(self, service, input_data, task, **kwargs):
-         """Execute text-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "chat": "chat",
-             "generate": "generate",
-             "complete": "complete",
-             "translate": "translate",
-             "summarize": "summarize",
-             "analyze": "analyze",
-             "extract": "extract",
-             "classify": "classify"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         result = await service.invoke(
-             input_data=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-         # Handle the new response format from LLM services
-         # LLM services now return {"message": ..., "success": ..., "metadata": ...}
-         if isinstance(result, dict) and "message" in result:
-             # Extract the message content (convert AIMessage to string)
-             message = result["message"]
-             if hasattr(message, 'content'):
-                 # Handle langchain AIMessage objects
-                 return message.content
-             elif isinstance(message, str):
-                 return message
-             else:
-                 # Fallback: convert to string
-                 return str(message)
-
-         # Fallback for other service types or legacy format
-         return result
-
-     async def _execute_image_task(self, service, input_data, task, **kwargs):
-         """Execute image generation tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "generate_image": "generate",
-             "generate": "generate",
-             "img2img": "img2img",
-             "image_to_image": "img2img",
-             "generate_batch": "generate_batch"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         return await service.invoke(
-             prompt=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-     async def _execute_embedding_task(self, service, input_data, task, **kwargs):
-         """Execute embedding tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "create_embedding": "embed",
-             "embed": "embed",
-             "embed_batch": "embed_batch",
-             "chunk_and_embed": "chunk_and_embed",
-             "similarity": "similarity",
-             "find_similar": "find_similar"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         return await service.invoke(
-             input_data=input_data,
-             task=unified_task,
-             **kwargs
-         )

      def clear_cache(self):
          """Clear service cache"""
@@ -602,7 +852,7 @@
          Returns:
              List of available models with metadata
          """
-         if INTELLIGENT_SELECTOR_AVAILABLE:
+         if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
              try:
                  if self.model_selector is None:
                      self.model_selector = await get_model_selector(self.config)
@@ -649,17 +899,26 @@
                  "error": str(e)
              }

-     async def _invoke_local(
+     def _handle_error(self, e: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+         """Handle errors consistently across methods"""
+         error_msg = f"Failed to {context.get('operation', 'execute')} {context.get('task', '')} on {context.get('service_type', '')}: {e}"
+         logger.error(error_msg)
+         return {
+             "success": False,
+             "error": str(e),
+             "metadata": context
+         }
+
+     async def _invoke_service_streaming(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
          model_hint: Optional[str] = None,
          provider_hint: Optional[str] = None,
-         tools: Optional[List[Any]] = None,
          **kwargs
      ) -> Dict[str, Any]:
-         """Local invoke using AI Factory (original logic)"""
+         """Service invoke that returns streaming response with async generator"""
          try:
              # Step 1: Select best model for this task
              selected_model = await self._select_model(
@@ -676,305 +935,205 @@
                  model_name=selected_model["model_id"],
                  provider=selected_model["provider"],
                  task=task,
-                 tools=tools
+                 use_cache=False  # Don't cache for streaming to avoid state issues
              )

-             # Step 3: Execute task with unified interface
-             result = await self._execute_task(
-                 service=service,
-                 input_data=input_data,
-                 task=task,
-                 service_type=service_type,
-                 **kwargs
-             )
+             # Step 3: Handle tools for LLM services (bind tools if provided)
+             tools = kwargs.pop("tools", None)
+             if service_type == "text" and tools:
+                 service = await self._get_service(
+                     service_type=service_type,
+                     model_name=selected_model["model_id"],
+                     provider=selected_model["provider"],
+                     task=task,
+                     use_cache=False
+                 )
+                 service = service.bind_tools(tools)
+
+             # Step 4: Ensure service supports streaming
+             if not hasattr(service, 'astream'):
+                 raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+             # Step 5: Enable streaming on the service
+             if hasattr(service, 'streaming'):
+                 service.streaming = True

-             # Step 4: Return unified response
+             # Step 6: Create async generator wrapper that yields tokens
+             async def stream_generator():
+                 # Pass show_reasoning parameter if available for LLM services
+                 if service_type == "text" and hasattr(service, 'astream'):
+                     show_reasoning = kwargs.get('show_reasoning', False)
+                     logger.debug(f"Stream generator: show_reasoning={show_reasoning}")
+                     if 'show_reasoning' in kwargs:
+                         async for token in service.astream(input_data, show_reasoning=show_reasoning):
+                             yield token
+                     else:
+                         async for token in service.astream(input_data):
+                             yield token
+                 else:
+                     async for token in service.astream(input_data):
+                         yield token
+
+             # Return response with stream generator and metadata
              return {
                  "success": True,
-                 "result": result,
+                 "stream": stream_generator(),
                  "metadata": {
                      "model_used": selected_model["model_id"],
                      "provider": selected_model["provider"],
                      "task": task,
                      "service_type": service_type,
-                     "selection_reason": selected_model.get("reason", "Default selection")
+                     "selection_reason": selected_model.get("reason", "Default selection"),
+                     "streaming": True
                  }
              }
          except Exception as e:
-             logger.error(f"Local invoke failed: {e}")
+             logger.error(f"Streaming service invoke failed: {e}")
              raise
-
-     async def _invoke_api(
+
+     async def _invoke_service(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
          model_hint: Optional[str] = None,
          provider_hint: Optional[str] = None,
+         stream: Optional[bool] = None,
          **kwargs
      ) -> Dict[str, Any]:
-         """API invoke using HTTP requests"""
-
-         # Handle file inputs
-         if isinstance(input_data, Path):
-             return await self._invoke_api_file(
-                 file_path=input_data,
-                 task=task,
+         """Direct service invoke - passes LangChain objects and tools directly to services"""
+         try:
+             # Step 1: Select best model for this task
+             selected_model = await self._select_model(
+                 input_data=input_data,
+                 task=task,
                  service_type=service_type,
                  model_hint=model_hint,
-                 provider_hint=provider_hint,
-                 **kwargs
+                 provider_hint=provider_hint
              )
-
-         # Handle binary data
-         if isinstance(input_data, bytes):
-             return await self._invoke_api_binary(
-                 data=input_data,
+
+             # Step 2: Get appropriate service
+             service = await self._get_service(
+                 service_type=service_type,
+                 model_name=selected_model["model_id"],
+                 provider=selected_model["provider"],
+                 task=task
+             )
+
+             # Step 3: Handle tools for LLM services (bind tools if provided)
+             tools = kwargs.pop("tools", None)
+             if service_type == "text" and tools:
+                 service = await self._get_service(
+                     service_type=service_type,
+                     model_name=selected_model["model_id"],
+                     provider=selected_model["provider"],
+                     task=task,
+                     use_cache=False
+                 )
+                 service = service.bind_tools(tools)
+                 # Note: streaming is still supported with tools
+
+             # Step 4: Set streaming for text services
+             if service_type == "text" and stream is not None:
+                 if hasattr(service, 'streaming'):
+                     service.streaming = stream
+
+             # Step 5: Execute task with unified interface
+             result = await self._execute_task(
+                 service=service,
+                 input_data=input_data,
                  task=task,
                  service_type=service_type,
-                 model_hint=model_hint,
-                 provider_hint=provider_hint,
                  **kwargs
              )
-
-         # Handle text/JSON data
-         payload = {
-             "input_data": input_data,
-             "task": task,
-             "service_type": service_type,
-             "model_hint": model_hint,
-             "provider_hint": provider_hint,
-             "parameters": kwargs
-         }
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke",
-                     json=payload,
-                     headers=self.headers
-                 ) as response:
-
-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API invoke failed: {e}")
-                 raise
+
+             # Step 6: Wait for billing tracking to complete, then get billing information
+             await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+             billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+             # Return unified response
+             return {
+                 "success": True,
+                 "result": result,
+                 "metadata": {
+                     "model_used": selected_model["model_id"],
+                     "provider": selected_model["provider"],
+                     "task": task,
+                     "service_type": service_type,
+                     "selection_reason": selected_model.get("reason", "Default selection"),
+                     "billing": billing_info
+                 }
+             }
+         except Exception as e:
+             logger.error(f"Service invoke failed: {e}")
+             raise

-     async def _invoke_api_file(
-         self,
-         file_path: Path,
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ) -> Dict[str, Any]:
-         """API file upload"""
-
-         if not file_path.exists():
-             raise FileNotFoundError(f"File not found: {file_path}")
-
-         data = aiohttp.FormData()
-         data.add_field('task', task)
-         data.add_field('service_type', service_type)
-
-         if model_hint:
-             data.add_field('model_hint', model_hint)
-         if provider_hint:
-             data.add_field('provider_hint', provider_hint)
-
-         data.add_field('file',
-                        open(file_path, 'rb'),
-                        filename=file_path.name,
-                        content_type='application/octet-stream')
-
-         headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke-file",
-                     data=data,
-                     headers=headers
-                 ) as response:
+     def _get_billing_info(self, service: Any, model_id: str) -> Dict[str, Any]:
+         """Extract billing information from service after task execution"""
+         try:
+             # Check if service has model_manager with billing_tracker
+             if hasattr(service, 'model_manager') and hasattr(service.model_manager, 'billing_tracker'):
+                 billing_tracker = service.model_manager.billing_tracker
+
+                 # Get the latest usage record for this model
+                 model_records = [
+                     record for record in billing_tracker.usage_records
+                     if record.model_id == model_id
+                 ]
+
+                 if model_records:
+                     # Get the most recent record
+                     latest_record = max(model_records, key=lambda r: r.timestamp)

-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API file upload failed: {e}")
-                 raise
+                     return {
+                         "cost_usd": latest_record.cost_usd,
+                         "input_tokens": latest_record.input_tokens,
+                         "output_tokens": latest_record.output_tokens,
+                         "total_tokens": latest_record.total_tokens,
+                         "operation": latest_record.operation,
+                         "timestamp": latest_record.timestamp,
+                         "currency": "USD"
+                     }
+
+             # Fallback: no billing info available
+             return {
+                 "cost_usd": 0.0,
+                 "input_tokens": None,
+                 "output_tokens": None,
+                 "total_tokens": None,
+                 "operation": None,
+                 "timestamp": None,
+                 "currency": "USD",
+                 "note": "Billing information not available"
+             }
+
+         except Exception as e:
+             logger.warning(f"Failed to get billing info: {e}")
+             return {
+                 "cost_usd": 0.0,
+                 "error": str(e),
+                 "currency": "USD"
+             }

-     async def _invoke_api_binary(
-         self,
-         data: bytes,
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ) -> Dict[str, Any]:
-         """API binary upload"""
-
-         form_data = aiohttp.FormData()
-         form_data.add_field('task', task)
-         form_data.add_field('service_type', service_type)
-
-         if model_hint:
-             form_data.add_field('model_hint', model_hint)
-         if provider_hint:
-             form_data.add_field('provider_hint', provider_hint)
-
-         form_data.add_field('file',
-                             data,
-                             filename='data.bin',
-                             content_type='application/octet-stream')
-
-         headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke-file",
-                     data=form_data,
-                     headers=headers
-                 ) as response:
-
-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API binary upload failed: {e}")
-                 raise
-
-     async def _stream_local(
-         self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         tools: Optional[List[Any]] = None,
-         **kwargs
-     ):
-         """Local streaming using AI Factory"""
-         # Step 1: Select best model for this task
-         selected_model = await self._select_model(
-             input_data=input_data,
-             task=task,
-             service_type=service_type,
-             model_hint=model_hint,
-             provider_hint=provider_hint
-         )
-
-         # Step 2: Get appropriate service
-         service = await self._get_service(
-             service_type=service_type,
-             model_name=selected_model["model_id"],
-             provider=selected_model["provider"],
-             task=task,
-             tools=tools
-         )
-
-         # Step 3: Yield tokens from the stream
-         async for token in service.astream(input_data):
-             yield token
-
-     async def _stream_api(
-         self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ):
-         """API streaming using Server-Sent Events (SSE)"""
-
-         # Only support text streaming for now
-         if not isinstance(input_data, (str, dict)):
-             raise ValueError("API streaming only supports text input")
-
-         payload = {
-             "input_data": input_data,
-             "task": task,
-             "service_type": service_type,
-             "model_hint": model_hint,
-             "provider_hint": provider_hint,
-             "stream": True,
-             "parameters": kwargs
-         }
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/stream",
-                     json=payload,
-                     headers=self.headers
-                 ) as response:
-
-                     if response.status == 200:
-                         # Parse SSE stream
-                         async for line in response.content:
-                             if line:
-                                 line_str = line.decode().strip()
-                                 if line_str.startswith("data: "):
-                                     try:
-                                         # Parse SSE data
-                                         import json
-                                         json_str = line_str[6:]  # Remove "data: " prefix
-                                         data = json.loads(json_str)
-
-                                         if data.get("type") == "token" and "token" in data:
-                                             yield data["token"]
-                                         elif data.get("type") == "completion":
-                                             # End of stream
-                                             break
-                                         elif data.get("type") == "error":
-                                             raise Exception(f"Server error: {data.get('error')}")
-
-                                     except json.JSONDecodeError:
-                                         # Skip malformed lines
-                                         continue
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API streaming error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API streaming failed: {e}")
-                 raise


  # Convenience function for quick access
  def create_client(
      config: Optional[Dict[str, Any]] = None,
-     mode: str = "local",
-     api_url: Optional[str] = None,
+     service_endpoint: Optional[str] = None,
      api_key: Optional[str] = None
  ) -> ISAModelClient:
      """Create ISA Model Client instance

      Args:
          config: Optional configuration
-         mode: "local" for direct AI Factory, "api" for HTTP API calls
-         api_url: API base URL (required if mode="api")
-         api_key: API key for authentication (optional)
+         service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+         api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)

      Returns:
          ISAModelClient instance
      """
-     return ISAModelClient(config=config, mode=mode, api_url=api_url, api_key=api_key)
+     return ISAModelClient(config=config, service_endpoint=service_endpoint, api_key=api_key)


  # Export for easy import
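For reference, a minimal sketch of driving the new invoke_stream surface added in this diff. It assumes a locally configured environment (providers and API keys already set up); the billing fields read here are the ones _get_billing_info populates, and close() releases the aiohttp session the client opens for remote endpoints:

```python
import asyncio
from isa_model.client import create_client

async def main():
    client = create_client()  # no service_endpoint: local AIFactory mode

    # invoke_stream yields token strings, then ('metadata', {...}) as the
    # final item when return_metadata=True.
    async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
        if isinstance(item, tuple) and item[0] == "metadata":
            print(f"\nCost: ${item[1]['billing'].get('cost_usd', 0):.4f}")
        else:
            print(item, end="", flush=True)

    await client.close()  # closes the HTTP session if one was created

asyncio.run(main())
```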