isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,426 @@
1
+ """
2
+ Pricing Manager for ISA Model SDK
3
+
4
+ Centralized pricing management for all AI providers and models.
5
+ Supports external configuration, dynamic updates, and multiple pricing models.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import yaml
11
+ import logging
12
+ from typing import Dict, Optional, Any, Union, List
13
+ from pathlib import Path
14
+ from datetime import datetime, timedelta
15
+ from dataclasses import dataclass, field
16
+
17
+ from .types import Provider
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class ModelPricing:
    """
    Price sheet for a single provider/model pair.

    How ``input_cost`` / ``output_cost`` are scaled depends on ``unit_type``
    and is decided by ``PricingManager.calculate_cost``:

    - ``"token"``: cost per 1,000,000 tokens (input and output billed)
    - ``"character"``: cost per 1,000 characters (input only)
    - ``"minute"`` / ``"image"``: cost per input unit
    - ``"request"``: ``input_cost`` is charged once per request
    """

    # --- identity -----------------------------------------------------
    provider: str
    model_name: str

    # --- usage-based pricing -----------------------------------------
    input_cost: float = 0.0   # Price applied to input units (see unit_type)
    output_cost: float = 0.0  # Price applied to output units (see unit_type)
    unit_type: str = "token"  # "token", "character", "minute", "request", "image"

    # --- fixed / infrastructure pricing -------------------------------
    base_cost: float = 0.0                     # Flat fee added for every request
    infrastructure_cost_per_hour: float = 0.0  # For self-hosted models

    # --- bookkeeping --------------------------------------------------
    currency: str = "USD"
    last_updated: Optional[datetime] = None  # Set when the entry is updated at runtime
    metadata: Dict[str, Any] = field(default_factory=dict)  # Free-form extra data
35
+
36
+
37
class PricingManager:
    """
    Manages pricing information for all AI models and providers.

    Features:
    - Load pricing from YAML/JSON configuration files
    - Support multiple pricing models (per token, per minute, per request, etc.)
    - Dynamic pricing updates without code changes
    - Caching for performance
    - Fallback pricing when specific models aren't found

    Price units (as applied by ``calculate_cost``):
    - ``token``: ``input``/``output`` are prices per 1,000,000 tokens
    - ``character``: ``input`` is the price per 1,000 characters
    - ``minute`` / ``image``: ``input`` is the price per unit
    - ``request``: ``input`` is charged once per request

    Example:
    ```python
    from isa_model.core.pricing_manager import PricingManager

    pricing = PricingManager()

    # Get pricing for a specific model
    cost = pricing.calculate_cost(
        provider="openai",
        model_name="gpt-4o-mini",
        input_units=1000,
        output_units=500
    )

    # Check if pricing is available
    if pricing.has_pricing("openai", "gpt-4o"):
        print("Pricing available for GPT-4o")
    ```
    """

    # Per-provider fallback entry used when no model-specific entry matches.
    _DEFAULT_MODEL_KEY = "default"
    # File suffixes handled by the YAML reader/writer; anything else is JSON.
    _YAML_SUFFIXES = (".yaml", ".yml")

    def __init__(self, pricing_config_path: Optional[Path] = None):
        """
        Initialize pricing manager and load pricing data immediately.

        Args:
            pricing_config_path: Optional explicit configuration file. When
                omitted (or missing on disk) common locations are searched and
                built-in defaults are used as the last resort.
        """
        self.pricing_data: Dict[str, Dict[str, "ModelPricing"]] = {}
        # Accept plain strings as well; the rest of the class needs a Path.
        self.config_path = (
            Path(pricing_config_path) if pricing_config_path is not None else None
        )
        self._last_load_time: Optional[datetime] = None
        self._cache_ttl_hours = 24  # Reload pricing daily

        self._load_pricing_data()
        logger.info("PricingManager initialized")

    def _load_pricing_data(self):
        """Load pricing data from configuration files, or defaults if none exist."""
        # Try to load from specified config path first
        if self.config_path and self.config_path.exists():
            self._load_from_file(self.config_path)
            return

        # Try to find configuration files in common locations
        working_dir = Path.cwd()
        project_root = self._find_project_root()  # walk the tree only once
        possible_paths = [
            working_dir / "pricing.yaml",
            working_dir / "pricing.yml",
            working_dir / "pricing.json",
            working_dir / "config" / "pricing.yaml",
            project_root / "pricing.yaml",
            project_root / "config" / "pricing.yaml",
        ]

        for path in possible_paths:
            if path.exists():
                logger.info(f"Loading pricing from {path}")
                self._load_from_file(path)
                self.config_path = path
                return

        # If no config file found, load default pricing
        logger.warning("No pricing configuration file found, loading defaults")
        self._load_default_pricing()

    def _find_project_root(self) -> Path:
        """
        Find the project root directory.

        Walks up from this module's directory until a directory containing
        ``pyproject.toml`` or ``setup.py`` is found; falls back to the current
        working directory when the filesystem root is reached.
        """
        current = Path(__file__).parent
        while current != current.parent:
            if (current / "pyproject.toml").exists() or (current / "setup.py").exists():
                return current
            current = current.parent
        return Path.cwd()

    def _load_from_file(self, file_path: Path):
        """
        Load pricing from a YAML or JSON file.

        ``.yaml``/``.yml`` files are read with ``yaml.safe_load``; every other
        suffix is treated as JSON. Any failure (unreadable file, syntax error,
        malformed structure) is logged and the built-in defaults are loaded
        instead, so the manager always ends up with usable data.
        """
        try:
            with open(file_path, 'r', encoding="utf-8") as f:
                if file_path.suffix.lower() in self._YAML_SUFFIXES:
                    data = yaml.safe_load(f)
                else:
                    data = json.load(f)

            self._parse_pricing_data(data)
            self._last_load_time = datetime.now()
            logger.info(f"Loaded pricing for {len(self.pricing_data)} providers")

        except Exception as e:
            # Deliberately broad: loading is best-effort and must never
            # prevent the manager from being constructed.
            logger.error(f"Failed to load pricing from {file_path}: {e}")
            self._load_default_pricing()

    def _parse_pricing_data(self, data: Dict[str, Any]):
        """
        Parse pricing data from configuration and replace ``self.pricing_data``.

        Accepts either ``{"providers": {provider: {model: {...}}}}`` or the
        flat ``{provider: {model: {...}}}`` form. Provider sections that are
        not mappings are skipped with a warning, model entries that are not
        mappings are ignored, and null fields fall back to their defaults.

        Raises:
            ValueError: If the document (or its ``providers`` section) is not
                a mapping, e.g. for an empty configuration file.
        """
        if not isinstance(data, dict):
            raise ValueError(
                f"Pricing configuration must be a mapping, got {type(data).__name__}"
            )

        providers_data = data.get("providers", data)  # Support both formats
        if not isinstance(providers_data, dict):
            raise ValueError("'providers' section of pricing configuration must be a mapping")

        # Build into a local dict so a failure never leaves a half-populated table.
        parsed: Dict[str, Dict[str, "ModelPricing"]] = {}

        for provider_name, models_data in providers_data.items():
            if not isinstance(models_data, dict):
                logger.warning(f"Skipping pricing section '{provider_name}': expected a mapping of models")
                continue

            provider_models = {}
            for model_name, pricing_info in models_data.items():
                if not isinstance(pricing_info, dict):
                    continue
                # `or` guards against explicit nulls in the config file.
                provider_models[model_name] = ModelPricing(
                    provider=provider_name,
                    model_name=model_name,
                    input_cost=pricing_info.get("input") or 0.0,
                    output_cost=pricing_info.get("output") or 0.0,
                    unit_type=pricing_info.get("unit_type") or "token",
                    base_cost=pricing_info.get("base_cost") or 0.0,
                    infrastructure_cost_per_hour=pricing_info.get("infrastructure_cost_per_hour") or 0.0,
                    currency=pricing_info.get("currency") or "USD",
                    metadata=pricing_info.get("metadata") or {},
                )
            parsed[provider_name] = provider_models

        self.pricing_data = parsed

    def _load_default_pricing(self):
        """
        Load default pricing data as fallback.

        Token prices are expressed per 1,000,000 tokens because that is the
        unit ``calculate_cost`` divides by; minute/image prices are per unit.
        """
        default_pricing = {
            "openai": {
                "gpt-4o-mini": {"input": 0.15, "output": 0.60, "unit_type": "token"},
                "gpt-4o": {"input": 5.0, "output": 15.0, "unit_type": "token"},
                "gpt-4-turbo": {"input": 10.0, "output": 30.0, "unit_type": "token"},
                "gpt-4": {"input": 30.0, "output": 60.0, "unit_type": "token"},
                "gpt-3.5-turbo": {"input": 0.5, "output": 1.5, "unit_type": "token"},
                "text-embedding-3-small": {"input": 0.02, "output": 0.0, "unit_type": "token"},
                "text-embedding-3-large": {"input": 0.13, "output": 0.0, "unit_type": "token"},
                "whisper-1": {"input": 0.006, "output": 0.0, "unit_type": "minute"},
                "dall-e-3": {"input": 0.04, "output": 0.0, "unit_type": "image"},
            },
            "anthropic": {
                "claude-3-opus": {"input": 15.0, "output": 75.0, "unit_type": "token"},
                "claude-3-sonnet": {"input": 3.0, "output": 15.0, "unit_type": "token"},
                "claude-3-haiku": {"input": 0.25, "output": 1.25, "unit_type": "token"},
            },
            "replicate": {
                "black-forest-labs/flux-dev": {"input": 0.003, "output": 0.0, "unit_type": "image"},
                "meta/meta-llama-3-70b-instruct": {"input": 0.65, "output": 2.75, "unit_type": "token"},
            },
            "ollama": {
                "default": {"input": 0.0, "output": 0.0, "unit_type": "token"},
            },
            "modal": {
                "default": {"input": 0.0, "output": 0.0, "infrastructure_cost_per_hour": 0.4, "unit_type": "token"},
            }
        }

        self._parse_pricing_data({"providers": default_pricing})
        logger.info("Loaded default pricing data")

    def get_model_pricing(self, provider: str, model_name: str) -> Optional["ModelPricing"]:
        """
        Get pricing information for a specific model.

        Resolution order:
        1. exact model name;
        2. the longest configured name contained in ``model_name`` (so a
           versioned ``gpt-4o-2024-08-06`` resolves to ``gpt-4o`` rather than
           ``gpt-4``, regardless of configuration order);
        3. the provider's ``"default"`` entry.

        Returns:
            The matching ``ModelPricing`` or ``None`` when nothing applies.
        """
        self._refresh_if_needed()

        provider_data = self.pricing_data.get(provider, {})

        # Try exact match first
        if model_name in provider_data:
            return provider_data[model_name]

        # Try partial matches (for versioned models); most specific key wins
        partial_matches = [
            known_name
            for known_name in provider_data
            if known_name
            and known_name != self._DEFAULT_MODEL_KEY
            and known_name in model_name
        ]
        if partial_matches:
            return provider_data[max(partial_matches, key=len)]

        # Try default for provider
        return provider_data.get(self._DEFAULT_MODEL_KEY)

    def has_pricing(self, provider: str, model_name: str) -> bool:
        """Check if pricing is available for a model (including fallbacks)."""
        return self.get_model_pricing(provider, model_name) is not None

    def calculate_cost(self,
                       provider: str,
                       model_name: str,
                       input_units: Union[int, float] = 0,
                       output_units: Union[int, float] = 0,
                       requests: int = 1) -> float:
        """
        Calculate the cost for using a model.

        Args:
            provider: Provider name (e.g., "openai", "anthropic")
            model_name: Model name (e.g., "gpt-4o-mini")
            input_units: Number of input units (tokens, characters, minutes, etc.)
            output_units: Number of output units (only billed for token pricing)
            requests: Number of requests made

        Returns:
            Total cost in the pricing entry's currency (USD by default);
            0.0 when no pricing is known for the model.
        """
        pricing = self.get_model_pricing(provider, model_name)
        if not pricing:
            logger.warning(f"No pricing found for {provider}/{model_name}")
            return 0.0

        unit_type = pricing.unit_type
        total_cost = 0.0

        # Calculate variable costs based on usage
        if unit_type == "token":
            # Standard per-token pricing
            total_cost += (input_units / 1000000) * pricing.input_cost  # Cost per 1M tokens
            total_cost += (output_units / 1000000) * pricing.output_cost
        elif unit_type == "character":
            # Per-character pricing (TTS)
            total_cost += (input_units / 1000) * pricing.input_cost  # Cost per 1K characters
        elif unit_type in ("minute", "image"):
            # Per-minute (audio) and per-image pricing are billed per unit
            total_cost += input_units * pricing.input_cost
        elif unit_type == "request":
            # Per-request pricing
            total_cost += requests * pricing.input_cost
        else:
            # Unknown unit types contribute no usage cost; surface that fact.
            logger.warning(f"Unknown unit type '{unit_type}' for {provider}/{model_name}")

        # Add base cost per request
        total_cost += requests * pricing.base_cost

        return total_cost

    def get_cheapest_model(self,
                           provider: Optional[str] = None,
                           unit_type: str = "token",
                           min_input_units: int = 1000) -> Optional[Dict[str, Any]]:
        """
        Find the cheapest model for a given usage pattern.

        The estimate assumes ``min_input_units`` units of input and the same
        number of output units.

        Args:
            provider: Specific provider to search, or None for all providers
            unit_type: Type of units to optimize for
            min_input_units: Minimum expected input units for cost calculation

        Returns:
            Dictionary with provider, model_name, estimated_cost and pricing,
            or None when no model uses the requested unit type.
        """
        self._refresh_if_needed()

        providers_to_check = [provider] if provider else list(self.pricing_data)

        candidates = []
        for prov in providers_to_check:
            for model_name, pricing in self.pricing_data.get(prov, {}).items():
                if pricing.unit_type != unit_type:
                    continue

                # Calculate cost for the given usage
                estimated_cost = self.calculate_cost(
                    prov, model_name,
                    input_units=min_input_units,
                    output_units=min_input_units
                )
                candidates.append({
                    "provider": prov,
                    "model_name": model_name,
                    "estimated_cost": estimated_cost,
                    "pricing": pricing
                })

        if not candidates:
            return None

        # min() keeps the first candidate on ties, like a stable sort would.
        return min(candidates, key=lambda candidate: candidate["estimated_cost"])

    def get_provider_summary(self, provider: str) -> Dict[str, Any]:
        """
        Get summary of pricing for a provider.

        Returns:
            Dictionary with the provider name, a list of per-model price
            dictionaries and the model count (empty for unknown providers).
        """
        self._refresh_if_needed()

        models = [
            {
                "model_name": model_name,
                "unit_type": pricing.unit_type,
                "input_cost": pricing.input_cost,
                "output_cost": pricing.output_cost,
                "base_cost": pricing.base_cost
            }
            for model_name, pricing in self.pricing_data.get(provider, {}).items()
        ]

        return {
            "provider": provider,
            "models": models,
            "total_models": len(models)
        }

    def update_model_pricing(self,
                             provider: str,
                             model_name: str,
                             input_cost: Optional[float] = None,
                             output_cost: Optional[float] = None,
                             **kwargs):
        """
        Update pricing for a specific model, creating the entry if needed.

        The change is in-memory only: call ``save_pricing_config`` to persist
        it, otherwise it is lost when the cached data is reloaded.

        Args:
            provider: Provider name
            model_name: Model name
            input_cost: New input price, or None to leave unchanged
            output_cost: New output price, or None to leave unchanged
            **kwargs: Other ``ModelPricing`` fields to overwrite; unknown
                field names are ignored with a warning.
        """
        provider_models = self.pricing_data.setdefault(provider, {})

        if model_name not in provider_models:
            provider_models[model_name] = ModelPricing(
                provider=provider,
                model_name=model_name
            )

        pricing = provider_models[model_name]

        if input_cost is not None:
            pricing.input_cost = input_cost
        if output_cost is not None:
            pricing.output_cost = output_cost

        for key, value in kwargs.items():
            if hasattr(pricing, key):
                setattr(pricing, key, value)
            else:
                logger.warning(f"Ignoring unknown pricing field '{key}' for {provider}/{model_name}")

        pricing.last_updated = datetime.now()
        logger.info(f"Updated pricing for {provider}/{model_name}")

    def save_pricing_config(self, file_path: Optional[Path] = None):
        """
        Save current pricing to configuration file.

        The format follows the file suffix exactly like ``_load_from_file``
        does: ``.yaml``/``.yml`` are written as YAML, everything else as JSON,
        so a saved file can always be loaded again. Failures are logged, not
        raised.

        Args:
            file_path: Destination; defaults to the loaded config path or
                ``pricing.yaml`` in the current working directory.
        """
        if file_path is None:
            file_path = self.config_path or Path.cwd() / "pricing.yaml"
        file_path = Path(file_path)

        # Convert pricing data to serializable format
        config_data = {
            "providers": {
                provider: {
                    model_name: {
                        "input": pricing.input_cost,
                        "output": pricing.output_cost,
                        "unit_type": pricing.unit_type,
                        "base_cost": pricing.base_cost,
                        "infrastructure_cost_per_hour": pricing.infrastructure_cost_per_hour,
                        "currency": pricing.currency,
                        "metadata": pricing.metadata
                    }
                    for model_name, pricing in models.items()
                }
                for provider, models in self.pricing_data.items()
            }
        }

        try:
            with open(file_path, 'w', encoding="utf-8") as f:
                if file_path.suffix.lower() in self._YAML_SUFFIXES:
                    yaml.dump(config_data, f, default_flow_style=False)
                else:
                    json.dump(config_data, f, indent=2)
            logger.info(f"Saved pricing configuration to {file_path}")
        except Exception as e:
            logger.error(f"Failed to save pricing configuration: {e}")

    def _refresh_if_needed(self):
        """
        Refresh pricing data if cache is stale.

        Nothing happens when no file was ever loaded (``_last_load_time`` is
        None). A reload replaces the in-memory table, so unsaved runtime
        updates are discarded.
        """
        if self._last_load_time is None:
            return

        time_since_load = datetime.now() - self._last_load_time
        if time_since_load > timedelta(hours=self._cache_ttl_hours):
            logger.info("Refreshing pricing data (cache expired)")
            self._load_pricing_data()
            # Restart the TTL even when the reload fell back to defaults;
            # otherwise every later call would trigger another full reload.
            self._last_load_time = datetime.now()

    def get_all_providers(self) -> List[str]:
        """Get list of all providers with pricing data."""
        self._refresh_if_needed()
        return list(self.pricing_data)

    def get_provider_models(self, provider: str) -> List[str]:
        """Get list of models for a provider (empty for unknown providers)."""
        self._refresh_if_needed()
        return list(self.pricing_data.get(provider, {}))
414
+
415
+
416
+ # Shared module-level PricingManager instance; it is constructed at import time, which loads pricing from a config file or the built-in defaults
417
+ pricing_manager = PricingManager()
418
+
419
+ # Convenience functions
420
def get_model_cost(provider: str, model_name: str, input_units: int, output_units: int) -> float:
    """
    Price a single request through the shared ``pricing_manager``.

    Thin wrapper around ``PricingManager.calculate_cost`` using its default
    of one request.
    """
    return pricing_manager.calculate_cost(
        provider,
        model_name,
        input_units=input_units,
        output_units=output_units,
    )
423
+
424
def has_pricing(provider: str, model_name: str) -> bool:
    """
    Tell whether the shared ``pricing_manager`` can price the given model.

    Delegates to ``PricingManager.has_pricing``.
    """
    is_available = pricing_manager.has_pricing(provider, model_name)
    return is_available
@@ -0,0 +1,19 @@
1
+ """
2
+ Core services for the ISA Model platform
3
+
4
+ This module contains platform-wide services including:
5
+ - IntelligentModelSelector: AI-driven model selection with user feedback
6
+ - ServiceRegistry: Managing deployed model services (in separate location)
7
+ """
8
+
9
+ from .intelligent_model_selector import (
10
+ IntelligentModelSelector,
11
+ ModelSelectionRequest,
12
+ ModelRecommendation
13
+ )
14
+
15
+ __all__ = [
16
+ "IntelligentModelSelector",
17
+ "ModelSelectionRequest",
18
+ "ModelRecommendation"
19
+ ]