isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in that public registry.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/inference/billing_tracker.py
@@ -1,406 +0,0 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
-
- """
- Billing Tracker for isA_Model Services
- Tracks usage and costs across all AI providers
- """
-
- from typing import Dict, List, Optional, Any, Union
- from datetime import datetime, timezone
- from dataclasses import dataclass, asdict
- import json
- import logging
- from pathlib import Path
- from enum import Enum
- import os
-
- logger = logging.getLogger(__name__)
-
- class ServiceType(Enum):
-     """Types of AI services"""
-     LLM = "llm"
-     EMBEDDING = "embedding"
-     VISION = "vision"
-     IMAGE_GENERATION = "image_generation"
-     AUDIO_STT = "audio_stt"
-     AUDIO_TTS = "audio_tts"
-
- class Provider(Enum):
-     """AI service providers"""
-     OPENAI = "openai"
-     REPLICATE = "replicate"
-     OLLAMA = "ollama"
-     ANTHROPIC = "anthropic"
-     GOOGLE = "google"
-
- @dataclass
- class UsageRecord:
-     """Record of a single API usage"""
-     timestamp: str
-     provider: str
-     service_type: str
-     model_name: str
-     operation: str
-     input_tokens: Optional[int] = None
-     output_tokens: Optional[int] = None
-     total_tokens: Optional[int] = None
-     input_units: Optional[float] = None # For non-token based services (images, audio)
-     output_units: Optional[float] = None
-     cost_usd: Optional[float] = None
-     metadata: Optional[Dict[str, Any]] = None
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert to dictionary"""
-         return asdict(self)
-
-     @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> 'UsageRecord':
-         """Create from dictionary"""
-         return cls(**data)
-
- class BillingTracker:
-     """
-     Tracks billing and usage across all AI services
-     """
-
-     def __init__(self, storage_path: Optional[str] = None):
-         """
-         Initialize billing tracker
-
-         Args:
-             storage_path: Path to store billing data (defaults to project root)
-         """
-         if storage_path is None:
-             project_root = Path(__file__).parent.parent.parent
-             self.storage_path = project_root / "billing_data.json"
-         else:
-             self.storage_path = Path(storage_path)
-         self.usage_records: List[UsageRecord] = []
-         self.session_start = datetime.now(timezone.utc).isoformat()
-
-         # Load existing data
-         self._load_data()
-
-     def _load_data(self):
-         """Load existing billing data"""
-         try:
-             if self.storage_path.exists():
-                 with open(self.storage_path, 'r') as f:
-                     data = json.load(f)
-                     self.usage_records = [
-                         UsageRecord.from_dict(record)
-                         for record in data.get('usage_records', [])
-                     ]
-                 logger.info(f"Loaded {len(self.usage_records)} billing records")
-         except Exception as e:
-             logger.warning(f"Could not load billing data: {e}")
-             self.usage_records = []
-
-     def _save_data(self):
-         """Save billing data to storage"""
-         try:
-             # Ensure directory exists
-             self.storage_path.parent.mkdir(parents=True, exist_ok=True)
-
-             data = {
-                 "session_start": self.session_start,
-                 "last_updated": datetime.now(timezone.utc).isoformat(),
-                 "usage_records": [record.to_dict() for record in self.usage_records]
-             }
-
-             with open(self.storage_path, 'w') as f:
-                 json.dump(data, f, indent=2)
-
-         except Exception as e:
-             logger.error(f"Could not save billing data: {e}")
-
-     def track_usage(
-         self,
-         provider: Union[str, Provider],
-         service_type: Union[str, ServiceType],
-         model_name: str,
-         operation: str,
-         input_tokens: Optional[int] = None,
-         output_tokens: Optional[int] = None,
-         input_units: Optional[float] = None,
-         output_units: Optional[float] = None,
-         metadata: Optional[Dict[str, Any]] = None
-     ) -> UsageRecord:
-         """
-         Track a usage event
-
-         Args:
-             provider: AI provider name
-             service_type: Type of service used
-             model_name: Name of the model
-             operation: Operation performed (e.g., 'chat', 'embedding', 'image_generation')
-             input_tokens: Number of input tokens
-             output_tokens: Number of output tokens
-             input_units: Input units for non-token services (e.g., audio seconds, image count)
-             output_units: Output units for non-token services
-             metadata: Additional metadata
-
-         Returns:
-             UsageRecord object
-         """
-         # Convert enums to strings
-         if isinstance(provider, Provider):
-             provider = provider.value
-         if isinstance(service_type, ServiceType):
-             service_type = service_type.value
-
-         # Calculate total tokens
-         total_tokens = None
-         if input_tokens is not None or output_tokens is not None:
-             total_tokens = (input_tokens or 0) + (output_tokens or 0)
-
-         # Calculate cost
-         cost_usd = self._calculate_cost(
-             provider, model_name, operation,
-             input_tokens, output_tokens, input_units, output_units
-         )
-
-         # Create usage record
-         record = UsageRecord(
-             timestamp=datetime.now(timezone.utc).isoformat(),
-             provider=provider,
-             service_type=service_type,
-             model_name=model_name,
-             operation=operation,
-             input_tokens=input_tokens,
-             output_tokens=output_tokens,
-             total_tokens=total_tokens,
-             input_units=input_units,
-             output_units=output_units,
-             cost_usd=cost_usd,
-             metadata=metadata or {}
-         )
-
-         # Add to records and save
-         self.usage_records.append(record)
-         self._save_data()
-
-         logger.info(f"Tracked usage: {provider}/{model_name} - ${cost_usd:.6f}")
-         return record
-
-     def _get_model_pricing(self, provider: str, model_name: str) -> Optional[Dict[str, float]]:
-         """Get pricing information from ModelManager"""
-         try:
-             from isa_model.core.model_manager import ModelManager
-             pricing = ModelManager.MODEL_PRICING.get(provider, {}).get(model_name)
-             if pricing:
-                 return pricing
-
-             # Fallback to legacy pricing for backward compatibility
-             legacy_pricing = self._get_legacy_pricing(provider, model_name)
-             if legacy_pricing:
-                 return legacy_pricing
-
-             return None
-         except ImportError:
-             # Fallback to legacy pricing if ModelManager is not available
-             return self._get_legacy_pricing(provider, model_name)
-
-     def _get_legacy_pricing(self, provider: str, model_name: str) -> Optional[Dict[str, float]]:
-         """Legacy pricing information for backward compatibility"""
-         LEGACY_PRICING = {
-             "openai": {
-                 "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
-                 "gpt-4o": {"input": 5.0, "output": 15.0},
-                 "gpt-4o-mini": {"input": 0.15, "output": 0.6},
-                 "text-embedding-3-small": {"input": 0.02, "output": 0.0},
-                 "text-embedding-3-large": {"input": 0.13, "output": 0.0},
-                 "whisper-1": {"input": 6.0, "output": 0.0},
-                 "tts-1": {"input": 15.0, "output": 0.0},
-                 "tts-1-hd": {"input": 30.0, "output": 0.0},
-             },
-             "ollama": {
-                 "default": {"input": 0.0, "output": 0.0}
-             },
-             "replicate": {
-                 "black-forest-labs/flux-schnell": {"input": 0.003, "output": 0.0},
-                 "meta/meta-llama-3-8b-instruct": {"input": 0.05, "output": 0.25},
-             }
-         }
-
-         provider_pricing = LEGACY_PRICING.get(provider, {})
-         return provider_pricing.get(model_name) or provider_pricing.get("default")
-
-     def _calculate_cost(
-         self,
-         provider: str,
-         model_name: str,
-         operation: str,
-         input_tokens: Optional[int] = None,
-         output_tokens: Optional[int] = None,
-         input_units: Optional[float] = None,
-         output_units: Optional[float] = None
-     ) -> float:
-         """Calculate cost for a usage event"""
-         try:
-             # Get pricing using unified model manager
-             model_pricing = self._get_model_pricing(provider, model_name)
-
-             if not model_pricing:
-                 logger.warning(f"No pricing found for {provider}/{model_name}")
-                 return 0.0
-
-             cost = 0.0
-
-             # Token-based pricing (per 1M tokens)
-             if input_tokens is not None and "input" in model_pricing:
-                 cost += (input_tokens / 1000000) * model_pricing["input"]
-
-             if output_tokens is not None and "output" in model_pricing:
-                 cost += (output_tokens / 1000000) * model_pricing["output"]
-
-             return cost
-
-         except Exception as e:
-             logger.error(f"Error calculating cost: {e}")
-             return 0.0
-
-     def get_session_summary(self) -> Dict[str, Any]:
-         """Get billing summary for current session"""
-         session_records = [
-             record for record in self.usage_records
-             if record.timestamp >= self.session_start
-         ]
-
-         return self._generate_summary(session_records, "Current Session")
-
-     def get_total_summary(self) -> Dict[str, Any]:
-         """Get total billing summary"""
-         return self._generate_summary(self.usage_records, "Total Usage")
-
-     def get_provider_summary(self, provider: Union[str, Provider]) -> Dict[str, Any]:
-         """Get billing summary for a specific provider"""
-         if isinstance(provider, Provider):
-             provider = provider.value
-
-         provider_records = [
-             record for record in self.usage_records
-             if record.provider == provider
-         ]
-
-         return self._generate_summary(provider_records, f"{provider.title()} Usage")
-
-     def _generate_summary(self, records: List[UsageRecord], title: str) -> Dict[str, Any]:
-         """Generate billing summary from records"""
-         if not records:
-             return {
-                 "title": title,
-                 "total_cost": 0.0,
-                 "total_requests": 0,
-                 "providers": {},
-                 "services": {},
-                 "models": {}
-             }
-
-         total_cost = sum(record.cost_usd or 0 for record in records)
-         total_requests = len(records)
-
-         # Group by provider
-         providers = {}
-         for record in records:
-             if record.provider not in providers:
-                 providers[record.provider] = {
-                     "cost": 0.0,
-                     "requests": 0,
-                     "models": set()
-                 }
-             providers[record.provider]["cost"] += record.cost_usd or 0
-             providers[record.provider]["requests"] += 1
-             providers[record.provider]["models"].add(record.model_name)
-
-         # Convert sets to lists for JSON serialization
-         for provider_data in providers.values():
-             provider_data["models"] = list(provider_data["models"])
-
-         # Group by service type
-         services = {}
-         for record in records:
-             if record.service_type not in services:
-                 services[record.service_type] = {
-                     "cost": 0.0,
-                     "requests": 0
-                 }
-             services[record.service_type]["cost"] += record.cost_usd or 0
-             services[record.service_type]["requests"] += 1
-
-         # Group by model
-         models = {}
-         for record in records:
-             model_key = f"{record.provider}/{record.model_name}"
-             if model_key not in models:
-                 models[model_key] = {
-                     "cost": 0.0,
-                     "requests": 0,
-                     "total_tokens": 0
-                 }
-             models[model_key]["cost"] += record.cost_usd or 0
-             models[model_key]["requests"] += 1
-             if record.total_tokens:
-                 models[model_key]["total_tokens"] += record.total_tokens
-
-         return {
-             "title": title,
-             "total_cost": round(total_cost, 6),
-             "total_requests": total_requests,
-             "providers": providers,
-             "services": services,
-             "models": models,
-             "period": {
-                 "start": records[0].timestamp if records else None,
-                 "end": records[-1].timestamp if records else None
-             }
-         }
-
-     def print_summary(self, summary_type: str = "session"):
-         """Print billing summary to console"""
-         if summary_type == "session":
-             summary = self.get_session_summary()
-         elif summary_type == "total":
-             summary = self.get_total_summary()
-         else:
-             raise ValueError("summary_type must be 'session' or 'total'")
-
-         print(f"\n💰 {summary['title']} Billing Summary")
-         print("=" * 50)
-         print(f"💵 Total Cost: ${summary['total_cost']:.6f}")
-         print(f"📊 Total Requests: {summary['total_requests']}")
-
-         if summary['providers']:
-             print("\n📈 By Provider:")
-             for provider, data in summary['providers'].items():
-                 print(f" {provider}: ${data['cost']:.6f} ({data['requests']} requests)")
-
-         if summary['services']:
-             print("\n🔧 By Service:")
-             for service, data in summary['services'].items():
-                 print(f" {service}: ${data['cost']:.6f} ({data['requests']} requests)")
-
-         if summary['models']:
-             print("\n🤖 By Model:")
-             for model, data in summary['models'].items():
-                 tokens_info = f" ({data['total_tokens']} tokens)" if data['total_tokens'] > 0 else ""
-                 print(f" {model}: ${data['cost']:.6f} ({data['requests']} requests){tokens_info}")
-
- # Global billing tracker instance
- _global_tracker: Optional[BillingTracker] = None
-
- def get_billing_tracker() -> BillingTracker:
-     """Get the global billing tracker instance"""
-     global _global_tracker
-     if _global_tracker is None:
-         _global_tracker = BillingTracker()
-     return _global_tracker
-
- def track_usage(**kwargs) -> UsageRecord:
-     """Convenience function to track usage"""
-     return get_billing_tracker().track_usage(**kwargs)
-
- def print_billing_summary(summary_type: str = "session"):
-     """Convenience function to print billing summary"""
-     get_billing_tracker().print_summary(summary_type)
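
The module deleted above exposed a small module-level convenience API (get_billing_tracker, track_usage, print_billing_summary) around a process-wide BillingTracker singleton, with costs computed from per-1M-token pricing tables. The sketch below is illustrative only, reconstructed from the 0.3.5 code shown in this diff; it does not describe the 0.3.7 replacement (this release adds isa_model/core/models/model_billing_tracker.py, which appears to supersede it), and the token counts are made-up example values.

# Illustrative usage of the 0.3.5 billing_tracker module removed above.
# Imports mirror the deleted file; token counts are example values only.
from isa_model.inference.billing_tracker import (
    Provider,
    ServiceType,
    get_billing_tracker,
    print_billing_summary,
)

tracker = get_billing_tracker()  # lazily creates the global BillingTracker

# Record one chat call. Using the legacy fallback pricing shown above
# (gpt-4o-mini: $0.15 input / $0.60 output per 1M tokens), the cost works out to
# 1200/1e6 * 0.15 + 350/1e6 * 0.60 = $0.00039, and the record is appended and
# saved to billing_data.json.
record = tracker.track_usage(
    provider=Provider.OPENAI,
    service_type=ServiceType.LLM,
    model_name="gpt-4o-mini",
    operation="chat",
    input_tokens=1200,
    output_tokens=350,
)
print(f"cost: ${record.cost_usd:.6f}")

# Console summaries for the current session or for all recorded usage.
print_billing_summary("session")
print_billing_summary("total")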