isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,460 +0,0 @@
1
- """
2
- Multimodal Dataset Support for ISA Model evaluation framework.
3
-
4
- Provides dataset loaders for:
5
- - VQA v2.0 (Visual Question Answering)
6
- - COCO Captions (Image Captioning)
7
- - DocVQA (Document Visual Question Answering)
8
- - Audio datasets (LibriSpeech, Common Voice)
9
- """
10
-
11
- import os
12
- import json
13
- import logging
14
- import requests
15
- import zipfile
16
- from pathlib import Path
17
- from typing import Dict, List, Any, Optional, Union
18
- import pandas as pd
19
- from PIL import Image
20
- import base64
21
- from io import BytesIO
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
class MultimodalDatasetDownloader:
    """Download and cache the archives backing the multimodal datasets.

    Every dataset lives in its own sub-directory of ``cache_dir``. Only
    VQA v2.0 and COCO Captions are actually fetched; the DocVQA and
    LibriSpeech handlers are placeholders that log a warning and return the
    dataset directory without downloading anything.
    """

    # (connect, read) timeouts in seconds passed to ``requests.get`` so that a
    # stalled server cannot block the caller indefinitely.
    DOWNLOAD_TIMEOUT = (10, 60)

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the cache directory and register the known dataset URLs.

        Args:
            cache_dir: Root directory for cached datasets; ``~`` is expanded.
        """
        self.cache_dir = Path(cache_dir).expanduser()
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.dataset_info = {
            "vqa_v2": {
                "annotations_url": "https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip",
                "questions_url": "https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip",
                "images_url": "http://images.cocodataset.org/zips/val2014.zip",
                "description": "VQA v2.0 validation set"
            },
            "coco_captions": {
                "annotations_url": "http://images.cocodataset.org/annotations/annotations_trainval2014.zip",
                "images_url": "http://images.cocodataset.org/zips/val2014.zip",
                "description": "COCO Captions validation set"
            },
            "docvqa": {
                "url": "https://datasets.cvc.uab.es/rrc/DocVQA/train.tar.gz",
                "description": "DocVQA training set"
            },
            "librispeech": {
                "url": "http://www.openslr.org/resources/12/test-clean.tar.gz",
                "description": "LibriSpeech test-clean set"
            }
        }

    def download_dataset(self, dataset_name: str, subset: str = "val", force_download: bool = False) -> Optional[Path]:
        """Download and cache a multimodal dataset.

        Args:
            dataset_name: Key of ``self.dataset_info`` naming the dataset.
            subset: Accepted for interface compatibility; currently unused.
            force_download: Re-download even when cached files exist.

        Returns:
            The dataset directory, or ``None`` when the download failed or no
            handler exists for ``dataset_name``.

        Raises:
            ValueError: If ``dataset_name`` is not in ``self.dataset_info``.
        """
        if dataset_name not in self.dataset_info:
            raise ValueError(f"Unknown dataset: {dataset_name}")

        handlers = {
            "vqa_v2": self._download_vqa_v2,
            "coco_captions": self._download_coco_captions,
            "docvqa": self._download_docvqa,
            "librispeech": self._download_librispeech,
        }

        dataset_dir = self.cache_dir / dataset_name
        # parents=True: recreate the cache root if it was removed after __init__.
        dataset_dir.mkdir(parents=True, exist_ok=True)

        handler = handlers.get(dataset_name)
        if handler is None:
            # dataset_info is a mutable instance attribute, so a caller may have
            # registered a name this class cannot download.
            logger.error(f"Failed to download {dataset_name}: no download handler registered")
            return None

        try:
            return handler(dataset_dir, force_download)
        except Exception as e:
            # Best effort by design: callers fall back to placeholder data.
            logger.error(f"Failed to download {dataset_name}: {e}")
            return None

    def _download_vqa_v2(self, dataset_dir: Path, force_download: bool) -> Path:
        """Download the VQA v2.0 annotation and question archives if needed."""
        annotations_file = dataset_dir / "v2_mscoco_val2014_annotations.json"
        questions_file = dataset_dir / "v2_OpenEnded_mscoco_val2014_questions.json"

        if annotations_file.exists() and questions_file.exists() and not force_download:
            logger.info("Using cached VQA v2.0 dataset")
            return dataset_dir

        info = self.dataset_info["vqa_v2"]

        # Download annotations
        if not annotations_file.exists() or force_download:
            logger.info("Downloading VQA v2.0 annotations")
            self._download_and_extract(info["annotations_url"], dataset_dir)

        # Download questions
        if not questions_file.exists() or force_download:
            logger.info("Downloading VQA v2.0 questions")
            self._download_and_extract(info["questions_url"], dataset_dir)

        return dataset_dir

    def _download_coco_captions(self, dataset_dir: Path, force_download: bool) -> Path:
        """Download the COCO Captions annotation archive if needed."""
        captions_file = dataset_dir / "annotations" / "captions_val2014.json"

        if captions_file.exists() and not force_download:
            logger.info("Using cached COCO Captions dataset")
            return dataset_dir

        info = self.dataset_info["coco_captions"]

        # Download annotations
        logger.info("Downloading COCO Captions annotations")
        self._download_and_extract(info["annotations_url"], dataset_dir)

        return dataset_dir

    def _download_docvqa(self, dataset_dir: Path, force_download: bool) -> Path:
        """Placeholder: DocVQA is not downloaded; return the dataset directory."""
        logger.warning("DocVQA dataset download not implemented - using placeholder")
        return dataset_dir

    def _download_librispeech(self, dataset_dir: Path, force_download: bool) -> Path:
        """Placeholder: LibriSpeech is not downloaded; return the dataset directory."""
        logger.warning("LibriSpeech dataset download not implemented - using placeholder")
        return dataset_dir

    def _download_and_extract(self, url: str, extract_dir: Path):
        """Stream ``url`` into ``extract_dir``, unpack it, and delete the archive.

        The archive file is removed even when the download or extraction
        fails, so a partial file is never left in the cache.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If a tar archive contains a path escaping ``extract_dir``.
        """
        filename = url.split('/')[-1]
        file_path = extract_dir / filename

        try:
            # The context manager releases the connection even on early failure.
            with requests.get(url, stream=True, timeout=self.DOWNLOAD_TIMEOUT) as response:
                response.raise_for_status()
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)

            self._extract_archive(file_path, extract_dir)
        finally:
            # Clean up archive file
            if file_path.exists():
                file_path.unlink()

    @staticmethod
    def _extract_archive(archive_path: Path, extract_dir: Path) -> None:
        """Unpack a ``.zip`` or ``.tar.gz`` archive into ``extract_dir``.

        Other file types are left untouched. Tar members are validated first
        because the archive comes from the network and ``tarfile`` does not
        reject ``../`` or absolute member names on older Python versions.

        Raises:
            ValueError: If a tar member (or its link target) resolves outside
                ``extract_dir``.
        """
        archive_path = Path(archive_path)
        name = archive_path.name

        if name.endswith('.zip'):
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
        elif name.endswith('.tar.gz'):
            import tarfile

            root = Path(extract_dir).resolve()
            with tarfile.open(archive_path, 'r:gz') as tar_ref:
                for member in tar_ref.getmembers():
                    candidates = [root / member.name]
                    if member.issym():
                        # Symlink targets are relative to the link's directory.
                        candidates.append((root / member.name).parent / member.linkname)
                    elif member.islnk():
                        # Hard-link targets are relative to the archive root.
                        candidates.append(root / member.linkname)

                    for candidate in candidates:
                        resolved = candidate.resolve()
                        if resolved != root and root not in resolved.parents:
                            raise ValueError(f"Unsafe path in archive {name}: {member.name}")

                if hasattr(tarfile, "data_filter"):
                    # Also apply the stdlib extraction filter where available.
                    tar_ref.extractall(extract_dir, filter="data")
                else:
                    tar_ref.extractall(extract_dir)
151
-
152
-
153
class VQAv2Dataset:
    """VQA v2.0 Dataset loader.

    Produces evaluation samples either from the downloaded VQA v2.0
    validation files or, when those are unavailable, from a small built-in
    placeholder set.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the loader with a downloader rooted at ``cache_dir``."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)
        self.dataset_dir = None
        self.annotations = None
        self.questions = None

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = True) -> List[Dict[str, Any]]:
        """Load VQA v2.0 data.

        Args:
            max_samples: Maximum number of samples to return; ``None`` means
                no limit and ``0`` returns an empty list.
            use_real_data: Try the real dataset first and fall back to
                placeholder data if loading fails.

        Returns:
            A list of sample dictionaries.
        """
        if not use_real_data:
            return self._load_placeholder_data(max_samples)

        try:
            return self._load_real_data(max_samples)
        except Exception as e:
            # Best effort by design: evaluation can proceed on placeholder data.
            logger.warning(f"Failed to load real VQA data: {e}. Using placeholder data.")
            return self._load_placeholder_data(max_samples)

    def _load_real_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load real VQA v2.0 data from the cached annotation/question files.

        Raises:
            FileNotFoundError: If the downloader could not provide the dataset.
        """
        self.dataset_dir = self.downloader.download_dataset("vqa_v2")
        if not self.dataset_dir:
            raise FileNotFoundError("VQA v2.0 dataset not found")

        # Load annotations and questions
        annotations_file = self.dataset_dir / "v2_mscoco_val2014_annotations.json"
        questions_file = self.dataset_dir / "v2_OpenEnded_mscoco_val2014_questions.json"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(annotations_file, 'r', encoding='utf-8') as f:
            annotations_data = json.load(f)

        with open(questions_file, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)

        data = self._build_samples(
            questions_data['questions'],
            annotations_data['annotations'],
            max_samples,
        )

        logger.info(f"Loaded {len(data)} real VQA v2.0 samples")
        return data

    @staticmethod
    def _build_samples(questions, annotations, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Join questions with their annotations into evaluation samples.

        Questions without an annotation, or whose annotation has no answers,
        are skipped and do not count toward ``max_samples``.

        Args:
            questions: Question dicts with ``question_id``, ``image_id`` and
                ``question`` keys.
            annotations: Annotation dicts with ``question_id`` and ``answers``
                (each answer a dict with an ``answer`` key).
            max_samples: Maximum number of samples to build; ``None`` = all.
        """
        from collections import Counter

        # Create question_id -> annotation mapping
        annotations_dict = {ann['question_id']: ann for ann in annotations}

        data = []
        for question in questions:
            # Limit on produced samples, not on questions scanned.
            if max_samples is not None and len(data) >= max_samples:
                break

            question_id = question['question_id']
            annotation = annotations_dict.get(question_id)
            if annotation is None:
                continue

            answers = [ans['answer'] for ans in annotation['answers']]
            if not answers:
                continue

            # Most common answer; ties go to the answer seen first, so the
            # result is reproducible across runs.
            most_common_answer = Counter(answers).most_common(1)[0][0]

            data.append({
                "image_id": question['image_id'],
                "question": question['question'],
                "expected_output": most_common_answer,
                "task_type": "vqa",
                "id": f"vqa_{question_id}",
                "image": f"COCO_val2014_{question['image_id']:012d}.jpg"  # COCO image filename format
            })

        return data

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load placeholder VQA data (at most five built-in samples)."""
        sample_questions = [
            {"question": "What color is the cat?", "answer": "orange"},
            {"question": "How many people are in the image?", "answer": "3"},
            {"question": "What is the weather like?", "answer": "sunny"},
            {"question": "What vehicle is shown?", "answer": "car"},
            {"question": "What room is this?", "answer": "kitchen"}
        ]

        data = []
        for i, item in enumerate(sample_questions):
            # `is not None` so that max_samples=0 means "no samples".
            if max_samples is not None and i >= max_samples:
                break

            data.append({
                "image_id": f"placeholder_{i}",
                "question": item["question"],
                "expected_output": item["answer"],
                "task_type": "vqa",
                "id": f"vqa_placeholder_{i}",
                "image": None  # Placeholder - no actual image
            })

        logger.info(f"Loaded {len(data)} placeholder VQA samples")
        return data
245
-
246
-
247
class COCOCaptionsDataset:
    """COCO Captions Dataset loader.

    Produces captioning samples either from the downloaded COCO val2014
    caption annotations or from a small built-in placeholder set.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the loader with a downloader rooted at ``cache_dir``."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)
        self.dataset_dir = None

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = True) -> List[Dict[str, Any]]:
        """Load COCO Captions data.

        Args:
            max_samples: Maximum number of samples to return; ``None`` means
                no limit and ``0`` returns an empty list.
            use_real_data: Try the real dataset first and fall back to
                placeholder data if loading fails.

        Returns:
            A list of sample dictionaries.
        """
        if not use_real_data:
            return self._load_placeholder_data(max_samples)

        try:
            return self._load_real_data(max_samples)
        except Exception as e:
            # Best effort by design: evaluation can proceed on placeholder data.
            logger.warning(f"Failed to load real COCO Captions data: {e}. Using placeholder data.")
            return self._load_placeholder_data(max_samples)

    def _load_real_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load real COCO Captions data from the cached annotation file.

        Raises:
            FileNotFoundError: If the downloader could not provide the dataset.
        """
        self.dataset_dir = self.downloader.download_dataset("coco_captions")
        if not self.dataset_dir:
            raise FileNotFoundError("COCO Captions dataset not found")

        # Load captions
        captions_file = self.dataset_dir / "annotations" / "captions_val2014.json"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(captions_file, 'r', encoding='utf-8') as f:
            captions_data = json.load(f)

        data = self._build_samples(captions_data['annotations'], max_samples)

        logger.info(f"Loaded {len(data)} real COCO Captions samples")
        return data

    @staticmethod
    def _build_samples(annotations, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Group captions per image and build one sample per image.

        Images appear in the order their first caption is encountered.

        Args:
            annotations: Dicts with ``image_id`` and ``caption`` keys.
            max_samples: Maximum number of samples to build; ``None`` = all.
        """
        # Group captions by image_id
        image_captions = {}
        for annotation in annotations:
            image_captions.setdefault(annotation['image_id'], []).append(annotation['caption'])

        data = []
        for i, (image_id, captions) in enumerate(image_captions.items()):
            # `is not None` so that max_samples=0 means "no samples".
            if max_samples is not None and i >= max_samples:
                break

            # Use the first caption as the expected output
            data.append({
                "image_id": image_id,
                "expected_output": captions[0],
                "all_captions": captions,  # Keep all captions for evaluation
                "task_type": "caption",
                "prompt": "Generate a detailed caption describing this image.",
                "id": f"coco_caption_{image_id}",
                "image": f"COCO_val2014_{image_id:012d}.jpg"
            })

        return data

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load placeholder captions data (at most five built-in samples)."""
        sample_captions = [
            "A cat sitting on a windowsill looking outside",
            "Three people walking in a park on a sunny day",
            "A red car parked on a city street",
            "A kitchen with modern appliances and granite countertops",
            "A dog playing fetch in a grassy field"
        ]

        data = []
        for i, caption in enumerate(sample_captions):
            if max_samples is not None and i >= max_samples:
                break

            data.append({
                "image_id": f"placeholder_{i}",
                "expected_output": caption,
                "task_type": "caption",
                "prompt": "Generate a detailed caption describing this image.",
                "id": f"coco_caption_placeholder_{i}",
                "image": None  # Placeholder - no actual image
            })

        logger.info(f"Loaded {len(data)} placeholder caption samples")
        return data
332
-
333
-
334
class DocVQADataset:
    """DocVQA Dataset loader.

    Only built-in placeholder samples are available; no real DocVQA data is
    read by this class.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the loader with a downloader rooted at ``cache_dir``."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)
        self.dataset_dir = None

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = False) -> List[Dict[str, Any]]:
        """Load DocVQA data (currently placeholder only).

        Args:
            max_samples: Maximum number of samples to return; ``None`` means
                no limit and ``0`` returns an empty list.
            use_real_data: Accepted for interface parity with the other
                loaders; real data is not supported, so a warning is logged
                and placeholder data is returned.

        Returns:
            A list of placeholder sample dictionaries.
        """
        if use_real_data:
            # Tell the caller instead of silently ignoring the request.
            logger.warning("Real DocVQA data is not supported. Using placeholder data.")
        return self._load_placeholder_data(max_samples)

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load placeholder DocVQA data (at most five built-in samples)."""
        sample_doc_questions = [
            {"question": "What is the title of this document?", "answer": "Annual Report 2023"},
            {"question": "Who is the author?", "answer": "John Smith"},
            {"question": "What is the total revenue?", "answer": "$1.2 million"},
            {"question": "How many pages does this document have?", "answer": "45"},
            {"question": "What year was this published?", "answer": "2023"}
        ]

        data = []
        for i, item in enumerate(sample_doc_questions):
            # `is not None` so that max_samples=0 means "no samples".
            if max_samples is not None and i >= max_samples:
                break

            data.append({
                "document_id": f"doc_{i}",
                "question": item["question"],
                "expected_output": item["answer"],
                "task_type": "document_vqa",
                "id": f"docvqa_placeholder_{i}",
                "image": None  # Placeholder - no actual document image
            })

        logger.info(f"Loaded {len(data)} placeholder DocVQA samples")
        return data
373
-
374
-
375
class AudioDatasetLoader:
    """Audio dataset loader for speech tasks.

    Both loaders return built-in placeholder samples; no audio files are
    read by this class.
    """

    # Upper bound on the number of placeholder emotion samples generated.
    _MAX_EMOTION_SAMPLES = 20

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the loader with a downloader rooted at ``cache_dir``."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)

    def load_librispeech_data(self, max_samples: Optional[int] = None, use_real_data: bool = False) -> List[Dict[str, Any]]:
        """Load LibriSpeech data (currently placeholder only).

        Args:
            max_samples: Maximum number of samples to return; ``None`` means
                no limit and ``0`` returns an empty list.
            use_real_data: Accepted for interface parity; real data is not
                supported, so a warning is logged and placeholders are used.
        """
        if use_real_data:
            # Tell the caller instead of silently ignoring the request.
            logger.warning("Real LibriSpeech data is not supported. Using placeholder data.")
        return self._load_placeholder_speech_data(max_samples)

    def _load_placeholder_speech_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load placeholder speech data (at most five built-in transcripts)."""
        sample_transcripts = [
            "The quick brown fox jumps over the lazy dog",
            "Machine learning is transforming artificial intelligence",
            "Natural language processing enables computers to understand human speech",
            "Deep learning models require large amounts of training data",
            "Speech recognition technology has improved significantly in recent years"
        ]

        data = []
        for i, transcript in enumerate(sample_transcripts):
            # `is not None` so that max_samples=0 means "no samples".
            if max_samples is not None and i >= max_samples:
                break

            data.append({
                "audio_id": f"speech_{i}",
                "expected_output": transcript,
                "task_type": "stt",
                "id": f"librispeech_placeholder_{i}",
                "audio": None,  # Placeholder - no actual audio file
                "metadata": {
                    "speaker": f"speaker_{i % 3}",
                    "gender": "male" if i % 2 == 0 else "female",
                    "duration": 3.5 + i * 0.5
                }
            })

        logger.info(f"Loaded {len(data)} placeholder speech samples")
        return data

    def load_emotion_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Load placeholder emotion recognition data.

        Args:
            max_samples: Number of samples to generate, capped at 20.
                ``None`` yields 20; ``0`` or a negative value yields none.
        """
        emotions = ["happy", "sad", "angry", "neutral", "surprised"]

        if max_samples is None:
            count = self._MAX_EMOTION_SAMPLES
        else:
            # Clamp to [0, 20]; `max_samples or 20` used to turn 0 into 20.
            count = max(0, min(max_samples, self._MAX_EMOTION_SAMPLES))

        data = [
            {
                "audio_id": f"emotion_{i}",
                "expected_output": emotions[i % len(emotions)],
                "task_type": "emotion",
                "id": f"emotion_placeholder_{i}",
                "audio": None,  # Placeholder - no actual audio file
                "metadata": {
                    "speaker": f"speaker_{i % 5}",
                    "intensity": "medium"
                }
            }
            for i in range(count)
        ]

        logger.info(f"Loaded {len(data)} placeholder emotion samples")
        return data
440
-
441
-
442
- # Convenience functions
443
def create_vqa_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> VQAv2Dataset:
    """Build a VQA v2.0 dataset loader whose cache lives under ``cache_dir``."""
    dataset = VQAv2Dataset(cache_dir=cache_dir)
    return dataset
446
-
447
-
448
def create_coco_captions_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> COCOCaptionsDataset:
    """Build a COCO Captions dataset loader whose cache lives under ``cache_dir``."""
    dataset = COCOCaptionsDataset(cache_dir=cache_dir)
    return dataset
451
-
452
-
453
def create_docvqa_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> DocVQADataset:
    """Build a DocVQA dataset loader whose cache lives under ``cache_dir``."""
    dataset = DocVQADataset(cache_dir=cache_dir)
    return dataset
456
-
457
-
458
def create_audio_dataset_loader(cache_dir: str = "~/.isa_model/multimodal_datasets") -> AudioDatasetLoader:
    """Build an audio dataset loader whose cache lives under ``cache_dir``."""
    loader = AudioDatasetLoader(cache_dir=cache_dir)
    return loader