isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,460 +0,0 @@
1
- """
2
- Multimodal Dataset Support for ISA Model evaluation framework.
3
-
4
- Provides dataset loaders for:
5
- - VQA v2.0 (Visual Question Answering)
6
- - COCO Captions (Image Captioning)
7
- - DocVQA (Document Visual Question Answering)
8
- - Audio datasets (LibriSpeech, Common Voice)
9
- """
10
-
11
- import os
12
- import json
13
- import logging
14
- import requests
15
- import zipfile
16
- from pathlib import Path
17
- from typing import Dict, List, Any, Optional, Union
18
- import pandas as pd
19
- from PIL import Image
20
- import base64
21
- from io import BytesIO
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
class MultimodalDatasetDownloader:
    """Download multimodal evaluation datasets into a local cache directory.

    Every known dataset gets its own sub-directory below ``cache_dir``.
    Only the VQA v2.0 and COCO Captions annotation archives are actually
    fetched; the DocVQA and LibriSpeech handlers are placeholders that log
    a warning and return the (empty) dataset directory. The ``images_url``
    entries in ``dataset_info`` are informational and never downloaded here.
    """

    # Seconds passed to ``requests`` as the connect/read timeout so that a
    # stalled server cannot hang the caller forever.
    DEFAULT_TIMEOUT = 60.0

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the cache directory (``~`` is expanded) and the URL table."""
        self.cache_dir = Path(cache_dir).expanduser()
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.dataset_info = {
            "vqa_v2": {
                "annotations_url": "https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip",
                "questions_url": "https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip",
                "images_url": "http://images.cocodataset.org/zips/val2014.zip",
                "description": "VQA v2.0 validation set"
            },
            "coco_captions": {
                "annotations_url": "http://images.cocodataset.org/annotations/annotations_trainval2014.zip",
                "images_url": "http://images.cocodataset.org/zips/val2014.zip",
                "description": "COCO Captions validation set"
            },
            "docvqa": {
                "url": "https://datasets.cvc.uab.es/rrc/DocVQA/train.tar.gz",
                "description": "DocVQA training set"
            },
            "librispeech": {
                "url": "http://www.openslr.org/resources/12/test-clean.tar.gz",
                "description": "LibriSpeech test-clean set"
            }
        }

    def download_dataset(self, dataset_name: str, subset: str = "val", force_download: bool = False) -> Optional[Path]:
        """Download and cache a multimodal dataset.

        Args:
            dataset_name: One of the keys of ``self.dataset_info``.
            subset: Accepted for interface compatibility; currently unused.
            force_download: Re-download even when cached files exist.

        Returns:
            The dataset directory, or ``None`` when the download failed
            (the failure is logged, not raised).

        Raises:
            ValueError: If ``dataset_name`` is not a known dataset.
        """
        if dataset_name not in self.dataset_info:
            raise ValueError(f"Unknown dataset: {dataset_name}")

        dataset_dir = self.cache_dir / dataset_name
        # parents=True: survive the cache root being removed after __init__.
        dataset_dir.mkdir(parents=True, exist_ok=True)

        handlers = {
            "vqa_v2": self._download_vqa_v2,
            "coco_captions": self._download_coco_captions,
            "docvqa": self._download_docvqa,
            "librispeech": self._download_librispeech,
        }

        try:
            return handlers[dataset_name](dataset_dir, force_download)
        except Exception as e:
            # Best-effort API: callers test the return value for None.
            logger.error(f"Failed to download {dataset_name}: {e}")
            return None

    def _download_vqa_v2(self, dataset_dir: Path, force_download: bool) -> Path:
        """Download VQA v2.0 annotations and questions unless already cached."""
        annotations_file = dataset_dir / "v2_mscoco_val2014_annotations.json"
        questions_file = dataset_dir / "v2_OpenEnded_mscoco_val2014_questions.json"

        if annotations_file.exists() and questions_file.exists() and not force_download:
            logger.info("Using cached VQA v2.0 dataset")
            return dataset_dir

        info = self.dataset_info["vqa_v2"]

        # Each archive is fetched independently so a half-populated cache
        # only re-downloads the missing part.
        if not annotations_file.exists() or force_download:
            logger.info("Downloading VQA v2.0 annotations")
            self._download_and_extract(info["annotations_url"], dataset_dir)

        if not questions_file.exists() or force_download:
            logger.info("Downloading VQA v2.0 questions")
            self._download_and_extract(info["questions_url"], dataset_dir)

        return dataset_dir

    def _download_coco_captions(self, dataset_dir: Path, force_download: bool) -> Path:
        """Download the COCO Captions annotations unless already cached."""
        captions_file = dataset_dir / "annotations" / "captions_val2014.json"

        if captions_file.exists() and not force_download:
            logger.info("Using cached COCO Captions dataset")
            return dataset_dir

        info = self.dataset_info["coco_captions"]

        logger.info("Downloading COCO Captions annotations")
        self._download_and_extract(info["annotations_url"], dataset_dir)

        return dataset_dir

    def _download_docvqa(self, dataset_dir: Path, force_download: bool) -> Path:
        """Placeholder: DocVQA is not downloaded; only a warning is logged."""
        logger.warning("DocVQA dataset download not implemented - using placeholder")
        return dataset_dir

    def _download_librispeech(self, dataset_dir: Path, force_download: bool) -> Path:
        """Placeholder: LibriSpeech is not downloaded; only a warning is logged."""
        logger.warning("LibriSpeech dataset download not implemented - using placeholder")
        return dataset_dir

    def _download_and_extract(self, url: str, extract_dir: Path, timeout: Optional[float] = None):
        """Stream ``url`` into ``extract_dir``, unpack it and delete the archive.

        Args:
            url: Archive URL; the last path segment is used as the file name.
            extract_dir: Directory receiving both the archive and its contents.
            timeout: ``requests`` timeout in seconds; defaults to
                ``DEFAULT_TIMEOUT``.

        Raises:
            requests.HTTPError: On a non-success HTTP status.
            ValueError: If a tar member would be written outside ``extract_dir``.
        """
        if timeout is None:
            timeout = self.DEFAULT_TIMEOUT

        filename = url.split('/')[-1]
        file_path = extract_dir / filename

        try:
            # The context manager releases the pooled connection even when
            # the body is only partially consumed.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)

            self._extract_archive(file_path, extract_dir)
        finally:
            # Never leave a (possibly truncated) archive behind.
            if file_path.exists():
                file_path.unlink()

    @staticmethod
    def _extract_archive(file_path: Path, extract_dir: Path) -> None:
        """Unpack a ``.zip`` or ``.tar.gz`` archive into ``extract_dir``.

        Other file types are left untouched. Tar members are validated
        first because the archive comes from the network.

        Raises:
            ValueError: If a tar member resolves outside ``extract_dir``.
        """
        name = Path(file_path).name

        if name.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
        elif name.endswith('.tar.gz'):
            import tarfile

            base = Path(extract_dir).resolve()
            with tarfile.open(file_path, 'r:gz') as tar_ref:
                for member in tar_ref.getmembers():
                    target = (base / member.name).resolve()
                    if target != base and base not in target.parents:
                        raise ValueError(
                            f"Unsafe path in archive {name}: {member.name}"
                        )
                # Prefer the stdlib extraction filter where it exists; it
                # additionally rejects links and special files.
                if hasattr(tarfile, "data_filter"):
                    tar_ref.extractall(extract_dir, filter="data")
                else:
                    tar_ref.extractall(extract_dir)
151
-
152
-
153
class VQAv2Dataset:
    """VQA v2.0 dataset loader.

    Loads question/answer pairs either from the downloaded validation
    annotations or, as a fallback, from a small built-in placeholder set.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the downloader bound to ``cache_dir``; nothing is fetched yet."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)
        self.dataset_dir = None
        self.annotations = None
        self.questions = None

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = True) -> List[Dict[str, Any]]:
        """Load VQA v2.0 samples.

        Args:
            max_samples: Upper bound on returned samples; ``None`` or ``0``
                means no limit.
            use_real_data: Try the real dataset first. Any failure is logged
                and the placeholder samples are returned instead.
        """
        if not use_real_data:
            return self._load_placeholder_data(max_samples)

        try:
            return self._load_real_data(max_samples)
        except Exception as e:
            logger.warning(f"Failed to load real VQA data: {e}. Using placeholder data.")
            return self._load_placeholder_data(max_samples)

    def _load_real_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Download (or reuse) the dataset and build samples from its JSON files.

        Raises:
            FileNotFoundError: If the downloader could not provide a directory.
        """
        self.dataset_dir = self.downloader.download_dataset("vqa_v2")
        if not self.dataset_dir:
            raise FileNotFoundError("VQA v2.0 dataset not found")

        annotations_file = self.dataset_dir / "v2_mscoco_val2014_annotations.json"
        questions_file = self.dataset_dir / "v2_OpenEnded_mscoco_val2014_questions.json"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(annotations_file, 'r', encoding='utf-8') as f:
            annotations_data = json.load(f)

        with open(questions_file, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)

        data = self._build_samples(
            questions_data['questions'],
            annotations_data['annotations'],
            max_samples,
        )

        logger.info(f"Loaded {len(data)} real VQA v2.0 samples")
        return data

    @staticmethod
    def _most_common_answer(answers: List[str]) -> str:
        """Return the most frequent answer; ties go to the earliest one.

        Iterating the list (not a ``set``) keeps the tie-break independent
        of string hash randomisation, so results are reproducible.
        """
        return max(answers, key=answers.count)

    @classmethod
    def _build_samples(
        cls,
        questions: List[Dict[str, Any]],
        annotations: List[Dict[str, Any]],
        max_samples: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Join questions with their annotations into evaluation samples.

        Questions without an annotation, or whose annotation has no answers,
        are skipped. ``max_samples`` limits the number of *produced* samples,
        so skipped questions do not eat into the budget.
        """
        annotations_dict = {ann['question_id']: ann for ann in annotations}

        data: List[Dict[str, Any]] = []
        for question in questions:
            if max_samples and len(data) >= max_samples:
                break

            question_id = question['question_id']
            annotation = annotations_dict.get(question_id)
            if annotation is None:
                continue

            answers = [ans['answer'] for ans in annotation['answers']]
            if not answers:
                continue

            data.append({
                "image_id": question['image_id'],
                "question": question['question'],
                "expected_output": cls._most_common_answer(answers),
                "task_type": "vqa",
                "id": f"vqa_{question_id}",
                # COCO image filename format: zero-padded 12-digit image id.
                "image": f"COCO_val2014_{question['image_id']:012d}.jpg"
            })

        return data

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return up to five hard-coded VQA samples that carry no image."""
        sample_questions = [
            {"question": "What color is the cat?", "answer": "orange"},
            {"question": "How many people are in the image?", "answer": "3"},
            {"question": "What is the weather like?", "answer": "sunny"},
            {"question": "What vehicle is shown?", "answer": "car"},
            {"question": "What room is this?", "answer": "kitchen"}
        ]

        data = []
        for i, item in enumerate(sample_questions):
            if max_samples and i >= max_samples:
                break

            sample = {
                "image_id": f"placeholder_{i}",
                "question": item["question"],
                "expected_output": item["answer"],
                "task_type": "vqa",
                "id": f"vqa_placeholder_{i}",
                "image": None  # Placeholder - no actual image
            }
            data.append(sample)

        logger.info(f"Loaded {len(data)} placeholder VQA samples")
        return data
245
-
246
-
247
class COCOCaptionsDataset:
    """COCO Captions dataset loader.

    Produces one captioning sample per image, either from the downloaded
    ``captions_val2014.json`` or from a small built-in placeholder set.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Create the downloader bound to ``cache_dir``; nothing is fetched yet."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)
        self.dataset_dir = None

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = True) -> List[Dict[str, Any]]:
        """Load COCO Captions samples.

        Args:
            max_samples: Upper bound on returned samples; ``None`` or ``0``
                means no limit.
            use_real_data: Try the real dataset first. Any failure is logged
                and the placeholder samples are returned instead.
        """
        if not use_real_data:
            return self._load_placeholder_data(max_samples)

        try:
            return self._load_real_data(max_samples)
        except Exception as e:
            logger.warning(f"Failed to load real COCO Captions data: {e}. Using placeholder data.")
            return self._load_placeholder_data(max_samples)

    def _load_real_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Download (or reuse) the annotations and build one sample per image.

        Raises:
            FileNotFoundError: If the downloader could not provide a directory.
        """
        self.dataset_dir = self.downloader.download_dataset("coco_captions")
        if not self.dataset_dir:
            raise FileNotFoundError("COCO Captions dataset not found")

        captions_file = self.dataset_dir / "annotations" / "captions_val2014.json"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(captions_file, 'r', encoding='utf-8') as f:
            captions_data = json.load(f)

        data = self._build_samples(captions_data['annotations'], max_samples)

        logger.info(f"Loaded {len(data)} real COCO Captions samples")
        return data

    @staticmethod
    def _build_samples(
        annotations: List[Dict[str, Any]],
        max_samples: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Group captions by ``image_id`` and emit one sample per image.

        Images keep the order in which they first appear in ``annotations``;
        the first caption seen for an image becomes its ``expected_output``.
        """
        image_captions: Dict[Any, List[str]] = {}
        for annotation in annotations:
            image_captions.setdefault(annotation['image_id'], []).append(annotation['caption'])

        data: List[Dict[str, Any]] = []
        for image_id, captions in image_captions.items():
            if max_samples and len(data) >= max_samples:
                break

            data.append({
                "image_id": image_id,
                "expected_output": captions[0],
                "all_captions": captions,  # Keep all captions for evaluation
                "task_type": "caption",
                "prompt": "Generate a detailed caption describing this image.",
                "id": f"coco_caption_{image_id}",
                "image": f"COCO_val2014_{image_id:012d}.jpg"
            })

        return data

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return up to five hard-coded caption samples that carry no image."""
        sample_captions = [
            "A cat sitting on a windowsill looking outside",
            "Three people walking in a park on a sunny day",
            "A red car parked on a city street",
            "A kitchen with modern appliances and granite countertops",
            "A dog playing fetch in a grassy field"
        ]

        data = []
        for i, caption in enumerate(sample_captions):
            if max_samples and i >= max_samples:
                break

            sample = {
                "image_id": f"placeholder_{i}",
                "expected_output": caption,
                "task_type": "caption",
                "prompt": "Generate a detailed caption describing this image.",
                "id": f"coco_caption_placeholder_{i}",
                "image": None  # Placeholder - no actual image
            }
            data.append(sample)

        logger.info(f"Loaded {len(data)} placeholder caption samples")
        return data
332
-
333
-
334
class DocVQADataset:
    """Loader for DocVQA; at present it only serves built-in placeholder samples."""

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Attach a downloader for ``cache_dir``; no dataset directory is resolved."""
        self.dataset_dir = None
        self.downloader = MultimodalDatasetDownloader(cache_dir)

    def load_data(self, max_samples: Optional[int] = None, use_real_data: bool = False) -> List[Dict[str, Any]]:
        """Return placeholder DocVQA samples.

        ``use_real_data`` is accepted but ignored: only placeholder data is
        available because DocVQA requires special access.
        """
        return self._load_placeholder_data(max_samples)

    def _load_placeholder_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Build up to five hard-coded document-QA samples without images.

        A falsy ``max_samples`` (``None`` or ``0``) means "all five"; a
        negative value yields an empty list.
        """
        qa_pairs = (
            ("What is the title of this document?", "Annual Report 2023"),
            ("Who is the author?", "John Smith"),
            ("What is the total revenue?", "$1.2 million"),
            ("How many pages does this document have?", "45"),
            ("What year was this published?", "2023"),
        )

        # Number of entries to emit, clamped to the available range.
        if max_samples:
            count = max(0, min(max_samples, len(qa_pairs)))
        else:
            count = len(qa_pairs)

        data = [
            {
                "document_id": f"doc_{idx}",
                "question": text,
                "expected_output": reply,
                "task_type": "document_vqa",
                "id": f"docvqa_placeholder_{idx}",
                "image": None,  # Placeholder - no actual document image
            }
            for idx, (text, reply) in enumerate(qa_pairs[:count])
        ]

        logger.info(f"Loaded {len(data)} placeholder DocVQA samples")
        return data
373
-
374
-
375
class AudioDatasetLoader:
    """Loader for speech-task samples; every method returns placeholder data."""

    def __init__(self, cache_dir: str = "~/.isa_model/multimodal_datasets"):
        """Attach a downloader for ``cache_dir``."""
        self.downloader = MultimodalDatasetDownloader(cache_dir)

    def load_librispeech_data(self, max_samples: Optional[int] = None, use_real_data: bool = False) -> List[Dict[str, Any]]:
        """Return placeholder speech-to-text samples.

        ``use_real_data`` is accepted but ignored; only placeholder data is
        produced.
        """
        return self._load_placeholder_speech_data(max_samples)

    def _load_placeholder_speech_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Build up to five hard-coded transcript samples without audio.

        A falsy ``max_samples`` (``None`` or ``0``) means "all five"; a
        negative value yields an empty list.
        """
        transcripts = (
            "The quick brown fox jumps over the lazy dog",
            "Machine learning is transforming artificial intelligence",
            "Natural language processing enables computers to understand human speech",
            "Deep learning models require large amounts of training data",
            "Speech recognition technology has improved significantly in recent years",
        )

        # Number of entries to emit, clamped to the available range.
        if max_samples:
            count = max(0, min(max_samples, len(transcripts)))
        else:
            count = len(transcripts)

        data = []
        for idx in range(count):
            speaker_gender = "female" if idx % 2 else "male"
            data.append({
                "audio_id": f"speech_{idx}",
                "expected_output": transcripts[idx],
                "task_type": "stt",
                "id": f"librispeech_placeholder_{idx}",
                "audio": None,  # Placeholder - no actual audio file
                "metadata": {
                    "speaker": f"speaker_{idx % 3}",
                    "gender": speaker_gender,
                    "duration": 3.5 + idx * 0.5,
                },
            })

        logger.info(f"Loaded {len(data)} placeholder speech samples")
        return data

    def load_emotion_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
        """Build placeholder emotion-recognition samples.

        At most 20 samples are produced; a falsy ``max_samples`` selects all
        20. Labels cycle through the five emotions in order.
        """
        labels = ("happy", "sad", "angry", "neutral", "surprised")
        total = min(max_samples or 20, 20)

        data = [
            {
                "audio_id": f"emotion_{idx}",
                "expected_output": labels[idx % len(labels)],
                "task_type": "emotion",
                "id": f"emotion_placeholder_{idx}",
                "audio": None,  # Placeholder - no actual audio file
                "metadata": {
                    "speaker": f"speaker_{idx % 5}",
                    "intensity": "medium",
                },
            }
            for idx in range(total)
        ]

        logger.info(f"Loaded {len(data)} placeholder emotion samples")
        return data
440
-
441
-
442
- # Convenience functions
443
def create_vqa_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> VQAv2Dataset:
    """Build a ``VQAv2Dataset`` whose downloader caches under ``cache_dir``."""
    dataset = VQAv2Dataset(cache_dir)
    return dataset
446
-
447
-
448
def create_coco_captions_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> COCOCaptionsDataset:
    """Build a ``COCOCaptionsDataset`` whose downloader caches under ``cache_dir``."""
    dataset = COCOCaptionsDataset(cache_dir)
    return dataset
451
-
452
-
453
def create_docvqa_dataset(cache_dir: str = "~/.isa_model/multimodal_datasets") -> DocVQADataset:
    """Build a ``DocVQADataset`` whose downloader caches under ``cache_dir``."""
    dataset = DocVQADataset(cache_dir)
    return dataset
456
-
457
-
458
def create_audio_dataset_loader(cache_dir: str = "~/.isa_model/multimodal_datasets") -> AudioDatasetLoader:
    """Build an ``AudioDatasetLoader`` whose downloader caches under ``cache_dir``."""
    loader = AudioDatasetLoader(cache_dir)
    return loader