mirage-benchmark 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mirage-benchmark might be problematic. Click here for more details.

@@ -0,0 +1,31 @@
1
+ """
2
+ Utilities module for MiRAGE - Preflight checks, statistics, and ablation studies.
3
+
4
+ Imports are lazy to avoid loading optional dependencies at import time.
5
+ """
6
+
7
# Registry of lazily exported names: public attribute -> (submodule, attribute).
# Entries are grouped by the submodule that defines them; every name is
# exported under the same identifier it has inside its submodule.
_LAZY_IMPORTS = {
    exported: (submodule, exported)
    for submodule, exported_names in (
        # Preflight
        ("preflight", (
            "run_preflight_checks",
            "check_gpu_availability",
            "check_api_connectivity",
            "PreflightChecker",
        )),
        # Statistics
        ("stats", (
            "compute_dataset_stats",
            "print_dataset_stats",
            "compute_qa_category_stats",
            "print_qa_category_stats",
        )),
        # Ablation
        ("ablation", (
            "run_ablation_study",
            "AblationConfig",
        )),
    )
    for exported in exported_names
}
22
+
23
+
24
def __getattr__(name):
    """Resolve a lazily exported attribute when it is first requested.

    Names registered in ``_LAZY_IMPORTS`` are fetched from their
    ``mirage.utils`` submodule, which is imported only at this point so that
    optional dependencies are not loaded when the package itself is imported.

    Raises:
        AttributeError: if ``name`` is not a registered lazy export.
    """
    target = _LAZY_IMPORTS.get(name)
    if target is None:
        raise AttributeError(f"module 'mirage.utils' has no attribute '{name}'")

    import importlib

    submodule, attribute = target
    loaded = importlib.import_module(f"mirage.utils.{submodule}")
    return getattr(loaded, attribute)
@@ -0,0 +1,360 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified Ablation Study Runner
4
+
5
+ Runs the full MiRAGE pipeline for:
6
+ 1. Baseline (all features enabled)
7
+ 2. Each ablation configuration (one at a time)
8
+
9
+ Results are saved to separate directories under the base output path.
10
+
11
+ Usage:
12
+ python run_ablation_study.py [--config config.yaml] [--skip-baseline] [--only ABLATION_NAME]
13
+
14
+ Examples:
15
+ python run_ablation_study.py # Run all (baseline + all ablations)
16
+ python run_ablation_study.py --skip-baseline # Run only ablations
17
+ python run_ablation_study.py --only no_verifier # Run only one ablation
18
+ python run_ablation_study.py --only baseline # Run only baseline
19
+ """
20
+
21
+ import os
22
+ import sys
23
+ import yaml
24
+ import shutil
25
+ import argparse
26
+ import subprocess
27
+ from datetime import datetime
28
+ from pathlib import Path
29
+ from typing import Dict, Any, List, Optional
30
+
31
+ # Ablation configurations to test
32
# Ablation configurations to test.
# Each row is (mode name, description, ablation flag); the flag names the
# ``ablation.<flag>.enabled`` setting switched on for that mode. The baseline
# has no flag and therefore no config changes.
ABLATION_MODES = [
    {
        "name": mode_name,
        "description": mode_description,
        "config_changes": (
            {f"ablation.{flag}.enabled": True} if flag is not None else {}
        ),
    }
    for mode_name, mode_description, flag in (
        ("baseline",
         "Full MiRAGE framework (all features enabled)",
         None),
        ("no_multihop",
         "Disable Multihop Context Optimization Loop",
         "disable_multihop_context"),
        ("no_verifier",
         "Disable Verifier Agent",
         "disable_verifier"),
        ("no_persona",
         "Disable Domain/Persona Injection",
         "disable_persona"),
        ("fixed_chunking",
         "Use Fixed-Length Chunking (2048 tokens)",
         "fixed_chunking"),
        ("description_only",
         "Multimodal: Description Only (no raw images)",
         "description_only"),
        ("image_only",
         "Multimodal: Image Only (no generated descriptions)",
         "image_only"),
    )
]
81
+
82
+
83
def load_config(config_path: str) -> Dict[str, Any]:
    """Load a YAML configuration file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document. An empty file, for which ``yaml.safe_load``
        returns ``None``, yields an empty dict so callers can always use
        mapping methods on the result.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, so the same file parses identically on every system.
    with open(config_path, 'r', encoding='utf-8') as f:
        loaded = yaml.safe_load(f)
    return {} if loaded is None else loaded
87
+
88
+
89
def save_config(config: Dict[str, Any], config_path: str):
    """Save a configuration mapping to a YAML file.

    Args:
        config: Configuration to serialise (block style, key order preserved).
        config_path: Destination path; an existing file is overwritten.
    """
    # Serialise before opening the destination: opening with 'w' truncates the
    # file, so a serialisation error must surface while the old content is
    # still intact.
    serialized = yaml.dump(config, default_flow_style=False, sort_keys=False)
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
93
+
94
+
95
def set_nested_value(config: Dict, key_path: str, value: Any):
    """Set a nested dictionary value using dot notation.

    Args:
        config: Dictionary to modify in place.
        key_path: Dot-separated path such as ``"ablation.image_only.enabled"``.
        value: Value stored under the final key of the path.

    Intermediate levels that are missing are created. An intermediate entry
    that exists but is not a dict (for example ``None`` from an empty YAML
    section) is replaced by a fresh dict, because the path cannot descend
    through a non-mapping value.
    """
    *parent_keys, leaf_key = key_path.split('.')
    node = config
    for key in parent_keys:
        child = node.get(key)
        if not isinstance(child, dict):
            # Missing or non-mapping level: start a new nested section here.
            child = node[key] = {}
        node = child
    node[leaf_key] = value
104
+
105
+
106
def reset_all_ablations(config: Dict) -> Dict:
    """Reset all ablation settings to disabled.

    Ensures ``config['ablation']`` holds a section for every known ablation
    and sets each section's ``enabled`` flag to ``False``. Other keys inside
    an existing section are left untouched.

    Args:
        config: Configuration dictionary, modified in place.

    Returns:
        The same ``config`` object, for call chaining.
    """
    ablation = config.get('ablation')
    if not isinstance(ablation, dict):
        # Missing, or present but empty/scalar in the YAML (parsed as a
        # non-dict such as None): start from an empty section.
        ablation = config['ablation'] = {}

    # Reset each ablation to disabled
    for ablation_key in ('disable_multihop_context', 'disable_verifier',
                         'disable_persona', 'fixed_chunking',
                         'description_only', 'image_only'):
        section = ablation.get(ablation_key)
        if not isinstance(section, dict):
            section = ablation[ablation_key] = {}
        section['enabled'] = False

    return config
122
+
123
+
124
def apply_ablation_config(config: Dict, ablation_mode: Dict) -> Dict:
    """Return ``config`` set up for a single ablation mode.

    Every ablation flag is first switched off, then each dotted-key override
    listed under the mode's ``config_changes`` entry is written into the
    configuration.
    """
    # Start from a state where no ablation is active.
    prepared = reset_all_ablations(config)

    # Layer this mode's overrides on top.
    overrides = ablation_mode.get('config_changes', {})
    for dotted_key in overrides:
        set_nested_value(prepared, dotted_key, overrides[dotted_key])

    return prepared
134
+
135
+
136
def get_output_dir(base_output_dir: str, ablation_name: str) -> str:
    """Return the output directory of one ablation run.

    The result is ``ablation_name`` joined onto ``base_output_dir``.
    """
    run_dir = os.path.join(base_output_dir, ablation_name)
    return run_dir
139
+
140
+
141
def run_pipeline(config_path: str, ablation_name: str) -> bool:
    """Run ``main.py`` as a subprocess and report whether it succeeded.

    The script is started with the current interpreter, using the directory
    that contains this file as the working directory.

    Args:
        config_path: Path of the configuration prepared for this run.
            NOTE(review): this value is not forwarded to ``main.py``; the
            child presumably locates its configuration on its own -- confirm
            that a non-default ``--config`` path is actually honoured.
        ablation_name: Label used in the progress messages.

    Returns:
        True if the subprocess exited with status 0; False if it exited with
        a non-zero status or could not be started.
    """
    print(f"\n{'='*70}")
    print(f"🚀 Running: {ablation_name}")
    print(f"{'='*70}\n")

    try:
        # Run main.py
        result = subprocess.run(
            [sys.executable, 'main.py'],
            cwd=os.path.dirname(os.path.abspath(__file__)),
            check=False,  # the exit status is inspected below
        )
    except Exception as e:
        # Deliberately broad: a launch failure of one mode is reported and
        # must not abort the remaining ablation runs.
        print(f"\n❌ {ablation_name}: ERROR - {e}")
        return False

    if result.returncode != 0:
        print(f"\n❌ {ablation_name}: FAILED (exit code {result.returncode})")
        return False

    print(f"\n✅ {ablation_name}: COMPLETED SUCCESSFULLY")
    return True
165
+
166
+
167
def create_summary_report(results: List[Dict], output_dir: str):
    """Create a summary report of all ablation runs.

    Writes ``ablation_study_summary.txt`` into ``output_dir`` (created if it
    does not exist) and prints the report's path.

    Args:
        results: One dict per run with the keys ``name``, ``description``,
            ``output_dir`` and ``success``; ``duration`` is optional and
            reported as ``N/A`` when absent.
        output_dir: Directory that receives the report file.
    """
    report_path = os.path.join(output_dir, "ablation_study_summary.txt")
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    total = len(results)
    successful = sum(1 for r in results if r['success'])

    lines = [
        "=" * 70 + "\n",
        "ABLATION STUDY SUMMARY\n",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        "=" * 70 + "\n\n",
        f"Total Runs: {total}\n",
        f"Successful: {successful}\n",
        f"Failed: {total - successful}\n\n",
        "-" * 70 + "\n",
        "INDIVIDUAL RESULTS\n",
        "-" * 70 + "\n\n",
    ]
    for result in results:
        status = "✅ SUCCESS" if result['success'] else "❌ FAILED"
        lines.append(f"{result['name']:20} {status}\n")
        lines.append(f" Description: {result['description']}\n")
        lines.append(f" Output Dir: {result['output_dir']}\n")
        lines.append(f" Duration: {result.get('duration', 'N/A')}\n\n")

    # The status markers are non-ASCII, so the encoding must be explicit:
    # a non-UTF-8 locale default would raise UnicodeEncodeError on write.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"\n📄 Summary report saved: {report_path}")
196
+
197
+
198
def main():
    """Command-line entry point: run the selected ablation modes in sequence.

    For every selected mode the configuration file is rewritten with that
    mode's settings and its own output directory, the pipeline is run, and
    the outcome is recorded. The original configuration file is backed up
    before the first run and restored afterwards, including on interruption.

    Returns:
        0 if every selected mode ran and succeeded (also for ``--list`` and
        ``--dry-run``); 1 on invalid arguments, a missing config file, any
        failed run, or when the study was interrupted by the user.
    """
    import copy  # local import: only this entry point needs it

    parser = argparse.ArgumentParser(
        description="Run unified ablation study for MiRAGE pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_ablation_study.py # Run all
  python run_ablation_study.py --skip-baseline # Skip baseline
  python run_ablation_study.py --only no_verifier # Run specific ablation
  python run_ablation_study.py --only baseline # Run baseline only
  python run_ablation_study.py --list # List available modes
"""
    )
    parser.add_argument('--config', default='config.yaml',
                        help='Path to config file (default: config.yaml)')
    parser.add_argument('--skip-baseline', action='store_true',
                        help='Skip the baseline run')
    parser.add_argument('--only', type=str, default=None,
                        help='Run only a specific ablation mode')
    parser.add_argument('--list', action='store_true',
                        help='List available ablation modes and exit')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be run without executing')

    args = parser.parse_args()

    # List available modes
    if args.list:
        print("\nAvailable Ablation Modes:")
        print("-" * 50)
        for mode in ABLATION_MODES:
            print(f" {mode['name']:20} - {mode['description']}")
        print()
        return 0

    # Validate --only argument
    if args.only:
        valid_names = [m['name'] for m in ABLATION_MODES]
        if args.only not in valid_names:
            print(f"❌ Error: Unknown ablation mode '{args.only}'")
            print(f" Valid modes: {', '.join(valid_names)}")
            return 1

    # Load original config
    config_path = args.config
    if not os.path.exists(config_path):
        print(f"❌ Error: Config file not found: {config_path}")
        return 1

    print("=" * 70)
    print("🔬 UNIFIED ABLATION STUDY")
    print("=" * 70)
    print(f"Config: {config_path}")
    print(f"Start Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Load and backup original config. An empty YAML file parses to None, so
    # fall back to an empty mapping before using mapping methods on it.
    original_config = load_config(config_path) or {}
    backup_path = config_path + '.backup'
    shutil.copy2(config_path, backup_path)
    print(f"Config backed up: {backup_path}")

    # Get base output directory (a `paths:` key with no value parses to None)
    paths_section = original_config.get('paths') or {}
    base_output_dir = paths_section.get('output_dir', 'output')
    print(f"Base Output Dir: {base_output_dir}")

    # Determine which modes to run; --only takes precedence over --skip-baseline
    modes_to_run = ABLATION_MODES.copy()

    if args.only:
        modes_to_run = [m for m in modes_to_run if m['name'] == args.only]
    elif args.skip_baseline:
        modes_to_run = [m for m in modes_to_run if m['name'] != 'baseline']

    print(f"\nModes to run: {[m['name'] for m in modes_to_run]}")
    print("-" * 70)

    if args.dry_run:
        print("\n🔍 DRY RUN - Would execute the following:")
        for mode in modes_to_run:
            output_dir = get_output_dir(base_output_dir, mode['name'])
            print(f"\n [{mode['name']}]")
            print(f" Description: {mode['description']}")
            print(f" Output: {output_dir}")
            print(f" Config changes: {mode['config_changes']}")
        print("\n✅ Dry run complete. Use without --dry-run to execute.")
        return 0

    # Run each ablation mode
    results = []
    interrupted = False

    try:
        for mode in modes_to_run:
            start_time = datetime.now()

            # Create output directory for this ablation
            output_dir = get_output_dir(base_output_dir, mode['name'])
            os.makedirs(output_dir, exist_ok=True)

            # Prepare config for this run from a pristine copy of the original.
            # Re-reading config_path here would pick up the file rewritten for
            # the previous mode rather than the user's original settings.
            config = copy.deepcopy(original_config)
            config = apply_ablation_config(config, mode)

            # Update output directory
            if not isinstance(config.get('paths'), dict):
                config['paths'] = {}
            config['paths']['output_dir'] = output_dir

            # Save modified config
            save_config(config, config_path)

            print(f"\n📁 Output: {output_dir}")
            print(f"📝 Mode: {mode['description']}")

            # Run the pipeline
            success = run_pipeline(config_path, mode['name'])

            end_time = datetime.now()
            duration = str(end_time - start_time).split('.')[0]  # Remove microseconds

            results.append({
                'name': mode['name'],
                'description': mode['description'],
                'output_dir': output_dir,
                'success': success,
                'duration': duration
            })

    except KeyboardInterrupt:
        interrupted = True
        print("\n\n⚠️ Ablation study interrupted by user")

    finally:
        # Restore original config
        print(f"\n{'='*70}")
        print("Restoring original configuration...")
        shutil.copy2(backup_path, config_path)
        os.remove(backup_path)
        print("✅ Original config restored")

    # Generate summary report
    if results:
        create_summary_report(results, base_output_dir)

    # Print final summary
    print(f"\n{'='*70}")
    print("ABLATION STUDY COMPLETE")
    print(f"{'='*70}")

    successful = sum(1 for r in results if r['success'])
    print(f"\nResults: {successful}/{len(results)} successful")

    for r in results:
        status = "✅" if r['success'] else "❌"
        print(f" {status} {r['name']:20} ({r['duration']})")

    print(f"\nResults saved in: {base_output_dir}/")
    if results:
        # The report file only exists when at least one run was recorded.
        print(f"Summary report: {base_output_dir}/ablation_study_summary.txt")

    # An interrupted study is incomplete even if every finished run succeeded
    # (all() over an empty result list would otherwise report success).
    if interrupted:
        return 1
    return 0 if all(r['success'] for r in results) else 1
356
+
357
+
358
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
360
+