isage-benchmark-agent 0.1.0.1__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. isage_benchmark_agent-0.1.0.1.dist-info/METADATA +91 -0
  2. isage_benchmark_agent-0.1.0.1.dist-info/RECORD +51 -0
  3. isage_benchmark_agent-0.1.0.1.dist-info/WHEEL +5 -0
  4. isage_benchmark_agent-0.1.0.1.dist-info/entry_points.txt +2 -0
  5. isage_benchmark_agent-0.1.0.1.dist-info/licenses/LICENSE +21 -0
  6. isage_benchmark_agent-0.1.0.1.dist-info/top_level.txt +1 -0
  7. sage/__init__.py +0 -0
  8. sage/benchmark/__init__.py +0 -0
  9. sage/benchmark/benchmark_agent/__init__.py +108 -0
  10. sage/benchmark/benchmark_agent/__main__.py +177 -0
  11. sage/benchmark/benchmark_agent/acebench_loader.py +369 -0
  12. sage/benchmark/benchmark_agent/adapter_registry.py +3036 -0
  13. sage/benchmark/benchmark_agent/config/config_loader.py +176 -0
  14. sage/benchmark/benchmark_agent/config/default_config.yaml +24 -0
  15. sage/benchmark/benchmark_agent/config/planning_exp.yaml +34 -0
  16. sage/benchmark/benchmark_agent/config/timing_detection_exp.yaml +34 -0
  17. sage/benchmark/benchmark_agent/config/tool_selection_exp.yaml +32 -0
  18. sage/benchmark/benchmark_agent/data_paths.py +332 -0
  19. sage/benchmark/benchmark_agent/evaluation/__init__.py +217 -0
  20. sage/benchmark/benchmark_agent/evaluation/analyzers/__init__.py +11 -0
  21. sage/benchmark/benchmark_agent/evaluation/analyzers/planning_analyzer.py +111 -0
  22. sage/benchmark/benchmark_agent/evaluation/analyzers/timing_analyzer.py +135 -0
  23. sage/benchmark/benchmark_agent/evaluation/analyzers/tool_selection_analyzer.py +124 -0
  24. sage/benchmark/benchmark_agent/evaluation/evaluator.py +228 -0
  25. sage/benchmark/benchmark_agent/evaluation/metrics.py +650 -0
  26. sage/benchmark/benchmark_agent/evaluation/report_builder.py +217 -0
  27. sage/benchmark/benchmark_agent/evaluation/unified_tool_selection.py +602 -0
  28. sage/benchmark/benchmark_agent/experiments/__init__.py +63 -0
  29. sage/benchmark/benchmark_agent/experiments/base_experiment.py +263 -0
  30. sage/benchmark/benchmark_agent/experiments/method_comparison.py +742 -0
  31. sage/benchmark/benchmark_agent/experiments/planning_exp.py +262 -0
  32. sage/benchmark/benchmark_agent/experiments/timing_detection_exp.py +198 -0
  33. sage/benchmark/benchmark_agent/experiments/tool_selection_exp.py +250 -0
  34. sage/benchmark/benchmark_agent/scripts/__init__.py +26 -0
  35. sage/benchmark/benchmark_agent/scripts/experiments/__init__.py +40 -0
  36. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_ablation.py +425 -0
  37. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_error.py +400 -0
  38. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_robustness.py +439 -0
  39. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_scaling.py +565 -0
  40. sage/benchmark/benchmark_agent/scripts/experiments/exp_cross_dataset.py +406 -0
  41. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_planning.py +315 -0
  42. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_selection.py +344 -0
  43. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_timing.py +270 -0
  44. sage/benchmark/benchmark_agent/scripts/experiments/exp_training_comparison.py +620 -0
  45. sage/benchmark/benchmark_agent/scripts/experiments/exp_utils.py +427 -0
  46. sage/benchmark/benchmark_agent/scripts/experiments/figure_generator.py +677 -0
  47. sage/benchmark/benchmark_agent/scripts/experiments/llm_service.py +332 -0
  48. sage/benchmark/benchmark_agent/scripts/experiments/run_paper1_experiments.py +627 -0
  49. sage/benchmark/benchmark_agent/scripts/experiments/sage_bench_cli.py +422 -0
  50. sage/benchmark/benchmark_agent/scripts/experiments/table_generator.py +430 -0
  51. sage/benchmark/benchmark_agent/tools_loader.py +212 -0
@@ -0,0 +1,176 @@
1
+ """
2
+ Configuration Loader for Agent Benchmark Experiments
3
+
4
+ Provides utilities for loading and parsing YAML configuration files
5
+ with environment variable substitution.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Any, Optional
12
+
13
+ import yaml
14
+
15
+ from sage.benchmark.benchmark_agent.experiments.base_experiment import (
16
+ ExperimentConfig,
17
+ create_config,
18
+ )
19
+
20
+
21
class ConfigLoader:
    """
    Loader for experiment YAML configurations.

    Supports:
    - Environment variable substitution (${VAR} or $VAR)
    - Special ${PROJECT_ROOT} variable
    - Type-safe config object creation
    """

    @staticmethod
    def _find_project_root() -> Path:
        """Find project root directory (where .git exists).

        Walks upward from the current working directory; falls back to
        the current working directory when no .git directory is found.
        """
        current = Path.cwd()
        while current.parent != current:
            if (current / ".git").exists():
                return current
            current = current.parent
        return Path.cwd()

    @staticmethod
    def _expand_vars(value: Any, context: Optional[dict[str, str]] = None) -> Any:
        """
        Recursively expand environment variables in config values.

        Args:
            value: Config value (can be str, dict, list, etc.)
            context: Additional variable context, consulted before the
                process environment. The caller's dict is NOT mutated.

        Returns:
            Value with expanded variables. Variables found neither in
            the context nor the environment are left as literal text.
        """
        # Copy so the caller's mapping is never mutated (previously
        # PROJECT_ROOT was injected into the caller-supplied dict).
        context = {} if context is None else dict(context)

        # Add PROJECT_ROOT to context
        if "PROJECT_ROOT" not in context:
            context["PROJECT_ROOT"] = str(ConfigLoader._find_project_root())

        if isinstance(value, str):
            # Pattern matches ${VAR} or $VAR
            pattern = r"\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)"

            def replacer(match):
                var_name = match.group(1) or match.group(2)
                # Check context first, then environment; keep the raw
                # token (match.group(0)) when the variable is undefined.
                return context.get(var_name, os.environ.get(var_name, match.group(0)))

            return re.sub(pattern, replacer, value)

        elif isinstance(value, dict):
            return {k: ConfigLoader._expand_vars(v, context) for k, v in value.items()}

        elif isinstance(value, list):
            return [ConfigLoader._expand_vars(item, context) for item in value]

        return value

    @classmethod
    def load_yaml(cls, config_path) -> dict[str, Any]:
        """
        Load and parse YAML config file.

        Args:
            config_path: Path to YAML config file (str or Path)

        Returns:
            Parsed config dictionary with expanded variables. An empty
            YAML file yields an empty dict.

        Raises:
            FileNotFoundError: If the config file does not exist.
        """
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        with open(config_path, encoding="utf-8") as f:
            raw_config = yaml.safe_load(f)

        # yaml.safe_load returns None for an empty document; normalize
        # to a dict so callers can always treat the result as a mapping.
        if raw_config is None:
            raw_config = {}

        # Expand environment variables
        return cls._expand_vars(raw_config)

    @classmethod
    def load_config(cls, config_path) -> ExperimentConfig:
        """
        Load YAML config and create typed config object.

        Args:
            config_path: Path to YAML config file (str or Path)

        Returns:
            ExperimentConfig subclass instance
        """
        config_dict = cls.load_yaml(config_path)
        return create_config(config_dict)

    @classmethod
    def load_default_config(cls) -> dict[str, Any]:
        """
        Load default configuration.

        Returns:
            Default config dictionary, or {} when the bundled
            default_config.yaml is absent.
        """
        default_path = Path(__file__).parent.parent / "config" / "default_config.yaml"
        if default_path.exists():
            return cls.load_yaml(default_path)
        return {}

    @classmethod
    def merge_configs(
        cls, base_config: dict[str, Any], override_config: dict[str, Any]
    ) -> dict[str, Any]:
        """
        Merge two configuration dictionaries.

        Nested dicts are merged recursively; any other value type in the
        override replaces the base value wholesale.

        Args:
            base_config: Base configuration
            override_config: Override configuration

        Returns:
            Merged configuration (override takes precedence)
        """
        merged = base_config.copy()

        for key, value in override_config.items():
            if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
                merged[key] = cls.merge_configs(merged[key], value)
            else:
                merged[key] = value

        return merged
152
+
153
+
154
def load_config_with_defaults(config_path: Path) -> ExperimentConfig:
    """
    Load config with default values merged in.

    Args:
        config_path: Path to experiment config file

    Returns:
        ExperimentConfig with defaults applied
    """
    # Layer the experiment file on top of the bundled defaults, then
    # build the typed config object from the merged dict.
    defaults = ConfigLoader.load_default_config()
    overrides = ConfigLoader.load_yaml(config_path)
    merged = ConfigLoader.merge_configs(defaults, overrides)
    return create_config(merged)
@@ -0,0 +1,24 @@
1
+ # Default Configuration for Agent Benchmark Experiments
2
+
3
+ # Data settings
4
+ profile: "quick_eval" # agent_eval usage profile
5
+ split: "dev" # Data split: train/dev/test
6
+ max_samples: # Limit samples (null = all)
7
+
8
+ # Randomness
9
+ seed: 42
10
+
11
+ # Output and reporting
12
+ report:
13
+ format: ["json", "markdown"]
14
+ include_breakdowns: true
15
+ path: "${PROJECT_ROOT}/outputs/agent_benchmark"
16
+ markdown_template:
17
+
18
+ # Metrics (common defaults, overridden per experiment)
19
+ metrics:
20
+ - "accuracy"
21
+
22
+ # Logging
23
+ verbose: true
24
+ log_level: "INFO"
@@ -0,0 +1,34 @@
1
+ # Planning Experiment Configuration
2
+
3
+ experiment: planning
4
+
5
+ # Data configuration
6
+ profile: "full_eval"
7
+ split: "dev"
8
+ max_samples:
9
+
10
+ # Strategy configuration
11
+ planner: "baseline.template"
12
+ min_steps: 5
13
+ max_steps: 10
14
+ planner_params:
15
+ allow_tool_reuse: true
16
+ enforce_sequence: true
17
+
18
+ # Metrics to evaluate
19
+ metrics:
20
+ - "plan_success_rate"
21
+ - "step_accuracy"
22
+ - "tool_sequence_match"
23
+ - "average_plan_length"
24
+
25
+ # Report configuration
26
+ report:
27
+ format: ["json", "markdown"]
28
+ include_breakdowns: true
29
+ path: "${PROJECT_ROOT}/outputs/agent_benchmark/planning"
30
+ markdown_template:
31
+
32
+ # Reproducibility
33
+ seed: 42
34
+ verbose: true
@@ -0,0 +1,34 @@
1
+ # Timing Detection Experiment Configuration
2
+
3
+ experiment: timing_detection
4
+
5
+ # Data configuration
6
+ profile: "full_eval"
7
+ split: "dev"
8
+ max_samples:
9
+
10
+ # Strategy configuration
11
+ detector: "baseline.threshold"
12
+ threshold: 0.5
13
+ detector_params:
14
+ use_context: true
15
+ confidence_threshold: 0.7
16
+
17
+ # Metrics to evaluate
18
+ metrics:
19
+ - "f1_score"
20
+ - "precision"
21
+ - "recall"
22
+ - "accuracy"
23
+ - "confusion_matrix"
24
+
25
+ # Report configuration
26
+ report:
27
+ format: ["json", "markdown"]
28
+ include_breakdowns: true
29
+ path: "${PROJECT_ROOT}/outputs/agent_benchmark/timing_detection"
30
+ markdown_template:
31
+
32
+ # Reproducibility
33
+ seed: 42
34
+ verbose: true
@@ -0,0 +1,32 @@
1
+ # Tool Selection Experiment Configuration
2
+
3
+ experiment: tool_selection
4
+
5
+ # Data configuration
6
+ profile: "quick_eval"
7
+ split: "dev"
8
+ max_samples:
9
+
10
+ # Strategy configuration
11
+ selector: "baseline.keyword"
12
+ top_k: 5
13
+ selector_params:
14
+ min_score: 0.1
15
+
16
+ # Metrics to evaluate
17
+ metrics:
18
+ - "top_k_accuracy"
19
+ - "recall@5"
20
+ - "precision@5"
21
+ - "mrr"
22
+
23
+ # Report configuration
24
+ report:
25
+ format: ["json", "markdown"]
26
+ include_breakdowns: true
27
+ path: "${PROJECT_ROOT}/outputs/agent_benchmark/tool_selection"
28
+ markdown_template:
29
+
30
+ # Reproducibility
31
+ seed: 42
32
+ verbose: true
@@ -0,0 +1,332 @@
1
+ """
2
+ Data Paths Configuration for Agent Benchmark
3
+
4
+ This module provides centralized management of data paths for agent benchmark.
5
+ It follows SAGE's two-layer data architecture:
6
+ 1. Source Layer: Original data accessed via DataManager
7
+ 2. Runtime Layer: Generated data for specific experiments
8
+
9
+ Usage:
10
+ from sage.benchmark.benchmark_agent.data_paths import (
11
+ get_source_paths,
12
+ get_runtime_paths,
13
+ ensure_runtime_dirs,
14
+ DataPathsConfig,
15
+ )
16
+
17
+ # Get source data paths (read-only)
18
+ source = get_source_paths()
19
+ tools_file = source.tools_catalog
20
+
21
+ # Get runtime data paths
22
+ runtime = get_runtime_paths()
23
+ output_dir = runtime.tool_selection_dir
24
+
25
+ # Ensure runtime directories exist
26
+ ensure_runtime_dirs()
27
+ """
28
+
29
+ import os
30
+ from dataclasses import dataclass
31
+ from pathlib import Path
32
+ from typing import Optional
33
+
34
+
35
+ def _find_sage_root() -> Path:
36
+ """
37
+ Find SAGE project root directory.
38
+
39
+ Looks for .git directory or SAGE_ROOT environment variable.
40
+ """
41
+ # Check environment variable first
42
+ if "SAGE_ROOT" in os.environ:
43
+ return Path(os.environ["SAGE_ROOT"])
44
+
45
+ # Walk up from current file to find project root
46
+ current = Path(__file__).resolve()
47
+ while current.parent != current:
48
+ if (current / ".git").exists() and (current / "packages").exists():
49
+ return current
50
+ current = current.parent
51
+
52
+ # Fallback to current working directory
53
+ return Path.cwd()
54
+
55
+
56
def _find_package_root() -> Path:
    """Find sage-benchmark package root."""
    # Walk up from this file until a directory literally named
    # "sage-benchmark" is found.
    node = Path(__file__).resolve()
    while node != node.parent:
        if node.name == "sage-benchmark":
            return node
        node = node.parent

    # Fallback: derive the package location from the repository root.
    return _find_sage_root() / "packages" / "sage-benchmark"
68
+
69
+
70
@dataclass
class SourcePaths:
    """
    Paths to source data (read-only, via DataManager).

    These are the original data files that should not be modified directly.
    Use DataManager for standard access.
    """

    # Root directories
    data_root: Path

    # Agent benchmark data
    benchmark_dir: Path
    benchmark_splits_dir: Path
    benchmark_metadata_dir: Path

    # Agent tools data
    tools_dir: Path
    tools_data_dir: Path

    # Agent SFT data
    sft_dir: Path
    sft_data_dir: Path

    # --- Benchmark split files ---

    @property
    def tool_selection_file(self) -> Path:
        """JSONL file holding the tool-selection benchmark split."""
        return Path(self.benchmark_splits_dir, "tool_selection.jsonl")

    @property
    def task_planning_file(self) -> Path:
        """JSONL file holding the task-planning benchmark split."""
        return Path(self.benchmark_splits_dir, "task_planning.jsonl")

    @property
    def timing_judgment_file(self) -> Path:
        """JSONL file holding the timing-judgment benchmark split."""
        return Path(self.benchmark_splits_dir, "timing_judgment.jsonl")

    # --- Tool metadata files ---

    @property
    def tools_catalog(self) -> Path:
        """JSONL catalog of available tools."""
        return Path(self.tools_data_dir, "tool_catalog.jsonl")

    @property
    def tools_categories(self) -> Path:
        """JSON file mapping tools to categories."""
        return Path(self.tools_data_dir, "categories.json")

    # --- SFT data files ---

    @property
    def sft_conversations(self) -> Path:
        """JSONL file with SFT conversation data."""
        return Path(self.sft_data_dir, "sft_conversations.jsonl")
124
+
125
+
126
@dataclass
class RuntimePaths:
    """
    Paths to runtime/generated data.

    These are generated by prepare_*.py scripts for specific experiments.
    Stored in .sage/benchmark/data/ (gitignored).
    """

    # Root directories
    data_root: Path
    results_root: Path

    # Task-specific data directories
    tool_selection_dir: Path
    task_planning_dir: Path
    timing_judgment_dir: Path

    # Results directories
    tool_selection_results: Path
    task_planning_results: Path
    timing_judgment_results: Path

    @property
    def tool_selection_base(self) -> Path:
        """Base tool-selection data file."""
        return Path(self.tool_selection_dir, "tool_selection.jsonl")

    def tool_selection_with_candidates(self, num_candidates: int) -> Path:
        """Tool-selection data file for a specific candidate pool size."""
        return Path(self.tool_selection_dir, f"tool_selection_{num_candidates}.jsonl")

    @property
    def task_planning_base(self) -> Path:
        """Base task-planning data file."""
        return Path(self.task_planning_dir, "task_planning.jsonl")

    def timing_split_file(self, split: str) -> Path:
        """Timing-judgment data file for a specific split."""
        return Path(self.timing_judgment_dir, f"{split}.jsonl")
166
+
167
+
168
@dataclass
class DataPathsConfig:
    """
    Complete data paths configuration.

    Provides access to both source and runtime paths.
    """

    # Read-only source data paths (original data files).
    source: SourcePaths
    # Generated/runtime data paths (experiment outputs under .sage/).
    runtime: RuntimePaths

    @property
    def sage_root(self) -> Path:
        """SAGE project root."""
        # Re-resolved on every access; not cached on the instance.
        return _find_sage_root()

    @property
    def package_root(self) -> Path:
        """sage-benchmark package root."""
        # Re-resolved on every access; not cached on the instance.
        return _find_package_root()
188
+
189
+
190
# Module-level cached config, lazily populated by get_data_paths_config().
_config: Optional[DataPathsConfig] = None
192
+
193
+
194
def get_source_paths() -> SourcePaths:
    """
    Get source data paths.

    Returns:
        SourcePaths object with paths to original data files.
    """
    root = _find_package_root() / "src" / "sage" / "data" / "sources"

    # Hoist the three dataset family roots so each path is spelled once.
    benchmark = root / "agent_benchmark"
    tools = root / "agent_tools"
    sft = root / "agent_sft"

    return SourcePaths(
        data_root=root,
        benchmark_dir=benchmark,
        benchmark_splits_dir=benchmark / "splits",
        benchmark_metadata_dir=benchmark / "metadata",
        tools_dir=tools,
        tools_data_dir=tools / "data",
        sft_dir=sft,
        sft_data_dir=sft / "data",
    )
217
+
218
+
219
def get_runtime_paths() -> RuntimePaths:
    """
    Get runtime/generated data paths.

    Returns:
        RuntimePaths object with paths to generated data files.
    """
    # Runtime data lives under the gitignored .sage/benchmark tree.
    base = _find_sage_root() / ".sage" / "benchmark"
    data_root = base / "data"
    results_root = base / "results"

    return RuntimePaths(
        data_root=data_root,
        results_root=results_root,
        tool_selection_dir=data_root / "tool_selection",
        task_planning_dir=data_root / "task_planning",
        timing_judgment_dir=data_root / "timing_judgment",
        tool_selection_results=results_root / "tool_selection",
        task_planning_results=results_root / "task_planning",
        timing_judgment_results=results_root / "timing_judgment",
    )
244
+
245
+
246
def get_data_paths_config() -> DataPathsConfig:
    """
    Get complete data paths configuration.

    Returns cached config object.
    """
    global _config
    # Fast path: already built once in this process.
    if _config is not None:
        return _config
    _config = DataPathsConfig(
        source=get_source_paths(),
        runtime=get_runtime_paths(),
    )
    return _config
259
+
260
+
261
def ensure_runtime_dirs() -> None:
    """
    Ensure all runtime directories exist.

    Call this before writing generated data.
    """
    paths = get_runtime_paths()

    # Data directories first, then results directories.
    for directory in (
        paths.tool_selection_dir,
        paths.task_planning_dir,
        paths.timing_judgment_dir,
        paths.tool_selection_results,
        paths.task_planning_results,
        paths.timing_judgment_results,
    ):
        directory.mkdir(parents=True, exist_ok=True)
278
+
279
+
280
def print_data_paths_summary() -> None:
    """Print summary of all data paths for debugging.

    Writes a human-readable overview of source, runtime, and results
    paths to stdout, and reports whether the key data locations
    currently exist on disk. Intended as a CLI diagnostic.
    """
    # Uses the process-level cached config (see get_data_paths_config).
    config = get_data_paths_config()

    print("\n" + "=" * 60)
    print("SAGE Agent Benchmark Data Paths")
    print("=" * 60)

    print("\nšŸ“‚ Source Data (read-only, via DataManager):")
    print(f" Root: {config.source.data_root}")
    print(f" Tool Selection: {config.source.tool_selection_file}")
    print(f" Task Planning: {config.source.task_planning_file}")
    print(f" Timing Judgment: {config.source.timing_judgment_file}")
    print(f" Tools Catalog: {config.source.tools_catalog}")
    print(f" SFT Data: {config.source.sft_conversations}")

    print("\nšŸ“‚ Runtime Data (generated, in .sage/):")
    print(f" Root: {config.runtime.data_root}")
    print(f" Tool Selection: {config.runtime.tool_selection_dir}")
    print(f" Task Planning: {config.runtime.task_planning_dir}")
    print(f" Timing Judgment: {config.runtime.timing_judgment_dir}")

    print("\nšŸ“‚ Results:")
    print(f" Root: {config.runtime.results_root}")

    # Check existence — tool_selection paths stand in for the whole tree here.
    print("\nšŸ“‹ Status:")
    source_exists = config.source.tool_selection_file.exists()
    runtime_exists = config.runtime.tool_selection_dir.exists()
    print(f" Source data exists: {'āœ…' if source_exists else 'āŒ'}")
    print(f" Runtime data exists: {'āœ…' if runtime_exists else 'āŒ'}")
    print("=" * 60)
312
+
313
+
314
# Backwards compatibility aliases
def get_data_paths() -> dict:
    """
    Legacy function for backwards compatibility.

    Returns dict with source data paths.
    Prefer using get_source_paths() or get_runtime_paths() directly.
    """
    source_paths = get_source_paths()
    runtime_paths = get_runtime_paths()
    return {
        "tools_dir": source_paths.tools_data_dir,
        "benchmark_dir": source_paths.benchmark_splits_dir,
        "output_dir": runtime_paths.tool_selection_dir,
    }
328
+
329
+
330
if __name__ == "__main__":
    # Quick diagnostic entry point: print all resolved data paths.
    print_data_paths_summary()