isage-data 0.2.1.8__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isage_data-0.2.1.8.dist-info/METADATA +135 -0
- isage_data-0.2.1.8.dist-info/RECORD +132 -0
- isage_data-0.2.1.8.dist-info/WHEEL +5 -0
- isage_data-0.2.1.8.dist-info/entry_points.txt +2 -0
- isage_data-0.2.1.8.dist-info/licenses/LICENSE +21 -0
- isage_data-0.2.1.8.dist-info/top_level.txt +1 -0
- sage/data/__init__.py +37 -0
- sage/data/__init__.pyc +0 -0
- sage/data/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/__pycache__/cli.cpython-311.pyc +0 -0
- sage/data/__pycache__/cli.cpython-312.pyc +0 -0
- sage/data/__pycache__/manager.cpython-311.pyc +0 -0
- sage/data/__pycache__/manager.cpython-312.pyc +0 -0
- sage/data/cli.pyc +0 -0
- sage/data/manager.pyc +0 -0
- sage/data/sources/__init__.py +13 -0
- sage/data/sources/__init__.pyc +0 -0
- sage/data/sources/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/sources/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/agent_benchmark/__init__.py +35 -0
- sage/data/sources/agent_benchmark/__init__.pyc +0 -0
- sage/data/sources/agent_benchmark/dataloader.pyc +0 -0
- sage/data/sources/agent_benchmark/dataset.yaml +44 -0
- sage/data/sources/agent_benchmark/external_benchmarks/__init__.py +32 -0
- sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc +0 -0
- sage/data/sources/agent_benchmark/fix_tool_references.pyc +0 -0
- sage/data/sources/agent_benchmark/generate_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_planning_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_runtime_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_timing_data.pyc +0 -0
- sage/data/sources/agent_benchmark/test_integration.py +94 -0
- sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py +353 -0
- sage/data/sources/agent_benchmark/validate_cross_task.pyc +0 -0
- sage/data/sources/agent_benchmark/validate_data.pyc +0 -0
- sage/data/sources/agent_sft/__init__.py +10 -0
- sage/data/sources/agent_sft/__init__.pyc +0 -0
- sage/data/sources/agent_sft/data/generate_data.pyc +0 -0
- sage/data/sources/agent_sft/data/prompts_template.yaml +75 -0
- sage/data/sources/agent_sft/dataloader.pyc +0 -0
- sage/data/sources/agent_sft/dataset.yaml +9 -0
- sage/data/sources/agent_sft/fix_tool_ids.pyc +0 -0
- sage/data/sources/agent_sft/schemas.pyc +0 -0
- sage/data/sources/agent_sft/tests/test_agent_sft_loader.py +316 -0
- sage/data/sources/agent_tools/__init__.py +6 -0
- sage/data/sources/agent_tools/__init__.pyc +0 -0
- sage/data/sources/agent_tools/dataloader.pyc +0 -0
- sage/data/sources/agent_tools/dataset.yaml +9 -0
- sage/data/sources/agent_tools/generate_tools.pyc +0 -0
- sage/data/sources/agent_tools/schemas.pyc +0 -0
- sage/data/sources/agent_tools/test_integration.py +108 -0
- sage/data/sources/agent_tools/tests/test_agent_tools_loader.py +306 -0
- sage/data/sources/agent_tools/validate_data.pyc +0 -0
- sage/data/sources/bbh/__init__.py +5 -0
- sage/data/sources/bbh/__init__.pyc +0 -0
- sage/data/sources/bbh/dataloader.pyc +0 -0
- sage/data/sources/bbh/dataset.yaml +9 -0
- sage/data/sources/control_plane_benchmark/__init__.py +41 -0
- sage/data/sources/control_plane_benchmark/__init__.pyc +0 -0
- sage/data/sources/control_plane_benchmark/dataloader.pyc +0 -0
- sage/data/sources/control_plane_benchmark/dataset.yaml +101 -0
- sage/data/sources/gpqa/__init__.py +5 -0
- sage/data/sources/gpqa/__init__.pyc +0 -0
- sage/data/sources/gpqa/dataloader.pyc +0 -0
- sage/data/sources/gpqa/dataset.yaml +10 -0
- sage/data/sources/libamm_benchmark/__init__.py +10 -0
- sage/data/sources/libamm_benchmark/__init__.pyc +0 -0
- sage/data/sources/libamm_benchmark/dataset.yaml +9 -0
- sage/data/sources/locomo/__init__.py +5 -0
- sage/data/sources/locomo/__init__.pyc +0 -0
- sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc +0 -0
- sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc +0 -0
- sage/data/sources/locomo/__pycache__/download.cpython-311.pyc +0 -0
- sage/data/sources/locomo/dataloader.pyc +0 -0
- sage/data/sources/locomo/dataset.yaml +10 -0
- sage/data/sources/locomo/download.pyc +0 -0
- sage/data/sources/locomo/locomo10.json +66751 -0
- sage/data/sources/longmemeval/__init__.py +5 -0
- sage/data/sources/longmemeval/__init__.pyc +0 -0
- sage/data/sources/longmemeval/compose.pyc +0 -0
- sage/data/sources/longmemeval/config/longmemeval_groups.yaml +15 -0
- sage/data/sources/longmemeval/dataloader.pyc +0 -0
- sage/data/sources/longmemeval/dataset.yaml +9 -0
- sage/data/sources/longmemeval/download.pyc +0 -0
- sage/data/sources/memagentbench/Conflict_Resolution.parquet +0 -0
- sage/data/sources/memagentbench/__init__.py +16 -0
- sage/data/sources/memagentbench/__init__.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/conflict_resolution_loader.pyc +0 -0
- sage/data/sources/memagentbench/conflict_resolution_loader_test.py +169 -0
- sage/data/sources/memagentbench/dataset.yaml +10 -0
- sage/data/sources/memagentbench/download.pyc +0 -0
- sage/data/sources/mmlu/__init__.py +5 -0
- sage/data/sources/mmlu/__init__.pyc +0 -0
- sage/data/sources/mmlu/dataloader.pyc +0 -0
- sage/data/sources/mmlu/dataset.yaml +10 -0
- sage/data/sources/mmlu/download.pyc +0 -0
- sage/data/sources/orca_dpo/__init__.py +5 -0
- sage/data/sources/orca_dpo/__init__.pyc +0 -0
- sage/data/sources/orca_dpo/dataloader.pyc +0 -0
- sage/data/sources/qa_base/__init__.py +5 -0
- sage/data/sources/qa_base/__init__.pyc +0 -0
- sage/data/sources/qa_base/dataloader.pyc +0 -0
- sage/data/sources/qa_base/dataset.yaml +9 -0
- sage/data/sources/qa_base/qa_knowledge_base.txt +35 -0
- sage/data/sources/qa_base/qa_knowledge_chromaDB.txt +13 -0
- sage/data/sources/qa_base/sample/one_question.txt +1 -0
- sage/data/sources/qa_base/sample/question.txt +352 -0
- sage/data/sources/qa_base/sample/question1.txt +1 -0
- sage/data/usages/__init__.py +3 -0
- sage/data/usages/__init__.pyc +0 -0
- sage/data/usages/agent_eval/__init__.py +191 -0
- sage/data/usages/agent_eval/__init__.pyc +0 -0
- sage/data/usages/agent_eval/config.yaml +15 -0
- sage/data/usages/agent_eval/profiles/full_eval.yaml +15 -0
- sage/data/usages/agent_eval/profiles/quick_eval.yaml +11 -0
- sage/data/usages/agent_eval/profiles/sft_training.yaml +12 -0
- sage/data/usages/agent_eval/usage.yaml +8 -0
- sage/data/usages/libamm/config.yaml +13 -0
- sage/data/usages/neuromem/config.yaml +5 -0
- sage/data/usages/rag/config.yaml +9 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: isage-data
|
|
3
|
+
Version: 0.2.1.8
|
|
4
|
+
Summary: SAGE Data - Unified data loaders for memory benchmark datasets (LongMemEval, Locomo, MemAgentBench, etc.)
|
|
5
|
+
Author-email: IntelliStream Team <shuhao_zhang@hust.edu.cn>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/intellistream/sageData
|
|
8
|
+
Project-URL: Repository, https://github.com/intellistream/sageData
|
|
9
|
+
Project-URL: Documentation, https://github.com/intellistream/sageData/blob/main/README.md
|
|
10
|
+
Project-URL: Issues, https://github.com/intellistream/sageData/issues
|
|
11
|
+
Keywords: dataset,benchmark,memory,ai,longmemeval,locomo,memagentbench,sage
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: ==3.11.*
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: isage-common>=0.2.0
|
|
26
|
+
Requires-Dist: pandas>=2.0.0
|
|
27
|
+
Requires-Dist: numpy<2.3.0,>=1.26.0
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Requires-Dist: datasets>=2.14.0
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
32
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# SAGE Data
|
|
37
|
+
|
|
38
|
+
**Dataset management module for SAGE benchmark suite**
|
|
39
|
+
|
|
40
|
+
Provides unified access to multiple datasets through a two-layer architecture:
|
|
41
|
+
- **Sources**: Physical datasets (e.g., qa_base, bbh, mmlu, gpqa, locomo, orca_dpo, longmemeval, memagentbench)
|
|
42
|
+
- **Usages**: Logical views for experiments (rag, libamm, neuromem, agent_eval)
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
./quickstart.sh
|
|
48
|
+
source .venv/bin/activate
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Then load datasets from Python:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from sage.data import DataManager
|
|
55
|
+
|
|
56
|
+
manager = DataManager.get_instance()
|
|
57
|
+
|
|
58
|
+
# Access datasets by logical usage profile
|
|
59
|
+
rag = manager.get_by_usage("rag")
|
|
60
|
+
qa_loader = rag.load("qa_base") # already instantiated
|
|
61
|
+
queries = qa_loader.load_queries()
|
|
62
|
+
|
|
63
|
+
# Or fetch a specific data source directly
|
|
64
|
+
bbh_loader = manager.get_by_source("bbh")
|
|
65
|
+
tasks = bbh_loader.get_task_names()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 🛠️ CLI 使用方式(精简版)
|
|
69
|
+
|
|
70
|
+
安装后可直接使用 `sage-data` 命令:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
sage-data list # 显示数据源状态(已下载/缺失/远程)
|
|
74
|
+
sage-data usage rag # 查看某个 usage 的数据映射
|
|
75
|
+
sage-data download locomo # 下载指定数据源(仅支持部分源)
|
|
76
|
+
|
|
77
|
+
# 选项
|
|
78
|
+
sage-data list --json # JSON 输出,便于脚本处理
|
|
79
|
+
sage-data --data-root /path # 指定自定义数据根目录
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
当前支持自动下载的源:`locomo`, `longmemeval`, `memagentbench`, `mmlu`。
|
|
83
|
+
其他如 `gpqa`, `orca_dpo` 采用按需在线加载(Hugging Face),`qa_base`/`bbh` 等随包内置。
|
|
84
|
+
|
|
85
|
+
## Available Datasets
|
|
86
|
+
|
|
87
|
+
| Dataset | Description | Download Required | Storage |
|
|
88
|
+
|---------|-------------|-------------------|---------|
|
|
89
|
+
| **qa_base** | Question-Answering with knowledge base | ❌ No (included) | Local files |
|
|
90
|
+
| **locomo** | Long-context memory benchmark | ✅ Yes (`python -m sage.data.sources.locomo.download`) | Local files (2.68MB) |
|
|
91
|
+
| **bbh** | BIG-Bench Hard reasoning tasks | ❌ No (included) | Local JSON files |
|
|
92
|
+
| **mmlu** | Massive Multitask Language Understanding | 📥 Optional (`python -m sage.data.sources.mmlu.download --all-subjects`) | On-demand or Local (~160MB) |
|
|
93
|
+
| **gpqa** | Graduate-Level Question Answering | ✅ Auto (Hugging Face) | On-demand (~5MB cached) |
|
|
94
|
+
| **orca_dpo** | Preference pairs for alignment/DPO | ✅ Auto (Hugging Face) | On-demand (varies) |
|
|
95
|
+
|
|
96
|
+
See `examples/` for detailed usage examples.
|
|
97
|
+
|
|
98
|
+
## 📖 Examples
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
python examples/qa_examples.py # QA dataset usage
|
|
102
|
+
python examples/locomo_examples.py # LoCoMo dataset usage
|
|
103
|
+
python examples/bbh_examples.py # BBH dataset usage
|
|
104
|
+
python examples/mmlu_examples.py # MMLU dataset usage
|
|
105
|
+
python examples/gpqa_examples.py # GPQA dataset usage
|
|
106
|
+
python examples/orca_dpo_examples.py # Orca DPO dataset usage
|
|
107
|
+
python examples/integration_example.py # Cross-dataset integration
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT License - see [LICENSE](LICENSE) file.
|
|
113
|
+
|
|
114
|
+
## 🔗 Links
|
|
115
|
+
|
|
116
|
+
- **Repository**: https://github.com/intellistream/sageData
|
|
117
|
+
- **Issues**: https://github.com/intellistream/sageData/issues
|
|
118
|
+
|
|
119
|
+
## ❓ Common Issues
|
|
120
|
+
|
|
121
|
+
**Q: Where's the LoCoMo data?**
|
|
122
|
+
A: Run `python -m sage.data.sources.locomo.download` (or `sage-data download locomo`) to download it (2.68MB from Hugging Face).
|
|
123
|
+
|
|
124
|
+
**Q: How to download MMLU for offline use?**
|
|
125
|
+
A: Run `python -m sage.data.sources.mmlu.download --all-subjects` to download all subjects (~160MB).
|
|
126
|
+
|
|
127
|
+
**Q: GPQA access error?**
|
|
128
|
+
A: You need to accept the dataset terms on Hugging Face: https://huggingface.co/datasets/Idavidrein/gpqa
|
|
129
|
+
|
|
130
|
+
**Q: How to use Orca DPO for alignment research?**
|
|
131
|
+
A: Use `DataManager.get_by_source("orca_dpo")` to get the loader, then use `format_for_dpo()` to prepare data for training.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
**Version**: 0.2.1.8 | **Last Updated**: December 2025
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
isage_data-0.2.1.8.dist-info/licenses/LICENSE,sha256=8UV2hDddmN5Fm-v7EgDBMiHHtVlo04tzmbX70Ab13NU,1080
|
|
2
|
+
sage/data/__init__.py,sha256=JAIeBEs85Flu5CSt3cCz3B5t-5a8DXw4XUfEp0MnL-Y,803
|
|
3
|
+
sage/data/__init__.pyc,sha256=Hymb6L7SEX5DpbBgah6qqqbgm7vAr45oT-aeQKovCvA,1171
|
|
4
|
+
sage/data/cli.pyc,sha256=yT_3FR6oP0mBJ04k0E7-SDU98idPMJAzul90wowuJpU,15327
|
|
5
|
+
sage/data/manager.pyc,sha256=DZAOVwD27AQHmtTTwDtArg0LDjExtBmebniROEsD3xo,21527
|
|
6
|
+
sage/data/__pycache__/__init__.cpython-311.pyc,sha256=KOjVYD_gB-APFNOx7kJEINlpk0w4SX8P-jQRVg0mNH4,1203
|
|
7
|
+
sage/data/__pycache__/__init__.cpython-312.pyc,sha256=87nvnChTg2ncREvb8hron_NA5v22Cek1yjlyKMvkR4c,1059
|
|
8
|
+
sage/data/__pycache__/cli.cpython-311.pyc,sha256=F-cYpmGU5-eZGqOLrtM_4XlmuQq_y-OnIkB-58U3FLw,15359
|
|
9
|
+
sage/data/__pycache__/cli.cpython-312.pyc,sha256=8YFLF02PpZJYNuTcSfeX0EVBc8I3WXIusvAVED83dws,12834
|
|
10
|
+
sage/data/__pycache__/manager.cpython-311.pyc,sha256=6IHUzw3lKXjD_fSlgPwxPyIL_4fxAxTTMZf2iDuWGe0,21559
|
|
11
|
+
sage/data/__pycache__/manager.cpython-312.pyc,sha256=fMPGOHfjdG160B_GrvQNFP6cbRFKeIAF368Ts1iWRcc,19330
|
|
12
|
+
sage/data/sources/__init__.py,sha256=FuaXfYc4GX3MP9StEpARDfbo5aJxe8SI_L_bIcemkuo,567
|
|
13
|
+
sage/data/sources/__init__.pyc,sha256=matZe2Y3N5klDR-Yi67sjTVgVlT_B8bnYYK0RosdDIU,731
|
|
14
|
+
sage/data/sources/__pycache__/__init__.cpython-311.pyc,sha256=6J6kZLo5acaf0nqky81dAxrQMNSiRCZVF36t2Xq2BAk,763
|
|
15
|
+
sage/data/sources/__pycache__/__init__.cpython-312.pyc,sha256=PtzTCNLFzXwXz7DVGBpXAquaJnvTDWtCdpnkO7Blh3o,752
|
|
16
|
+
sage/data/sources/agent_benchmark/__init__.py,sha256=3QMdQ2xKOqgqpjiEfr0YgDIhcaS27JeWEOGAuwhJyqU,876
|
|
17
|
+
sage/data/sources/agent_benchmark/__init__.pyc,sha256=-ztFyvH-bO2kMJ93MnLQqu4sPcShcZrwbTdbaC17D60,1027
|
|
18
|
+
sage/data/sources/agent_benchmark/dataloader.pyc,sha256=DDDGXzQUXWb_L8hS9a7z6xK0vBeBbnJHnQfvE4lE_dY,22776
|
|
19
|
+
sage/data/sources/agent_benchmark/dataset.yaml,sha256=QpU2qb7m90BrteXlXAIaT7CC-Fwphsnt2a0YtOhIIi0,1143
|
|
20
|
+
sage/data/sources/agent_benchmark/fix_tool_references.pyc,sha256=jy0j2tf260SGEH25XLLD_fUzoatH2b47GRbxREZzXkg,9877
|
|
21
|
+
sage/data/sources/agent_benchmark/generate_data.pyc,sha256=TOMQXUkRZ2KqM14HnZeXPCTz1YqerJiypSaxQarSoYI,12316
|
|
22
|
+
sage/data/sources/agent_benchmark/prepare_planning_data.pyc,sha256=jC-9mYlQN7yhepVm67W0hBzYndM4EvjF2A3ECpzYid8,26682
|
|
23
|
+
sage/data/sources/agent_benchmark/prepare_runtime_data.pyc,sha256=vJhavn-yedF1-lEOCTc50cglu2Wr2NU4VU2KF3TUcfE,23769
|
|
24
|
+
sage/data/sources/agent_benchmark/prepare_timing_data.pyc,sha256=VRWZHeqq1xcdrwft0XNPgmK_28ig4_HwX8M0-NN5lF8,27276
|
|
25
|
+
sage/data/sources/agent_benchmark/test_integration.py,sha256=o5b6zcxzjbVQNdO2HDm-l0xl7U62xul46gdVD2tGXlw,2890
|
|
26
|
+
sage/data/sources/agent_benchmark/validate_cross_task.pyc,sha256=OSCft6fCkiB0GzfBIbCDfYzNnNYrztXA4D0fRThmfZA,8246
|
|
27
|
+
sage/data/sources/agent_benchmark/validate_data.pyc,sha256=6ly30q7Uarjoop0-X-K3slhvtAxwUOFax2AL_AkODiY,15587
|
|
28
|
+
sage/data/sources/agent_benchmark/external_benchmarks/__init__.py,sha256=TbcZ7BAxM98mJMgiDC7VjBYhGZ01KLH538MCovD4FsM,665
|
|
29
|
+
sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc,sha256=U_rYEx3oMlJDDGOu5s-Zk3wWcgHxXHLUB3u9OyVXfV4,870
|
|
30
|
+
sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc,sha256=ifLYhufKTmaNAGe7_CnE83N4vAONY_gBKBjpzwTCHAE,22570
|
|
31
|
+
sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc,sha256=rupmuv2R-EKVmY_oHZeJVo6KG2oaUFMtTnFcQSvWVLE,7066
|
|
32
|
+
sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc,sha256=xmOzVaRR01IxN1o6zojFFhmoqvYyoWrapqxu6oQ_3ak,26225
|
|
33
|
+
sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc,sha256=bDlsfkiisUqr2zd5RyXwczVfE78TEpPdN32e0QcVycA,6239
|
|
34
|
+
sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc,sha256=92tVhXVKcd_c2tS6JWKdRABLcIycdPGqkTGUgMPmVTE,30759
|
|
35
|
+
sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc,sha256=4kLLVjjGxO5NVTXYis-lJV8vDZkFZ4_jVrS9_60OoEs,6422
|
|
36
|
+
sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc,sha256=Cf4KcGb0433t7LRA2JE-FYgZGFiArOsHRutSskA4R10,14374
|
|
37
|
+
sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py,sha256=YjJ4LV6Ui0d_hDUQJU0CfMMrLAE8z8oaQdAR00bJMMw,13130
|
|
38
|
+
sage/data/sources/agent_sft/__init__.py,sha256=GYiOa-F46Ulg7u3B4FdBQay1B_tFdLw_mXLHhptSMDk,250
|
|
39
|
+
sage/data/sources/agent_sft/__init__.pyc,sha256=y8Fcax2VijAPG4gLeuq5A-hCsw6W3UqMDlSB1bjCPF4,433
|
|
40
|
+
sage/data/sources/agent_sft/dataloader.pyc,sha256=C1uYp0Ab5htHfwAEwobs9cVZCqpG9wooPihOmgUZ6jw,14036
|
|
41
|
+
sage/data/sources/agent_sft/dataset.yaml,sha256=87cfmcXyPDKkR_3ZMjd81Tol8JOm4fSanpehh7sltgo,227
|
|
42
|
+
sage/data/sources/agent_sft/fix_tool_ids.pyc,sha256=lARrp92Gi8gPEFtOW_IjKES7MwMAGSEYiz_QRsKWd7Q,8666
|
|
43
|
+
sage/data/sources/agent_sft/schemas.pyc,sha256=US-L1dfqMzLlRleHXpm0eqTXNj2XDRrNQgZmXEG-EfU,10473
|
|
44
|
+
sage/data/sources/agent_sft/data/generate_data.pyc,sha256=RQqQ_51ETTseCCjcHsT4lyHFVW3Wl-RpE7DX_X_oiS4,8823
|
|
45
|
+
sage/data/sources/agent_sft/data/prompts_template.yaml,sha256=McR-U4QkLEjezQ_BZXcgPI8UqBJmGUtcVaqFa049w-Y,2281
|
|
46
|
+
sage/data/sources/agent_sft/tests/test_agent_sft_loader.py,sha256=1CJkbQEmu6zWEQJB3Pcbtyr_bd8MxHQ7J8DCtpwdpjk,11468
|
|
47
|
+
sage/data/sources/agent_tools/__init__.py,sha256=kBjgRB9m5sOMhsYmTSJvI95DnjFB9wBbpqui7nMAVa0,257
|
|
48
|
+
sage/data/sources/agent_tools/__init__.pyc,sha256=C8uu0Kwf71TlvSJlq-e1ECSmWk935RH-Zq5Dfzxtv38,457
|
|
49
|
+
sage/data/sources/agent_tools/dataloader.pyc,sha256=j8qWlRhYbzNktloV4Z2lQWgmzCSNsEAglqv8s-Rl0wU,21285
|
|
50
|
+
sage/data/sources/agent_tools/dataset.yaml,sha256=1B8tq5CCVi9I4hX5Jt4keyom1tqw5U9q_dxzLmviCJ0,283
|
|
51
|
+
sage/data/sources/agent_tools/generate_tools.pyc,sha256=c0W0Uh28hYbk3dGFh_pX-rFilPqtEiW6JuYx1QPWLZY,18474
|
|
52
|
+
sage/data/sources/agent_tools/schemas.pyc,sha256=DduHEV2VCWav51DDvmilzBQTO2-493hmaEx8GqXcpto,16007
|
|
53
|
+
sage/data/sources/agent_tools/test_integration.py,sha256=aQndSqJxc5O4IZXAalTCVhdFZZcdz0iTZ2LcJz-mB88,3809
|
|
54
|
+
sage/data/sources/agent_tools/validate_data.pyc,sha256=7AhZxvfG3T0Rb2mxP9rm1ny5SshmDB0CCTU1KUHKKMw,13955
|
|
55
|
+
sage/data/sources/agent_tools/tests/test_agent_tools_loader.py,sha256=oG8Kpc70KtgOgNNFJLE6kfOyb94Sc-_-yohritFVmGk,11020
|
|
56
|
+
sage/data/sources/bbh/__init__.py,sha256=MBsn0xDOs52ixaEY2TrE7yPdTfw2u_CamjSEzergL3U,102
|
|
57
|
+
sage/data/sources/bbh/__init__.pyc,sha256=MxeqnPFPywfTJVQWTZmjeHY3yq2bfChGegms2prbiRM,278
|
|
58
|
+
sage/data/sources/bbh/dataloader.pyc,sha256=_xmAP3YP8hm9dyrH4MQNZDGtFO59x-GP2kCUEZ-ewoY,13158
|
|
59
|
+
sage/data/sources/bbh/dataset.yaml,sha256=APyN8KQasC2ytDF7WKrg-hpzkjm5T-cV8wIvCmLgHFA,245
|
|
60
|
+
sage/data/sources/control_plane_benchmark/__init__.py,sha256=ZfGJ0XLpBDe-iwZCpEyU4LxaYsM5Vg2-K1NVnUiNFD4,1109
|
|
61
|
+
sage/data/sources/control_plane_benchmark/__init__.pyc,sha256=VwucBBIGNbmC4PYhXfd9neWIKf_uTd_jzlaUnKZS8kM,1290
|
|
62
|
+
sage/data/sources/control_plane_benchmark/dataloader.pyc,sha256=w0-oqgF0N1Xed0IPjM6Rg9dzslQ7gElT8D6iV8vanMA,27997
|
|
63
|
+
sage/data/sources/control_plane_benchmark/dataset.yaml,sha256=ihLtPNU0YqGNsTFf3DoQZMTg50YsIFJBv1qJaQ-F1PQ,2597
|
|
64
|
+
sage/data/sources/gpqa/__init__.py,sha256=JGY29_Vh-Kq1x471soBxFWjx9nQdDbC9KmW6v3sn8Rs,105
|
|
65
|
+
sage/data/sources/gpqa/__init__.pyc,sha256=ZBXW5wTWnNlZmM_D7YnQjbSChp0rA6btea1h1r1VF2U,281
|
|
66
|
+
sage/data/sources/gpqa/dataloader.pyc,sha256=h5mqui44FdtvBUKcE6mAGQaWJ5IqjV2iLkgqPDNM0i0,12309
|
|
67
|
+
sage/data/sources/gpqa/dataset.yaml,sha256=_EkpdX8rtF-c4W9_E3UsZVyspYBfUKo9zp6JKcWD0Mc,361
|
|
68
|
+
sage/data/sources/libamm_benchmark/__init__.py,sha256=eQkaHZ8BbiIezfLOI4ntVwj_PSKTRyLLd7I6cklLsqA,277
|
|
69
|
+
sage/data/sources/libamm_benchmark/__init__.pyc,sha256=7oCyvdcr5eEPN1Tck4k7OA_mMYMSZKWUSwX3Q7a8TEM,322
|
|
70
|
+
sage/data/sources/libamm_benchmark/dataset.yaml,sha256=rMO8Ajycf8Zp3z2puQIEARH7mwd1HU92_ZAFq2G3PiQ,301
|
|
71
|
+
sage/data/sources/locomo/__init__.py,sha256=qvJdedYm2ly123Tca_BcQq_B1N6vrUS4_p7zzw5R_f8,111
|
|
72
|
+
sage/data/sources/locomo/__init__.pyc,sha256=ypZv6IlQj1t1ZKLiEDvSzMCBLcQ1hBrp0UmEfsV3b7g,287
|
|
73
|
+
sage/data/sources/locomo/dataloader.pyc,sha256=4YEBmv2tOmNCU8SZjsbUIrCl6j465CWkqnNHCehncF0,25683
|
|
74
|
+
sage/data/sources/locomo/dataset.yaml,sha256=ckxVU5A8DVAc8feeVtsVx5mtlTSN4Rd7WNCqLdlsT84,320
|
|
75
|
+
sage/data/sources/locomo/download.pyc,sha256=M7mO9oA4SsWmktFh1qk1RanxG1lt2Phy5XCj9k-8ylk,4640
|
|
76
|
+
sage/data/sources/locomo/locomo10.json,sha256=efqH6Q8ECBNDuMjevsuAqaaEK3anqlN9yf32Ueppj_Q,2805274
|
|
77
|
+
sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc,sha256=klg-8glD1-YPNSoA2o-u2fsPC3ZsI6aK3nfGg7kqtNs,319
|
|
78
|
+
sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc,sha256=YpvvVZVDYx8V3GURGEn1p_lGaP1rnr3uL4zZdbFFN90,295
|
|
79
|
+
sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc,sha256=heWVnkohmJW3_sLsU2YezM3rq6HHkTPDWR-SIf-yK6w,22724
|
|
80
|
+
sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc,sha256=FbY09jfsLV3VMRv-VEO_wpazeG1jhWPFJkY6rWb16NE,21880
|
|
81
|
+
sage/data/sources/locomo/__pycache__/download.cpython-311.pyc,sha256=OVYEDkXd0dzVIvoOcOKuOgFxeyvlywuOhTFT7lT6w5I,4672
|
|
82
|
+
sage/data/sources/longmemeval/__init__.py,sha256=8xdgmUEbDmoQ_492iKe_WHjNguqH0jYqC36d15nbmCc,129
|
|
83
|
+
sage/data/sources/longmemeval/__init__.pyc,sha256=al86XCpBE-m8xhtUV0YK-fm8v_6Vu8Qlk60M4KWarkw,302
|
|
84
|
+
sage/data/sources/longmemeval/compose.pyc,sha256=qfhXrZyY_qd3cx0ihKoEWi-l4ocslR1DzVHU8im0Y-0,8191
|
|
85
|
+
sage/data/sources/longmemeval/dataloader.pyc,sha256=j87OhZYVR_SwimmiMizHMuU5YctnoWgB1HcBLjVJxWc,22591
|
|
86
|
+
sage/data/sources/longmemeval/dataset.yaml,sha256=4hr1HNtNgpxJdc4uvGHanS4x1fILGwoidzz-lWSvsoM,358
|
|
87
|
+
sage/data/sources/longmemeval/download.pyc,sha256=Rdc9QgqZK_YZnkkn_NbgJoYdH7uuN6v27p0R_TtJTQ0,4929
|
|
88
|
+
sage/data/sources/longmemeval/config/longmemeval_groups.yaml,sha256=hzL5BQSsNuya_h_fkDq0fIZDD7WtyS_KoU14KnajH1o,6355
|
|
89
|
+
sage/data/sources/memagentbench/Conflict_Resolution.parquet,sha256=JNXD8JzgzhViXLn4qY9E8NhkymyU17StBOtpfKOl_0U,1491588
|
|
90
|
+
sage/data/sources/memagentbench/__init__.py,sha256=RIdteSNTrSr5S78whC4Tq-Qw-bYPESwVyA51qjMSEcw,487
|
|
91
|
+
sage/data/sources/memagentbench/__init__.pyc,sha256=k31EFhpJLM1iPqgMu4j5TB2SKnQmRFFtROE7Oe6zU90,665
|
|
92
|
+
sage/data/sources/memagentbench/conflict_resolution_loader.pyc,sha256=DyYZd_JFGOHo_0nLU6Kc00QclNoSe6peE5mP_p7-74s,15748
|
|
93
|
+
sage/data/sources/memagentbench/conflict_resolution_loader_test.py,sha256=e-AsKQG90zCG9L1APln4cc6L0e6y8XTd5gGUKS2hv0U,6278
|
|
94
|
+
sage/data/sources/memagentbench/dataset.yaml,sha256=Jma6JDkMA2L_TPEgqVWdK7P02LcYPDwZg8j1eH5iniA,363
|
|
95
|
+
sage/data/sources/memagentbench/download.pyc,sha256=eOW11Pa1E47-xtHyHsNBBOprYc26EXG2xLbqA6FsI-4,5227
|
|
96
|
+
sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc,sha256=Qv1RkDlR5T19PsZc4oyQtFyeSd9MOyhUoPLYTLnPZbA,671
|
|
97
|
+
sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc,sha256=gMfjsLq07y_wffn8caYtfNOr1vAuy_voZpSyhK-2d-A,13775
|
|
98
|
+
sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc,sha256=NBTTBlLzw1K85254-swGreYxhiQj1dftt1bQ0ePAmzs,4544
|
|
99
|
+
sage/data/sources/mmlu/__init__.py,sha256=ZiTx_rmQA0M5z4U8vhZ5uRivldYhGNoxKeFeJVcGxPk,105
|
|
100
|
+
sage/data/sources/mmlu/__init__.pyc,sha256=bU64BMe3SQX63ERfxSszrqhO5CgJbgjOGRERCOl2Km4,281
|
|
101
|
+
sage/data/sources/mmlu/dataloader.pyc,sha256=RxXA9k1m9sxXulaBO0fEtG9W75IyenLsPH9CfHghORo,12790
|
|
102
|
+
sage/data/sources/mmlu/dataset.yaml,sha256=MjyP64sMOzuPdsIft_Z3oCRWFKJ12PKFXy6ruMs539w,365
|
|
103
|
+
sage/data/sources/mmlu/download.pyc,sha256=zhLKNiqSV-4qcYoc7CSNNkfOpSuqYS18PZrWZsllWbY,14197
|
|
104
|
+
sage/data/sources/orca_dpo/__init__.py,sha256=LN1evirvfo7ZZN0hHzg_YSdm5m3ADxMHsNi-jL8IC6s,139
|
|
105
|
+
sage/data/sources/orca_dpo/__init__.pyc,sha256=pNeWRFaR2TIKa7YL1T5u3lQ31cQYZVFrmQN8DbBkRjQ,316
|
|
106
|
+
sage/data/sources/orca_dpo/dataloader.pyc,sha256=Y5b5eCQy015KQSTUX9ZYfXEZsAJ3oFd2wN6ckfEIIZA,13973
|
|
107
|
+
sage/data/sources/qa_base/__init__.py,sha256=VUdivRVm5NVYlC-RfkQinmZ-Goz2kusfI5LUbX1OuyI,104
|
|
108
|
+
sage/data/sources/qa_base/__init__.pyc,sha256=Vq0YqoGnaATWlDHtjtkcC9VjKI274l0Fiesw50JUQ8Y,285
|
|
109
|
+
sage/data/sources/qa_base/dataloader.pyc,sha256=XoUlfhXF3vLrjbDOOV7lzV5X4R0ApxpHmqZY8REi3UU,13358
|
|
110
|
+
sage/data/sources/qa_base/dataset.yaml,sha256=3Wm5JVoSZ2Bi9SA9uD3Lzh1EI2YlI_AudAz49zFjPok,259
|
|
111
|
+
sage/data/sources/qa_base/qa_knowledge_base.txt,sha256=MrTjFQBYyv-QvxyO91dgoDB8k_Zf50V8xfmE8DQezc4,1360
|
|
112
|
+
sage/data/sources/qa_base/qa_knowledge_chromaDB.txt,sha256=7NKnW06tuOyngh98cOjo3c4R0Du2QTy_J5UZVdAMtW0,580
|
|
113
|
+
sage/data/sources/qa_base/sample/one_question.txt,sha256=W7xWZlO1xhfAt9W53NIl48q-TH5jnABT8xR_H9Ak5oE,87
|
|
114
|
+
sage/data/sources/qa_base/sample/question.txt,sha256=VjX0DuAz4BzlpExlPawK57dIQuhFquoclqXgvMPE2gg,29984
|
|
115
|
+
sage/data/sources/qa_base/sample/question1.txt,sha256=GSB3Gq63t1lBKFPX9CyqkhfqE_bKe_VmUyjXUVxi4qs,23
|
|
116
|
+
sage/data/usages/__init__.py,sha256=jHd4op-nfNtvAjr0RekGzlfzODt-RS6pZwkcZ3Zea6k,84
|
|
117
|
+
sage/data/usages/__init__.pyc,sha256=2k6UBW-oED5nZJJ8U49_BC3xZ2m9Em5nlYW3RYcq-tA,230
|
|
118
|
+
sage/data/usages/agent_eval/__init__.py,sha256=0LvUWT-WRMiUwUm14LobIcDkydd0-DTnJ3rj2doPPe0,6117
|
|
119
|
+
sage/data/usages/agent_eval/__init__.pyc,sha256=y1cKUK4dKEopbjVpw4CYarUU9u3SyYKS86alsom3Qj8,8152
|
|
120
|
+
sage/data/usages/agent_eval/config.yaml,sha256=wD1GZoNmnBGMARohvGa1ABjkDCTEnoM-ksOb3LksIjw,459
|
|
121
|
+
sage/data/usages/agent_eval/usage.yaml,sha256=m9j0RPnSm7AJ-_quIDw2SfN7Z0fC0zD35klDw1-esrc,203
|
|
122
|
+
sage/data/usages/agent_eval/profiles/full_eval.yaml,sha256=zKQkGRB3cPtFJJ-416FjDwgSOckl6DSNTwtPcKkQlGY,330
|
|
123
|
+
sage/data/usages/agent_eval/profiles/quick_eval.yaml,sha256=zcnCI-c1oga4vI5xryW8tZGqeZsfF20AXTGmw-p0BpU,235
|
|
124
|
+
sage/data/usages/agent_eval/profiles/sft_training.yaml,sha256=OfsWnUNC0qY98nhG5ZgmLgi4lph6FqU3hSm98RFCn4o,229
|
|
125
|
+
sage/data/usages/libamm/config.yaml,sha256=HorGUurHUMqtwPs8UbXm_N0dOmY61ABJy8nzzE7DSXI,409
|
|
126
|
+
sage/data/usages/neuromem/config.yaml,sha256=A7E7fNYAaeZDzKj76Bpyh6sIKqqvTRzv14MSIX9TuZY,225
|
|
127
|
+
sage/data/usages/rag/config.yaml,sha256=lhxfqBy3X3Y52NkgsJ-w4Hk_ZaAQxEDJFpTByNpN5O0,251
|
|
128
|
+
isage_data-0.2.1.8.dist-info/METADATA,sha256=A1RfNU6fowsWICnjXGnw-sLqktQhEIIuS78p5Bua880,5136
|
|
129
|
+
isage_data-0.2.1.8.dist-info/WHEEL,sha256=27snaH8EChr9VGIQt_981R5IOTPR-vQPuJNW-WzhNJA,93
|
|
130
|
+
isage_data-0.2.1.8.dist-info/entry_points.txt,sha256=8ZU2OS18xxsbtaHiDnvuHutBT73yldAJp4gURFcwx6E,49
|
|
131
|
+
isage_data-0.2.1.8.dist-info/top_level.txt,sha256=hibFyzQHiLOMK68qL1OWsNKaXOmSXqZjeLTBem6Yy7I,5
|
|
132
|
+
isage_data-0.2.1.8.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 IntelliStream Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sage
|
sage/data/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""SAGE Data - Shared dataset library with two-layer architecture."""
|
|
2
|
+
|
|
3
|
+
from importlib import metadata as _metadata
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _resolve_version(
    distributions: tuple[str, ...] = ("isage-data", "sage-data", "sage-benchmark"),
) -> str:
    """Return the installed package version with graceful fallbacks.

    Each candidate distribution name is looked up in order through
    ``importlib.metadata``; the first one that is installed wins.

    Args:
        distributions: Distribution names to try, in priority order. The
            default starts with ``isage-data`` (the name this package is
            published under) and keeps the older ``sage-data`` and
            ``sage-benchmark`` names as fallbacks.

    Returns:
        The version string of the first installed distribution, or
        ``"0.0.0-dev"`` when none of the candidates is installed (for
        example when running from a source checkout).
    """
    for distribution in distributions:
        try:
            return _metadata.version(distribution)
        except _metadata.PackageNotFoundError:
            # Not installed under this name; try the next candidate.
            continue
    return "0.0.0-dev"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__version__ = _resolve_version()
|
|
17
|
+
|
|
18
|
+
from .manager import (
|
|
19
|
+
DataManager,
|
|
20
|
+
DatasetMetadata,
|
|
21
|
+
SourceRegistry,
|
|
22
|
+
UsageProfile,
|
|
23
|
+
UsageRegistry,
|
|
24
|
+
get_usage_view,
|
|
25
|
+
load_dataset,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"DataManager",
|
|
30
|
+
"DatasetMetadata",
|
|
31
|
+
"SourceRegistry",
|
|
32
|
+
"UsageProfile",
|
|
33
|
+
"UsageRegistry",
|
|
34
|
+
"get_usage_view",
|
|
35
|
+
"load_dataset",
|
|
36
|
+
"__version__",
|
|
37
|
+
]
|
sage/data/__init__.pyc
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
sage/data/cli.pyc
ADDED
|
Binary file
|
sage/data/manager.pyc
ADDED
|
Binary file
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Dataset source registry for SAGE data marketplace.
|
|
2
|
+
|
|
3
|
+
This package contains one subpackage per physical dataset. Each dataset remains in its
|
|
4
|
+
original location under ``sage.data``; these wrappers expose them through the new
|
|
5
|
+
"sources" layer without changing existing imports.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from sage.data.sources.qa_base import QADataLoader
|
|
9
|
+
from sage.data.sources.agent_benchmark import AgentBenchmarkDataLoader
|
|
10
|
+
from sage.data.sources.control_plane_benchmark import ControlPlaneBenchmarkDataLoader
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
__all__ = ["agent_benchmark", "control_plane_benchmark"]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent Benchmark Dataset Module
|
|
3
|
+
|
|
4
|
+
This module provides tools for loading and managing the Agent Benchmark dataset,
|
|
5
|
+
which evaluates AI agent capabilities in tool selection, task planning, and timing judgment.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from sage.data.sources.agent_benchmark import AgentBenchmarkDataLoader
|
|
9
|
+
|
|
10
|
+
loader = AgentBenchmarkDataLoader()
|
|
11
|
+
stats = loader.get_stats()
|
|
12
|
+
|
|
13
|
+
for sample in loader.iter_split("tool_selection", split="dev"):
|
|
14
|
+
print(sample.instruction)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .dataloader import (
|
|
18
|
+
AgentBenchmarkDataLoader,
|
|
19
|
+
AgentBenchmarkSample,
|
|
20
|
+
GroundTruthTaskPlanning,
|
|
21
|
+
GroundTruthTimingJudgment,
|
|
22
|
+
GroundTruthToolSelection,
|
|
23
|
+
PlanStep,
|
|
24
|
+
SampleMetadata,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"AgentBenchmarkDataLoader",
|
|
29
|
+
"AgentBenchmarkSample",
|
|
30
|
+
"GroundTruthToolSelection",
|
|
31
|
+
"GroundTruthTaskPlanning",
|
|
32
|
+
"GroundTruthTimingJudgment",
|
|
33
|
+
"SampleMetadata",
|
|
34
|
+
"PlanStep",
|
|
35
|
+
]
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: "agent_benchmark"
|
|
2
|
+
description: "Task packs for tool picking, planning, and timing evaluation"
|
|
3
|
+
type: "benchmark"
|
|
4
|
+
format: "jsonl"
|
|
5
|
+
version: "0.1.0"
|
|
6
|
+
maintainer: "SAGE Agent Benchmark Team"
|
|
7
|
+
tags: ["agent", "benchmark", "planning", "tool_selection", "reasoning"]
|
|
8
|
+
license: "CC-BY-SA-4.0"
|
|
9
|
+
size: "~20MB"
|
|
10
|
+
|
|
11
|
+
tasks:
|
|
12
|
+
- name: "tool_selection"
|
|
13
|
+
description: "Evaluate agent's ability to select appropriate tools"
|
|
14
|
+
samples: 500
|
|
15
|
+
splits: ["train", "dev", "test"]
|
|
16
|
+
|
|
17
|
+
- name: "task_planning"
|
|
18
|
+
description: "Evaluate agent's task decomposition and sequencing"
|
|
19
|
+
samples: 300
|
|
20
|
+
splits: ["train", "dev", "test"]
|
|
21
|
+
|
|
22
|
+
- name: "timing_judgment"
|
|
23
|
+
description: "Evaluate when to use tools vs. direct answers"
|
|
24
|
+
samples: 300
|
|
25
|
+
splits: ["train", "dev", "test"]
|
|
26
|
+
|
|
27
|
+
statistics:
|
|
28
|
+
total_samples: 1100
|
|
29
|
+
train_samples: 770
|
|
30
|
+
dev_samples: 165
|
|
31
|
+
test_samples: 165
|
|
32
|
+
|
|
33
|
+
difficulty_distribution:
|
|
34
|
+
easy: "~35%"
|
|
35
|
+
medium: "~45%"
|
|
36
|
+
hard: "~20%"
|
|
37
|
+
|
|
38
|
+
citation: |
|
|
39
|
+
@dataset{agent_benchmark_2025,
|
|
40
|
+
title={Agent Benchmark: Evaluating Tool Selection, Planning, and Timing},
|
|
41
|
+
author={SAGE Team},
|
|
42
|
+
year={2025},
|
|
43
|
+
publisher={SAGE Framework}
|
|
44
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
External Benchmarks for Agent Evaluation
|
|
3
|
+
|
|
4
|
+
This module provides unified access to external public benchmarks for
|
|
5
|
+
tool selection, task planning, and timing judgment evaluation.
|
|
6
|
+
|
|
7
|
+
Supported benchmarks:
|
|
8
|
+
- BFCL (Berkeley Function Calling Leaderboard)
|
|
9
|
+
- ToolBench
|
|
10
|
+
- API-Bank
|
|
11
|
+
- ToolAlpaca
|
|
12
|
+
- TaskBench
|
|
13
|
+
- MetaTool
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from .converters import (
|
|
17
|
+
APIBankConverter,
|
|
18
|
+
BFCLConverter,
|
|
19
|
+
MetaToolConverter,
|
|
20
|
+
TaskBenchConverter,
|
|
21
|
+
ToolBenchConverter,
|
|
22
|
+
)
|
|
23
|
+
from .loader import ExternalBenchmarkLoader
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"ExternalBenchmarkLoader",
|
|
27
|
+
"BFCLConverter",
|
|
28
|
+
"ToolBenchConverter",
|
|
29
|
+
"APIBankConverter",
|
|
30
|
+
"TaskBenchConverter",
|
|
31
|
+
"MetaToolConverter",
|
|
32
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration test for Agent Benchmark
|
|
3
|
+
|
|
4
|
+
Tests the complete end-to-end workflow including:
|
|
5
|
+
- Module import
|
|
6
|
+
- DataLoader initialization
|
|
7
|
+
- Data iteration
|
|
8
|
+
- Statistics generation
|
|
9
|
+
- Sample validation
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from sage.data.sources.agent_benchmark import (
|
|
13
|
+
AgentBenchmarkDataLoader,
|
|
14
|
+
GroundTruthTaskPlanning,
|
|
15
|
+
GroundTruthTimingJudgment,
|
|
16
|
+
GroundTruthToolSelection,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_basic_workflow():
    """Smoke-test the agent benchmark loader from construction to validation.

    Steps, in order: build the loader, read its statistics, check the first
    ``dev`` sample of each task type against its typed ground truth, fetch
    sample ``ts_000001`` by id, and validate that sample. Progress is printed
    as each step completes.

    Returns:
        True once every assertion has passed.
    """
    rule = "=" * 70

    def first_of(task_type):
        # Materialise the dev split, then pair its first sample with the
        # typed ground truth derived from it.
        loaded = list(loader.iter_split(task_type, "dev"))
        head = loaded[0]
        return loaded, head, head.get_typed_ground_truth()

    print(rule)
    print("AGENT BENCHMARK INTEGRATION TEST")
    print(rule)

    # Step 1: construct the loader.
    print("\n1. Initializing loader...")
    loader = AgentBenchmarkDataLoader()
    print(" ✅ Loader initialized")

    # Step 2: read aggregate statistics.
    print("\n2. Getting statistics...")
    stats = loader.get_stats()
    print(f" ✅ Total samples: {stats['total_samples']}")
    print(f" ✅ Task types: {len(stats['by_task_type'])}")

    # Step 3: one check per task type.
    print("\n3. Testing task types...")

    # Tool selection: ground truth must name at least one tool.
    print(" Testing tool_selection...")
    ts_samples, sample, gt = first_of("tool_selection")
    assert isinstance(gt, GroundTruthToolSelection)
    assert len(gt.top_k) > 0
    print(f" ✅ Loaded {len(ts_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Tools: {gt.top_k}")

    # Task planning: the plan must contain between 5 and 10 steps.
    print(" Testing task_planning...")
    tp_samples, sample, gt = first_of("task_planning")
    assert isinstance(gt, GroundTruthTaskPlanning)
    assert 5 <= len(gt.plan_steps) <= 10
    print(f" ✅ Loaded {len(tp_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Steps: {len(gt.plan_steps)}")

    # Timing judgment: the decision flag must be a real bool.
    print(" Testing timing_judgment...")
    tj_samples, sample, gt = first_of("timing_judgment")
    assert isinstance(gt, GroundTruthTimingJudgment)
    assert isinstance(gt.should_call_tool, bool)
    print(f" ✅ Loaded {len(tj_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Should call tool: {gt.should_call_tool}")

    # Step 4: direct lookup by sample id.
    print("\n4. Testing sample retrieval...")
    sample = loader.get_sample("ts_000001")
    assert sample is not None
    print(f" ✅ Retrieved sample: {sample.sample_id}")

    # Step 5: the retrieved sample must validate without errors.
    print("\n5. Testing validation...")
    errors = loader.validate_sample(sample)
    assert len(errors) == 0
    print(" ✅ Sample validation passed")

    print("\n" + rule)
    print("✅ ALL INTEGRATION TESTS PASSED")
    print(rule)

    return True
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
    # Raise SystemExit directly instead of calling ``exit()``: that helper is
    # installed by the ``site`` module for interactive sessions and is not
    # available when the interpreter runs with ``-S`` or in frozen builds.
    success = test_basic_workflow()
    raise SystemExit(0 if success else 1)
|