isage-data 0.2.1.8__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. isage_data-0.2.1.8.dist-info/METADATA +135 -0
  2. isage_data-0.2.1.8.dist-info/RECORD +132 -0
  3. isage_data-0.2.1.8.dist-info/WHEEL +5 -0
  4. isage_data-0.2.1.8.dist-info/entry_points.txt +2 -0
  5. isage_data-0.2.1.8.dist-info/licenses/LICENSE +21 -0
  6. isage_data-0.2.1.8.dist-info/top_level.txt +1 -0
  7. sage/data/__init__.py +37 -0
  8. sage/data/__init__.pyc +0 -0
  9. sage/data/__pycache__/__init__.cpython-311.pyc +0 -0
  10. sage/data/__pycache__/__init__.cpython-312.pyc +0 -0
  11. sage/data/__pycache__/cli.cpython-311.pyc +0 -0
  12. sage/data/__pycache__/cli.cpython-312.pyc +0 -0
  13. sage/data/__pycache__/manager.cpython-311.pyc +0 -0
  14. sage/data/__pycache__/manager.cpython-312.pyc +0 -0
  15. sage/data/cli.pyc +0 -0
  16. sage/data/manager.pyc +0 -0
  17. sage/data/sources/__init__.py +13 -0
  18. sage/data/sources/__init__.pyc +0 -0
  19. sage/data/sources/__pycache__/__init__.cpython-311.pyc +0 -0
  20. sage/data/sources/__pycache__/__init__.cpython-312.pyc +0 -0
  21. sage/data/sources/agent_benchmark/__init__.py +35 -0
  22. sage/data/sources/agent_benchmark/__init__.pyc +0 -0
  23. sage/data/sources/agent_benchmark/dataloader.pyc +0 -0
  24. sage/data/sources/agent_benchmark/dataset.yaml +44 -0
  25. sage/data/sources/agent_benchmark/external_benchmarks/__init__.py +32 -0
  26. sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc +0 -0
  27. sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc +0 -0
  28. sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc +0 -0
  29. sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc +0 -0
  30. sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc +0 -0
  31. sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc +0 -0
  32. sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc +0 -0
  33. sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc +0 -0
  34. sage/data/sources/agent_benchmark/fix_tool_references.pyc +0 -0
  35. sage/data/sources/agent_benchmark/generate_data.pyc +0 -0
  36. sage/data/sources/agent_benchmark/prepare_planning_data.pyc +0 -0
  37. sage/data/sources/agent_benchmark/prepare_runtime_data.pyc +0 -0
  38. sage/data/sources/agent_benchmark/prepare_timing_data.pyc +0 -0
  39. sage/data/sources/agent_benchmark/test_integration.py +94 -0
  40. sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py +353 -0
  41. sage/data/sources/agent_benchmark/validate_cross_task.pyc +0 -0
  42. sage/data/sources/agent_benchmark/validate_data.pyc +0 -0
  43. sage/data/sources/agent_sft/__init__.py +10 -0
  44. sage/data/sources/agent_sft/__init__.pyc +0 -0
  45. sage/data/sources/agent_sft/data/generate_data.pyc +0 -0
  46. sage/data/sources/agent_sft/data/prompts_template.yaml +75 -0
  47. sage/data/sources/agent_sft/dataloader.pyc +0 -0
  48. sage/data/sources/agent_sft/dataset.yaml +9 -0
  49. sage/data/sources/agent_sft/fix_tool_ids.pyc +0 -0
  50. sage/data/sources/agent_sft/schemas.pyc +0 -0
  51. sage/data/sources/agent_sft/tests/test_agent_sft_loader.py +316 -0
  52. sage/data/sources/agent_tools/__init__.py +6 -0
  53. sage/data/sources/agent_tools/__init__.pyc +0 -0
  54. sage/data/sources/agent_tools/dataloader.pyc +0 -0
  55. sage/data/sources/agent_tools/dataset.yaml +9 -0
  56. sage/data/sources/agent_tools/generate_tools.pyc +0 -0
  57. sage/data/sources/agent_tools/schemas.pyc +0 -0
  58. sage/data/sources/agent_tools/test_integration.py +108 -0
  59. sage/data/sources/agent_tools/tests/test_agent_tools_loader.py +306 -0
  60. sage/data/sources/agent_tools/validate_data.pyc +0 -0
  61. sage/data/sources/bbh/__init__.py +5 -0
  62. sage/data/sources/bbh/__init__.pyc +0 -0
  63. sage/data/sources/bbh/dataloader.pyc +0 -0
  64. sage/data/sources/bbh/dataset.yaml +9 -0
  65. sage/data/sources/control_plane_benchmark/__init__.py +41 -0
  66. sage/data/sources/control_plane_benchmark/__init__.pyc +0 -0
  67. sage/data/sources/control_plane_benchmark/dataloader.pyc +0 -0
  68. sage/data/sources/control_plane_benchmark/dataset.yaml +101 -0
  69. sage/data/sources/gpqa/__init__.py +5 -0
  70. sage/data/sources/gpqa/__init__.pyc +0 -0
  71. sage/data/sources/gpqa/dataloader.pyc +0 -0
  72. sage/data/sources/gpqa/dataset.yaml +10 -0
  73. sage/data/sources/libamm_benchmark/__init__.py +10 -0
  74. sage/data/sources/libamm_benchmark/__init__.pyc +0 -0
  75. sage/data/sources/libamm_benchmark/dataset.yaml +9 -0
  76. sage/data/sources/locomo/__init__.py +5 -0
  77. sage/data/sources/locomo/__init__.pyc +0 -0
  78. sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc +0 -0
  79. sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc +0 -0
  80. sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc +0 -0
  81. sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc +0 -0
  82. sage/data/sources/locomo/__pycache__/download.cpython-311.pyc +0 -0
  83. sage/data/sources/locomo/dataloader.pyc +0 -0
  84. sage/data/sources/locomo/dataset.yaml +10 -0
  85. sage/data/sources/locomo/download.pyc +0 -0
  86. sage/data/sources/locomo/locomo10.json +66751 -0
  87. sage/data/sources/longmemeval/__init__.py +5 -0
  88. sage/data/sources/longmemeval/__init__.pyc +0 -0
  89. sage/data/sources/longmemeval/compose.pyc +0 -0
  90. sage/data/sources/longmemeval/config/longmemeval_groups.yaml +15 -0
  91. sage/data/sources/longmemeval/dataloader.pyc +0 -0
  92. sage/data/sources/longmemeval/dataset.yaml +9 -0
  93. sage/data/sources/longmemeval/download.pyc +0 -0
  94. sage/data/sources/memagentbench/Conflict_Resolution.parquet +0 -0
  95. sage/data/sources/memagentbench/__init__.py +16 -0
  96. sage/data/sources/memagentbench/__init__.pyc +0 -0
  97. sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc +0 -0
  98. sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc +0 -0
  99. sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc +0 -0
  100. sage/data/sources/memagentbench/conflict_resolution_loader.pyc +0 -0
  101. sage/data/sources/memagentbench/conflict_resolution_loader_test.py +169 -0
  102. sage/data/sources/memagentbench/dataset.yaml +10 -0
  103. sage/data/sources/memagentbench/download.pyc +0 -0
  104. sage/data/sources/mmlu/__init__.py +5 -0
  105. sage/data/sources/mmlu/__init__.pyc +0 -0
  106. sage/data/sources/mmlu/dataloader.pyc +0 -0
  107. sage/data/sources/mmlu/dataset.yaml +10 -0
  108. sage/data/sources/mmlu/download.pyc +0 -0
  109. sage/data/sources/orca_dpo/__init__.py +5 -0
  110. sage/data/sources/orca_dpo/__init__.pyc +0 -0
  111. sage/data/sources/orca_dpo/dataloader.pyc +0 -0
  112. sage/data/sources/qa_base/__init__.py +5 -0
  113. sage/data/sources/qa_base/__init__.pyc +0 -0
  114. sage/data/sources/qa_base/dataloader.pyc +0 -0
  115. sage/data/sources/qa_base/dataset.yaml +9 -0
  116. sage/data/sources/qa_base/qa_knowledge_base.txt +35 -0
  117. sage/data/sources/qa_base/qa_knowledge_chromaDB.txt +13 -0
  118. sage/data/sources/qa_base/sample/one_question.txt +1 -0
  119. sage/data/sources/qa_base/sample/question.txt +352 -0
  120. sage/data/sources/qa_base/sample/question1.txt +1 -0
  121. sage/data/usages/__init__.py +3 -0
  122. sage/data/usages/__init__.pyc +0 -0
  123. sage/data/usages/agent_eval/__init__.py +191 -0
  124. sage/data/usages/agent_eval/__init__.pyc +0 -0
  125. sage/data/usages/agent_eval/config.yaml +15 -0
  126. sage/data/usages/agent_eval/profiles/full_eval.yaml +15 -0
  127. sage/data/usages/agent_eval/profiles/quick_eval.yaml +11 -0
  128. sage/data/usages/agent_eval/profiles/sft_training.yaml +12 -0
  129. sage/data/usages/agent_eval/usage.yaml +8 -0
  130. sage/data/usages/libamm/config.yaml +13 -0
  131. sage/data/usages/neuromem/config.yaml +5 -0
  132. sage/data/usages/rag/config.yaml +9 -0
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: isage-data
3
+ Version: 0.2.1.8
4
+ Summary: SAGE Data - Unified data loaders for memory benchmark datasets (LongMemEval, Locomo, MemAgentBench, etc.)
5
+ Author-email: IntelliStream Team <shuhao_zhang@hust.edu.cn>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/intellistream/sageData
8
+ Project-URL: Repository, https://github.com/intellistream/sageData
9
+ Project-URL: Documentation, https://github.com/intellistream/sageData/blob/main/README.md
10
+ Project-URL: Issues, https://github.com/intellistream/sageData/issues
11
+ Keywords: dataset,benchmark,memory,ai,longmemeval,locomo,memagentbench,sage
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: ==3.11.*
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: isage-common>=0.2.0
26
+ Requires-Dist: pandas>=2.0.0
27
+ Requires-Dist: numpy<2.3.0,>=1.26.0
28
+ Requires-Dist: pyyaml>=6.0
29
+ Requires-Dist: datasets>=2.14.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
32
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
33
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # SAGE Data
37
+
38
+ **Dataset management module for SAGE benchmark suite**
39
+
40
+ Provides unified access to multiple datasets through a two-layer architecture:
41
+ - **Sources**: Physical datasets (e.g. qa_base, bbh, mmlu, gpqa, locomo, longmemeval, memagentbench, orca_dpo)
42
+ - **Usages**: Logical views for experiments (rag, libamm, neuromem, agent_eval)
43
+
44
+ ## Quick Start
45
+
46
+ ```bash
47
+ ./quickstart.sh
48
+ source .venv/bin/activate
49
+ ```
50
+
51
+ Then load datasets from Python:
52
+
53
+ ```python
54
+ from sage.data import DataManager
55
+
56
+ manager = DataManager.get_instance()
57
+
58
+ # Access datasets by logical usage profile
59
+ rag = manager.get_by_usage("rag")
60
+ qa_loader = rag.load("qa_base") # already instantiated
61
+ queries = qa_loader.load_queries()
62
+
63
+ # Or fetch a specific data source directly
64
+ bbh_loader = manager.get_by_source("bbh")
65
+ tasks = bbh_loader.get_task_names()
66
+ ```
67
+
68
+ ## 🛠️ CLI 使用方式(精简版)
69
+
70
+ 安装后可直接使用 `sage-data` 命令:
71
+
72
+ ```bash
73
+ sage-data list # 显示数据源状态(已下载/缺失/远程)
74
+ sage-data usage rag # 查看某个 usage 的数据映射
75
+ sage-data download locomo # 下载指定数据源(仅支持部分源)
76
+
77
+ # 选项
78
+ sage-data list --json # JSON 输出,便于脚本处理
79
+ sage-data --data-root /path # 指定自定义数据根目录
80
+ ```
81
+
82
+ 当前支持自动下载的源:`locomo`, `longmemeval`, `memagentbench`, `mmlu`。
83
+ 其他如 `gpqa`, `orca_dpo` 采用按需在线加载(Hugging Face),`qa_base`/`bbh` 等随包内置。
84
+
85
+ ## Available Datasets
86
+
87
+ | Dataset | Description | Download Required | Storage |
88
+ |---------|-------------|-------------------|---------|
89
+ | **qa_base** | Question-Answering with knowledge base | ❌ No (included) | Local files |
90
+ | **locomo** | Long-context memory benchmark | ✅ Yes (`sage-data download locomo`) | Local files (2.68MB) |
91
+ | **bbh** | BIG-Bench Hard reasoning tasks | ❌ No (included) | Local JSON files |
92
+ | **mmlu** | Massive Multitask Language Understanding | 📥 Optional (`python -m sage.data.sources.mmlu.download --all-subjects`) | On-demand or Local (~160MB) |
93
+ | **gpqa** | Graduate-Level Question Answering | ✅ Auto (Hugging Face) | On-demand (~5MB cached) |
94
+ | **orca_dpo** | Preference pairs for alignment/DPO | ✅ Auto (Hugging Face) | On-demand (varies) |
95
+
96
+ See `examples/` for detailed usage examples.
97
+
98
+ ## 📖 Examples
99
+
100
+ ```bash
101
+ python examples/qa_examples.py # QA dataset usage
102
+ python examples/locomo_examples.py # LoCoMo dataset usage
103
+ python examples/bbh_examples.py # BBH dataset usage
104
+ python examples/mmlu_examples.py # MMLU dataset usage
105
+ python examples/gpqa_examples.py # GPQA dataset usage
106
+ python examples/orca_dpo_examples.py # Orca DPO dataset usage
107
+ python examples/integration_example.py # Cross-dataset integration
108
+ ```
109
+
110
+ ## License
111
+
112
+ MIT License - see [LICENSE](LICENSE) file.
113
+
114
+ ## 🔗 Links
115
+
116
+ - **Repository**: https://github.com/intellistream/sageData
117
+ - **Issues**: https://github.com/intellistream/sageData/issues
118
+
119
+ ## ❓ Common Issues
120
+
121
+ **Q: Where's the LoCoMo data?**
122
+ A: Run `sage-data download locomo` (or `python -m sage.data.sources.locomo.download`) to download it (2.68MB from Hugging Face).
123
+
124
+ **Q: How to download MMLU for offline use?**
125
+ A: Run `python -m sage.data.sources.mmlu.download --all-subjects` to download all subjects (~160MB).
126
+
127
+ **Q: GPQA access error?**
128
+ A: You need to accept the dataset terms on Hugging Face: https://huggingface.co/datasets/Idavidrein/gpqa
129
+
130
+ **Q: How to use Orca DPO for alignment research?**
131
+ A: Use `DataManager.get_by_source("orca_dpo")` to get the loader, then use `format_for_dpo()` to prepare data for training.
132
+
133
+ ---
134
+
135
+ **Version**: 0.2.1.8 | **Last Updated**: December 2025
@@ -0,0 +1,132 @@
1
+ isage_data-0.2.1.8.dist-info/licenses/LICENSE,sha256=8UV2hDddmN5Fm-v7EgDBMiHHtVlo04tzmbX70Ab13NU,1080
2
+ sage/data/__init__.py,sha256=JAIeBEs85Flu5CSt3cCz3B5t-5a8DXw4XUfEp0MnL-Y,803
3
+ sage/data/__init__.pyc,sha256=Hymb6L7SEX5DpbBgah6qqqbgm7vAr45oT-aeQKovCvA,1171
4
+ sage/data/cli.pyc,sha256=yT_3FR6oP0mBJ04k0E7-SDU98idPMJAzul90wowuJpU,15327
5
+ sage/data/manager.pyc,sha256=DZAOVwD27AQHmtTTwDtArg0LDjExtBmebniROEsD3xo,21527
6
+ sage/data/__pycache__/__init__.cpython-311.pyc,sha256=KOjVYD_gB-APFNOx7kJEINlpk0w4SX8P-jQRVg0mNH4,1203
7
+ sage/data/__pycache__/__init__.cpython-312.pyc,sha256=87nvnChTg2ncREvb8hron_NA5v22Cek1yjlyKMvkR4c,1059
8
+ sage/data/__pycache__/cli.cpython-311.pyc,sha256=F-cYpmGU5-eZGqOLrtM_4XlmuQq_y-OnIkB-58U3FLw,15359
9
+ sage/data/__pycache__/cli.cpython-312.pyc,sha256=8YFLF02PpZJYNuTcSfeX0EVBc8I3WXIusvAVED83dws,12834
10
+ sage/data/__pycache__/manager.cpython-311.pyc,sha256=6IHUzw3lKXjD_fSlgPwxPyIL_4fxAxTTMZf2iDuWGe0,21559
11
+ sage/data/__pycache__/manager.cpython-312.pyc,sha256=fMPGOHfjdG160B_GrvQNFP6cbRFKeIAF368Ts1iWRcc,19330
12
+ sage/data/sources/__init__.py,sha256=FuaXfYc4GX3MP9StEpARDfbo5aJxe8SI_L_bIcemkuo,567
13
+ sage/data/sources/__init__.pyc,sha256=matZe2Y3N5klDR-Yi67sjTVgVlT_B8bnYYK0RosdDIU,731
14
+ sage/data/sources/__pycache__/__init__.cpython-311.pyc,sha256=6J6kZLo5acaf0nqky81dAxrQMNSiRCZVF36t2Xq2BAk,763
15
+ sage/data/sources/__pycache__/__init__.cpython-312.pyc,sha256=PtzTCNLFzXwXz7DVGBpXAquaJnvTDWtCdpnkO7Blh3o,752
16
+ sage/data/sources/agent_benchmark/__init__.py,sha256=3QMdQ2xKOqgqpjiEfr0YgDIhcaS27JeWEOGAuwhJyqU,876
17
+ sage/data/sources/agent_benchmark/__init__.pyc,sha256=-ztFyvH-bO2kMJ93MnLQqu4sPcShcZrwbTdbaC17D60,1027
18
+ sage/data/sources/agent_benchmark/dataloader.pyc,sha256=DDDGXzQUXWb_L8hS9a7z6xK0vBeBbnJHnQfvE4lE_dY,22776
19
+ sage/data/sources/agent_benchmark/dataset.yaml,sha256=QpU2qb7m90BrteXlXAIaT7CC-Fwphsnt2a0YtOhIIi0,1143
20
+ sage/data/sources/agent_benchmark/fix_tool_references.pyc,sha256=jy0j2tf260SGEH25XLLD_fUzoatH2b47GRbxREZzXkg,9877
21
+ sage/data/sources/agent_benchmark/generate_data.pyc,sha256=TOMQXUkRZ2KqM14HnZeXPCTz1YqerJiypSaxQarSoYI,12316
22
+ sage/data/sources/agent_benchmark/prepare_planning_data.pyc,sha256=jC-9mYlQN7yhepVm67W0hBzYndM4EvjF2A3ECpzYid8,26682
23
+ sage/data/sources/agent_benchmark/prepare_runtime_data.pyc,sha256=vJhavn-yedF1-lEOCTc50cglu2Wr2NU4VU2KF3TUcfE,23769
24
+ sage/data/sources/agent_benchmark/prepare_timing_data.pyc,sha256=VRWZHeqq1xcdrwft0XNPgmK_28ig4_HwX8M0-NN5lF8,27276
25
+ sage/data/sources/agent_benchmark/test_integration.py,sha256=o5b6zcxzjbVQNdO2HDm-l0xl7U62xul46gdVD2tGXlw,2890
26
+ sage/data/sources/agent_benchmark/validate_cross_task.pyc,sha256=OSCft6fCkiB0GzfBIbCDfYzNnNYrztXA4D0fRThmfZA,8246
27
+ sage/data/sources/agent_benchmark/validate_data.pyc,sha256=6ly30q7Uarjoop0-X-K3slhvtAxwUOFax2AL_AkODiY,15587
28
+ sage/data/sources/agent_benchmark/external_benchmarks/__init__.py,sha256=TbcZ7BAxM98mJMgiDC7VjBYhGZ01KLH538MCovD4FsM,665
29
+ sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc,sha256=U_rYEx3oMlJDDGOu5s-Zk3wWcgHxXHLUB3u9OyVXfV4,870
30
+ sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc,sha256=ifLYhufKTmaNAGe7_CnE83N4vAONY_gBKBjpzwTCHAE,22570
31
+ sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc,sha256=rupmuv2R-EKVmY_oHZeJVo6KG2oaUFMtTnFcQSvWVLE,7066
32
+ sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc,sha256=xmOzVaRR01IxN1o6zojFFhmoqvYyoWrapqxu6oQ_3ak,26225
33
+ sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc,sha256=bDlsfkiisUqr2zd5RyXwczVfE78TEpPdN32e0QcVycA,6239
34
+ sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc,sha256=92tVhXVKcd_c2tS6JWKdRABLcIycdPGqkTGUgMPmVTE,30759
35
+ sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc,sha256=4kLLVjjGxO5NVTXYis-lJV8vDZkFZ4_jVrS9_60OoEs,6422
36
+ sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc,sha256=Cf4KcGb0433t7LRA2JE-FYgZGFiArOsHRutSskA4R10,14374
37
+ sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py,sha256=YjJ4LV6Ui0d_hDUQJU0CfMMrLAE8z8oaQdAR00bJMMw,13130
38
+ sage/data/sources/agent_sft/__init__.py,sha256=GYiOa-F46Ulg7u3B4FdBQay1B_tFdLw_mXLHhptSMDk,250
39
+ sage/data/sources/agent_sft/__init__.pyc,sha256=y8Fcax2VijAPG4gLeuq5A-hCsw6W3UqMDlSB1bjCPF4,433
40
+ sage/data/sources/agent_sft/dataloader.pyc,sha256=C1uYp0Ab5htHfwAEwobs9cVZCqpG9wooPihOmgUZ6jw,14036
41
+ sage/data/sources/agent_sft/dataset.yaml,sha256=87cfmcXyPDKkR_3ZMjd81Tol8JOm4fSanpehh7sltgo,227
42
+ sage/data/sources/agent_sft/fix_tool_ids.pyc,sha256=lARrp92Gi8gPEFtOW_IjKES7MwMAGSEYiz_QRsKWd7Q,8666
43
+ sage/data/sources/agent_sft/schemas.pyc,sha256=US-L1dfqMzLlRleHXpm0eqTXNj2XDRrNQgZmXEG-EfU,10473
44
+ sage/data/sources/agent_sft/data/generate_data.pyc,sha256=RQqQ_51ETTseCCjcHsT4lyHFVW3Wl-RpE7DX_X_oiS4,8823
45
+ sage/data/sources/agent_sft/data/prompts_template.yaml,sha256=McR-U4QkLEjezQ_BZXcgPI8UqBJmGUtcVaqFa049w-Y,2281
46
+ sage/data/sources/agent_sft/tests/test_agent_sft_loader.py,sha256=1CJkbQEmu6zWEQJB3Pcbtyr_bd8MxHQ7J8DCtpwdpjk,11468
47
+ sage/data/sources/agent_tools/__init__.py,sha256=kBjgRB9m5sOMhsYmTSJvI95DnjFB9wBbpqui7nMAVa0,257
48
+ sage/data/sources/agent_tools/__init__.pyc,sha256=C8uu0Kwf71TlvSJlq-e1ECSmWk935RH-Zq5Dfzxtv38,457
49
+ sage/data/sources/agent_tools/dataloader.pyc,sha256=j8qWlRhYbzNktloV4Z2lQWgmzCSNsEAglqv8s-Rl0wU,21285
50
+ sage/data/sources/agent_tools/dataset.yaml,sha256=1B8tq5CCVi9I4hX5Jt4keyom1tqw5U9q_dxzLmviCJ0,283
51
+ sage/data/sources/agent_tools/generate_tools.pyc,sha256=c0W0Uh28hYbk3dGFh_pX-rFilPqtEiW6JuYx1QPWLZY,18474
52
+ sage/data/sources/agent_tools/schemas.pyc,sha256=DduHEV2VCWav51DDvmilzBQTO2-493hmaEx8GqXcpto,16007
53
+ sage/data/sources/agent_tools/test_integration.py,sha256=aQndSqJxc5O4IZXAalTCVhdFZZcdz0iTZ2LcJz-mB88,3809
54
+ sage/data/sources/agent_tools/validate_data.pyc,sha256=7AhZxvfG3T0Rb2mxP9rm1ny5SshmDB0CCTU1KUHKKMw,13955
55
+ sage/data/sources/agent_tools/tests/test_agent_tools_loader.py,sha256=oG8Kpc70KtgOgNNFJLE6kfOyb94Sc-_-yohritFVmGk,11020
56
+ sage/data/sources/bbh/__init__.py,sha256=MBsn0xDOs52ixaEY2TrE7yPdTfw2u_CamjSEzergL3U,102
57
+ sage/data/sources/bbh/__init__.pyc,sha256=MxeqnPFPywfTJVQWTZmjeHY3yq2bfChGegms2prbiRM,278
58
+ sage/data/sources/bbh/dataloader.pyc,sha256=_xmAP3YP8hm9dyrH4MQNZDGtFO59x-GP2kCUEZ-ewoY,13158
59
+ sage/data/sources/bbh/dataset.yaml,sha256=APyN8KQasC2ytDF7WKrg-hpzkjm5T-cV8wIvCmLgHFA,245
60
+ sage/data/sources/control_plane_benchmark/__init__.py,sha256=ZfGJ0XLpBDe-iwZCpEyU4LxaYsM5Vg2-K1NVnUiNFD4,1109
61
+ sage/data/sources/control_plane_benchmark/__init__.pyc,sha256=VwucBBIGNbmC4PYhXfd9neWIKf_uTd_jzlaUnKZS8kM,1290
62
+ sage/data/sources/control_plane_benchmark/dataloader.pyc,sha256=w0-oqgF0N1Xed0IPjM6Rg9dzslQ7gElT8D6iV8vanMA,27997
63
+ sage/data/sources/control_plane_benchmark/dataset.yaml,sha256=ihLtPNU0YqGNsTFf3DoQZMTg50YsIFJBv1qJaQ-F1PQ,2597
64
+ sage/data/sources/gpqa/__init__.py,sha256=JGY29_Vh-Kq1x471soBxFWjx9nQdDbC9KmW6v3sn8Rs,105
65
+ sage/data/sources/gpqa/__init__.pyc,sha256=ZBXW5wTWnNlZmM_D7YnQjbSChp0rA6btea1h1r1VF2U,281
66
+ sage/data/sources/gpqa/dataloader.pyc,sha256=h5mqui44FdtvBUKcE6mAGQaWJ5IqjV2iLkgqPDNM0i0,12309
67
+ sage/data/sources/gpqa/dataset.yaml,sha256=_EkpdX8rtF-c4W9_E3UsZVyspYBfUKo9zp6JKcWD0Mc,361
68
+ sage/data/sources/libamm_benchmark/__init__.py,sha256=eQkaHZ8BbiIezfLOI4ntVwj_PSKTRyLLd7I6cklLsqA,277
69
+ sage/data/sources/libamm_benchmark/__init__.pyc,sha256=7oCyvdcr5eEPN1Tck4k7OA_mMYMSZKWUSwX3Q7a8TEM,322
70
+ sage/data/sources/libamm_benchmark/dataset.yaml,sha256=rMO8Ajycf8Zp3z2puQIEARH7mwd1HU92_ZAFq2G3PiQ,301
71
+ sage/data/sources/locomo/__init__.py,sha256=qvJdedYm2ly123Tca_BcQq_B1N6vrUS4_p7zzw5R_f8,111
72
+ sage/data/sources/locomo/__init__.pyc,sha256=ypZv6IlQj1t1ZKLiEDvSzMCBLcQ1hBrp0UmEfsV3b7g,287
73
+ sage/data/sources/locomo/dataloader.pyc,sha256=4YEBmv2tOmNCU8SZjsbUIrCl6j465CWkqnNHCehncF0,25683
74
+ sage/data/sources/locomo/dataset.yaml,sha256=ckxVU5A8DVAc8feeVtsVx5mtlTSN4Rd7WNCqLdlsT84,320
75
+ sage/data/sources/locomo/download.pyc,sha256=M7mO9oA4SsWmktFh1qk1RanxG1lt2Phy5XCj9k-8ylk,4640
76
+ sage/data/sources/locomo/locomo10.json,sha256=efqH6Q8ECBNDuMjevsuAqaaEK3anqlN9yf32Ueppj_Q,2805274
77
+ sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc,sha256=klg-8glD1-YPNSoA2o-u2fsPC3ZsI6aK3nfGg7kqtNs,319
78
+ sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc,sha256=YpvvVZVDYx8V3GURGEn1p_lGaP1rnr3uL4zZdbFFN90,295
79
+ sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc,sha256=heWVnkohmJW3_sLsU2YezM3rq6HHkTPDWR-SIf-yK6w,22724
80
+ sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc,sha256=FbY09jfsLV3VMRv-VEO_wpazeG1jhWPFJkY6rWb16NE,21880
81
+ sage/data/sources/locomo/__pycache__/download.cpython-311.pyc,sha256=OVYEDkXd0dzVIvoOcOKuOgFxeyvlywuOhTFT7lT6w5I,4672
82
+ sage/data/sources/longmemeval/__init__.py,sha256=8xdgmUEbDmoQ_492iKe_WHjNguqH0jYqC36d15nbmCc,129
83
+ sage/data/sources/longmemeval/__init__.pyc,sha256=al86XCpBE-m8xhtUV0YK-fm8v_6Vu8Qlk60M4KWarkw,302
84
+ sage/data/sources/longmemeval/compose.pyc,sha256=qfhXrZyY_qd3cx0ihKoEWi-l4ocslR1DzVHU8im0Y-0,8191
85
+ sage/data/sources/longmemeval/dataloader.pyc,sha256=j87OhZYVR_SwimmiMizHMuU5YctnoWgB1HcBLjVJxWc,22591
86
+ sage/data/sources/longmemeval/dataset.yaml,sha256=4hr1HNtNgpxJdc4uvGHanS4x1fILGwoidzz-lWSvsoM,358
87
+ sage/data/sources/longmemeval/download.pyc,sha256=Rdc9QgqZK_YZnkkn_NbgJoYdH7uuN6v27p0R_TtJTQ0,4929
88
+ sage/data/sources/longmemeval/config/longmemeval_groups.yaml,sha256=hzL5BQSsNuya_h_fkDq0fIZDD7WtyS_KoU14KnajH1o,6355
89
+ sage/data/sources/memagentbench/Conflict_Resolution.parquet,sha256=JNXD8JzgzhViXLn4qY9E8NhkymyU17StBOtpfKOl_0U,1491588
90
+ sage/data/sources/memagentbench/__init__.py,sha256=RIdteSNTrSr5S78whC4Tq-Qw-bYPESwVyA51qjMSEcw,487
91
+ sage/data/sources/memagentbench/__init__.pyc,sha256=k31EFhpJLM1iPqgMu4j5TB2SKnQmRFFtROE7Oe6zU90,665
92
+ sage/data/sources/memagentbench/conflict_resolution_loader.pyc,sha256=DyYZd_JFGOHo_0nLU6Kc00QclNoSe6peE5mP_p7-74s,15748
93
+ sage/data/sources/memagentbench/conflict_resolution_loader_test.py,sha256=e-AsKQG90zCG9L1APln4cc6L0e6y8XTd5gGUKS2hv0U,6278
94
+ sage/data/sources/memagentbench/dataset.yaml,sha256=Jma6JDkMA2L_TPEgqVWdK7P02LcYPDwZg8j1eH5iniA,363
95
+ sage/data/sources/memagentbench/download.pyc,sha256=eOW11Pa1E47-xtHyHsNBBOprYc26EXG2xLbqA6FsI-4,5227
96
+ sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc,sha256=Qv1RkDlR5T19PsZc4oyQtFyeSd9MOyhUoPLYTLnPZbA,671
97
+ sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc,sha256=gMfjsLq07y_wffn8caYtfNOr1vAuy_voZpSyhK-2d-A,13775
98
+ sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc,sha256=NBTTBlLzw1K85254-swGreYxhiQj1dftt1bQ0ePAmzs,4544
99
+ sage/data/sources/mmlu/__init__.py,sha256=ZiTx_rmQA0M5z4U8vhZ5uRivldYhGNoxKeFeJVcGxPk,105
100
+ sage/data/sources/mmlu/__init__.pyc,sha256=bU64BMe3SQX63ERfxSszrqhO5CgJbgjOGRERCOl2Km4,281
101
+ sage/data/sources/mmlu/dataloader.pyc,sha256=RxXA9k1m9sxXulaBO0fEtG9W75IyenLsPH9CfHghORo,12790
102
+ sage/data/sources/mmlu/dataset.yaml,sha256=MjyP64sMOzuPdsIft_Z3oCRWFKJ12PKFXy6ruMs539w,365
103
+ sage/data/sources/mmlu/download.pyc,sha256=zhLKNiqSV-4qcYoc7CSNNkfOpSuqYS18PZrWZsllWbY,14197
104
+ sage/data/sources/orca_dpo/__init__.py,sha256=LN1evirvfo7ZZN0hHzg_YSdm5m3ADxMHsNi-jL8IC6s,139
105
+ sage/data/sources/orca_dpo/__init__.pyc,sha256=pNeWRFaR2TIKa7YL1T5u3lQ31cQYZVFrmQN8DbBkRjQ,316
106
+ sage/data/sources/orca_dpo/dataloader.pyc,sha256=Y5b5eCQy015KQSTUX9ZYfXEZsAJ3oFd2wN6ckfEIIZA,13973
107
+ sage/data/sources/qa_base/__init__.py,sha256=VUdivRVm5NVYlC-RfkQinmZ-Goz2kusfI5LUbX1OuyI,104
108
+ sage/data/sources/qa_base/__init__.pyc,sha256=Vq0YqoGnaATWlDHtjtkcC9VjKI274l0Fiesw50JUQ8Y,285
109
+ sage/data/sources/qa_base/dataloader.pyc,sha256=XoUlfhXF3vLrjbDOOV7lzV5X4R0ApxpHmqZY8REi3UU,13358
110
+ sage/data/sources/qa_base/dataset.yaml,sha256=3Wm5JVoSZ2Bi9SA9uD3Lzh1EI2YlI_AudAz49zFjPok,259
111
+ sage/data/sources/qa_base/qa_knowledge_base.txt,sha256=MrTjFQBYyv-QvxyO91dgoDB8k_Zf50V8xfmE8DQezc4,1360
112
+ sage/data/sources/qa_base/qa_knowledge_chromaDB.txt,sha256=7NKnW06tuOyngh98cOjo3c4R0Du2QTy_J5UZVdAMtW0,580
113
+ sage/data/sources/qa_base/sample/one_question.txt,sha256=W7xWZlO1xhfAt9W53NIl48q-TH5jnABT8xR_H9Ak5oE,87
114
+ sage/data/sources/qa_base/sample/question.txt,sha256=VjX0DuAz4BzlpExlPawK57dIQuhFquoclqXgvMPE2gg,29984
115
+ sage/data/sources/qa_base/sample/question1.txt,sha256=GSB3Gq63t1lBKFPX9CyqkhfqE_bKe_VmUyjXUVxi4qs,23
116
+ sage/data/usages/__init__.py,sha256=jHd4op-nfNtvAjr0RekGzlfzODt-RS6pZwkcZ3Zea6k,84
117
+ sage/data/usages/__init__.pyc,sha256=2k6UBW-oED5nZJJ8U49_BC3xZ2m9Em5nlYW3RYcq-tA,230
118
+ sage/data/usages/agent_eval/__init__.py,sha256=0LvUWT-WRMiUwUm14LobIcDkydd0-DTnJ3rj2doPPe0,6117
119
+ sage/data/usages/agent_eval/__init__.pyc,sha256=y1cKUK4dKEopbjVpw4CYarUU9u3SyYKS86alsom3Qj8,8152
120
+ sage/data/usages/agent_eval/config.yaml,sha256=wD1GZoNmnBGMARohvGa1ABjkDCTEnoM-ksOb3LksIjw,459
121
+ sage/data/usages/agent_eval/usage.yaml,sha256=m9j0RPnSm7AJ-_quIDw2SfN7Z0fC0zD35klDw1-esrc,203
122
+ sage/data/usages/agent_eval/profiles/full_eval.yaml,sha256=zKQkGRB3cPtFJJ-416FjDwgSOckl6DSNTwtPcKkQlGY,330
123
+ sage/data/usages/agent_eval/profiles/quick_eval.yaml,sha256=zcnCI-c1oga4vI5xryW8tZGqeZsfF20AXTGmw-p0BpU,235
124
+ sage/data/usages/agent_eval/profiles/sft_training.yaml,sha256=OfsWnUNC0qY98nhG5ZgmLgi4lph6FqU3hSm98RFCn4o,229
125
+ sage/data/usages/libamm/config.yaml,sha256=HorGUurHUMqtwPs8UbXm_N0dOmY61ABJy8nzzE7DSXI,409
126
+ sage/data/usages/neuromem/config.yaml,sha256=A7E7fNYAaeZDzKj76Bpyh6sIKqqvTRzv14MSIX9TuZY,225
127
+ sage/data/usages/rag/config.yaml,sha256=lhxfqBy3X3Y52NkgsJ-w4Hk_ZaAQxEDJFpTByNpN5O0,251
128
+ isage_data-0.2.1.8.dist-info/METADATA,sha256=A1RfNU6fowsWICnjXGnw-sLqktQhEIIuS78p5Bua880,5136
129
+ isage_data-0.2.1.8.dist-info/WHEEL,sha256=27snaH8EChr9VGIQt_981R5IOTPR-vQPuJNW-WzhNJA,93
130
+ isage_data-0.2.1.8.dist-info/entry_points.txt,sha256=8ZU2OS18xxsbtaHiDnvuHutBT73yldAJp4gURFcwx6E,49
131
+ isage_data-0.2.1.8.dist-info/top_level.txt,sha256=hibFyzQHiLOMK68qL1OWsNKaXOmSXqZjeLTBem6Yy7I,5
132
+ isage_data-0.2.1.8.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: cp311-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sage-data = sage.data.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 IntelliStream Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ sage
sage/data/__init__.py ADDED
@@ -0,0 +1,37 @@
1
+ """SAGE Data - Shared dataset library with two-layer architecture."""
2
+
3
+ from importlib import metadata as _metadata
4
+
5
+
6
def _resolve_version() -> str:
    """Return the installed package version with graceful fallbacks.

    The wheel's METADATA declares the distribution name ``isage-data``, so
    that name is tried first. ``sage-data`` and ``sage-benchmark`` (the names
    this lookup used previously) are kept as fallbacks so existing
    environments keep resolving the same way.

    Returns:
        The version string of the first distribution that is installed, or
        ``"0.0.0-dev"`` when none of the candidate distributions is found.
    """
    # Order matters: the first installed distribution wins.
    for distribution in ("isage-data", "sage-data", "sage-benchmark"):
        try:
            return _metadata.version(distribution)
        except _metadata.PackageNotFoundError:
            continue
    return "0.0.0-dev"
14
+
15
+
16
+ __version__ = _resolve_version()
17
+
18
+ from .manager import (
19
+ DataManager,
20
+ DatasetMetadata,
21
+ SourceRegistry,
22
+ UsageProfile,
23
+ UsageRegistry,
24
+ get_usage_view,
25
+ load_dataset,
26
+ )
27
+
28
+ __all__ = [
29
+ "DataManager",
30
+ "DatasetMetadata",
31
+ "SourceRegistry",
32
+ "UsageProfile",
33
+ "UsageRegistry",
34
+ "get_usage_view",
35
+ "load_dataset",
36
+ "__version__",
37
+ ]
sage/data/__init__.pyc ADDED
Binary file
sage/data/cli.pyc ADDED
Binary file
sage/data/manager.pyc ADDED
Binary file
@@ -0,0 +1,13 @@
1
+ """Dataset source registry for SAGE data marketplace.
2
+
3
+ This package contains one subpackage per physical dataset. Each dataset remains in its
4
+ original location under ``sage.data``; these wrappers expose them through the new
5
+ "sources" layer without changing existing imports.
6
+
7
+ Example:
8
+ from sage.data.sources.qa_base import QADataLoader
9
+ from sage.data.sources.agent_benchmark import AgentBenchmarkDataLoader
10
+ from sage.data.sources.control_plane_benchmark import ControlPlaneBenchmarkDataLoader
11
+ """
12
+
13
+ __all__ = ["agent_benchmark", "control_plane_benchmark"]
Binary file
@@ -0,0 +1,35 @@
1
+ """
2
+ Agent Benchmark Dataset Module
3
+
4
+ This module provides tools for loading and managing the Agent Benchmark dataset,
5
+ which evaluates AI agent capabilities in tool selection, task planning, and timing judgment.
6
+
7
+ Usage:
8
+ from sage.data.sources.agent_benchmark import AgentBenchmarkDataLoader
9
+
10
+ loader = AgentBenchmarkDataLoader()
11
+ stats = loader.get_stats()
12
+
13
+ for sample in loader.iter_split("tool_selection", split="dev"):
14
+ print(sample.instruction)
15
+ """
16
+
17
+ from .dataloader import (
18
+ AgentBenchmarkDataLoader,
19
+ AgentBenchmarkSample,
20
+ GroundTruthTaskPlanning,
21
+ GroundTruthTimingJudgment,
22
+ GroundTruthToolSelection,
23
+ PlanStep,
24
+ SampleMetadata,
25
+ )
26
+
27
+ __all__ = [
28
+ "AgentBenchmarkDataLoader",
29
+ "AgentBenchmarkSample",
30
+ "GroundTruthToolSelection",
31
+ "GroundTruthTaskPlanning",
32
+ "GroundTruthTimingJudgment",
33
+ "SampleMetadata",
34
+ "PlanStep",
35
+ ]
@@ -0,0 +1,44 @@
1
+ name: "agent_benchmark"
2
+ description: "Task packs for tool picking, planning, and timing evaluation"
3
+ type: "benchmark"
4
+ format: "jsonl"
5
+ version: "0.1.0"
6
+ maintainer: "SAGE Agent Benchmark Team"
7
+ tags: ["agent", "benchmark", "planning", "tool_selection", "reasoning"]
8
+ license: "CC-BY-SA-4.0"
9
+ size: "~20MB"
10
+
11
+ tasks:
12
+ - name: "tool_selection"
13
+ description: "Evaluate agent's ability to select appropriate tools"
14
+ samples: 500
15
+ splits: ["train", "dev", "test"]
16
+
17
+ - name: "task_planning"
18
+ description: "Evaluate agent's task decomposition and sequencing"
19
+ samples: 300
20
+ splits: ["train", "dev", "test"]
21
+
22
+ - name: "timing_judgment"
23
+ description: "Evaluate when to use tools vs. direct answers"
24
+ samples: 300
25
+ splits: ["train", "dev", "test"]
26
+
27
+ statistics:
28
+ total_samples: 1100
29
+ train_samples: 770
30
+ dev_samples: 165
31
+ test_samples: 165
32
+
33
+ difficulty_distribution:
34
+ easy: "~35%"
35
+ medium: "~45%"
36
+ hard: "~20%"
37
+
38
+ citation: |
39
+ @dataset{agent_benchmark_2025,
40
+ title={Agent Benchmark: Evaluating Tool Selection, Planning, and Timing},
41
+ author={SAGE Team},
42
+ year={2025},
43
+ publisher={SAGE Framework}
44
+ }
@@ -0,0 +1,32 @@
1
+ """
2
+ External Benchmarks for Agent Evaluation
3
+
4
+ This module provides unified access to external public benchmarks for
5
+ tool selection, task planning, and timing judgment evaluation.
6
+
7
+ Supported benchmarks:
8
+ - BFCL (Berkeley Function Calling Leaderboard)
9
+ - ToolBench
10
+ - API-Bank
11
+ - ToolAlpaca
12
+ - TaskBench
13
+ - MetaTool
14
+ """
15
+
16
+ from .converters import (
17
+ APIBankConverter,
18
+ BFCLConverter,
19
+ MetaToolConverter,
20
+ TaskBenchConverter,
21
+ ToolBenchConverter,
22
+ )
23
+ from .loader import ExternalBenchmarkLoader
24
+
25
+ __all__ = [
26
+ "ExternalBenchmarkLoader",
27
+ "BFCLConverter",
28
+ "ToolBenchConverter",
29
+ "APIBankConverter",
30
+ "TaskBenchConverter",
31
+ "MetaToolConverter",
32
+ ]
@@ -0,0 +1,94 @@
1
+ """
2
+ Integration test for Agent Benchmark
3
+
4
+ Tests the complete end-to-end workflow including:
5
+ - Module import
6
+ - DataLoader initialization
7
+ - Data iteration
8
+ - Statistics generation
9
+ - Sample validation
10
+ """
11
+
12
+ from sage.data.sources.agent_benchmark import (
13
+ AgentBenchmarkDataLoader,
14
+ GroundTruthTaskPlanning,
15
+ GroundTruthTimingJudgment,
16
+ GroundTruthToolSelection,
17
+ )
18
+
19
+
20
def _load_dev_samples(loader, task_type):
    """Return the ``dev`` split of *task_type* as a list, failing if it is empty.

    Args:
        loader: Object exposing ``iter_split(task_type, split)``.
        task_type: Task name passed through to ``iter_split``.

    Returns:
        A non-empty list of the samples yielded for the ``dev`` split.

    Raises:
        AssertionError: If the split yields no samples. Checking here turns
            what would otherwise be an ``IndexError`` on ``samples[0]`` into
            a readable test failure.
    """
    samples = list(loader.iter_split(task_type, "dev"))
    assert samples, f"no samples found in the 'dev' split of {task_type!r}"
    return samples


def test_basic_workflow():
    """Exercise the Agent Benchmark loader end to end.

    Steps, in order: construct the loader, read its statistics, load the
    ``dev`` split of each of the three task types and check the type of the
    first sample's ground truth, fetch one sample by id, and validate it.

    The function returns ``None``: failures are reported through
    ``AssertionError`` only, because pytest flags test functions that return
    a non-None value.
    """
    banner = "=" * 70
    print(banner)
    print("AGENT BENCHMARK INTEGRATION TEST")
    print(banner)

    # Initialize loader
    print("\n1. Initializing loader...")
    loader = AgentBenchmarkDataLoader()
    print(" ✅ Loader initialized")

    # Get statistics
    print("\n2. Getting statistics...")
    stats = loader.get_stats()
    print(f" ✅ Total samples: {stats['total_samples']}")
    print(f" ✅ Task types: {len(stats['by_task_type'])}")

    # Test each task type
    print("\n3. Testing task types...")

    # Tool Selection
    print(" Testing tool_selection...")
    ts_samples = _load_dev_samples(loader, "tool_selection")
    sample = ts_samples[0]
    gt = sample.get_typed_ground_truth()
    assert isinstance(gt, GroundTruthToolSelection), (
        f"unexpected ground truth type: {type(gt).__name__}"
    )
    assert len(gt.top_k) > 0, "tool_selection sample has an empty top_k"
    print(f" ✅ Loaded {len(ts_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Tools: {gt.top_k}")

    # Task Planning
    print(" Testing task_planning...")
    tp_samples = _load_dev_samples(loader, "task_planning")
    sample = tp_samples[0]
    gt = sample.get_typed_ground_truth()
    assert isinstance(gt, GroundTruthTaskPlanning), (
        f"unexpected ground truth type: {type(gt).__name__}"
    )
    assert 5 <= len(gt.plan_steps) <= 10, (
        f"expected 5-10 plan steps, got {len(gt.plan_steps)}"
    )
    print(f" ✅ Loaded {len(tp_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Steps: {len(gt.plan_steps)}")

    # Timing Judgment
    print(" Testing timing_judgment...")
    tj_samples = _load_dev_samples(loader, "timing_judgment")
    sample = tj_samples[0]
    gt = sample.get_typed_ground_truth()
    assert isinstance(gt, GroundTruthTimingJudgment), (
        f"unexpected ground truth type: {type(gt).__name__}"
    )
    assert isinstance(gt.should_call_tool, bool), (
        "should_call_tool must be a bool"
    )
    print(f" ✅ Loaded {len(tj_samples)} samples")
    print(f" ✅ Sample: {sample.sample_id}")
    print(f" ✅ Should call tool: {gt.should_call_tool}")

    # Test sample retrieval
    print("\n4. Testing sample retrieval...")
    sample = loader.get_sample("ts_000001")
    assert sample is not None, "sample 'ts_000001' could not be retrieved"
    print(f" ✅ Retrieved sample: {sample.sample_id}")

    # Test validation
    print("\n5. Testing validation...")
    errors = loader.validate_sample(sample)
    assert len(errors) == 0, f"sample validation reported errors: {errors}"
    print(" ✅ Sample validation passed")

    print("\n" + banner)
    print("✅ ALL INTEGRATION TESTS PASSED")
    print(banner)


if __name__ == "__main__":
    # An uncaught AssertionError already ends the interpreter with a non-zero
    # status, so reaching the line after the call means every check passed.
    # SystemExit is raised directly rather than calling exit(), which is a
    # helper injected by the ``site`` module and is not always available.
    test_basic_workflow()
    raise SystemExit(0)