isage-data 0.2.1.8__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. isage_data-0.2.1.8.dist-info/METADATA +135 -0
  2. isage_data-0.2.1.8.dist-info/RECORD +132 -0
  3. isage_data-0.2.1.8.dist-info/WHEEL +5 -0
  4. isage_data-0.2.1.8.dist-info/entry_points.txt +2 -0
  5. isage_data-0.2.1.8.dist-info/licenses/LICENSE +21 -0
  6. isage_data-0.2.1.8.dist-info/top_level.txt +1 -0
  7. sage/data/__init__.py +37 -0
  8. sage/data/__init__.pyc +0 -0
  9. sage/data/__pycache__/__init__.cpython-311.pyc +0 -0
  10. sage/data/__pycache__/__init__.cpython-312.pyc +0 -0
  11. sage/data/__pycache__/cli.cpython-311.pyc +0 -0
  12. sage/data/__pycache__/cli.cpython-312.pyc +0 -0
  13. sage/data/__pycache__/manager.cpython-311.pyc +0 -0
  14. sage/data/__pycache__/manager.cpython-312.pyc +0 -0
  15. sage/data/cli.pyc +0 -0
  16. sage/data/manager.pyc +0 -0
  17. sage/data/sources/__init__.py +13 -0
  18. sage/data/sources/__init__.pyc +0 -0
  19. sage/data/sources/__pycache__/__init__.cpython-311.pyc +0 -0
  20. sage/data/sources/__pycache__/__init__.cpython-312.pyc +0 -0
  21. sage/data/sources/agent_benchmark/__init__.py +35 -0
  22. sage/data/sources/agent_benchmark/__init__.pyc +0 -0
  23. sage/data/sources/agent_benchmark/dataloader.pyc +0 -0
  24. sage/data/sources/agent_benchmark/dataset.yaml +44 -0
  25. sage/data/sources/agent_benchmark/external_benchmarks/__init__.py +32 -0
  26. sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc +0 -0
  27. sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc +0 -0
  28. sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc +0 -0
  29. sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc +0 -0
  30. sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc +0 -0
  31. sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc +0 -0
  32. sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc +0 -0
  33. sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc +0 -0
  34. sage/data/sources/agent_benchmark/fix_tool_references.pyc +0 -0
  35. sage/data/sources/agent_benchmark/generate_data.pyc +0 -0
  36. sage/data/sources/agent_benchmark/prepare_planning_data.pyc +0 -0
  37. sage/data/sources/agent_benchmark/prepare_runtime_data.pyc +0 -0
  38. sage/data/sources/agent_benchmark/prepare_timing_data.pyc +0 -0
  39. sage/data/sources/agent_benchmark/test_integration.py +94 -0
  40. sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py +353 -0
  41. sage/data/sources/agent_benchmark/validate_cross_task.pyc +0 -0
  42. sage/data/sources/agent_benchmark/validate_data.pyc +0 -0
  43. sage/data/sources/agent_sft/__init__.py +10 -0
  44. sage/data/sources/agent_sft/__init__.pyc +0 -0
  45. sage/data/sources/agent_sft/data/generate_data.pyc +0 -0
  46. sage/data/sources/agent_sft/data/prompts_template.yaml +75 -0
  47. sage/data/sources/agent_sft/dataloader.pyc +0 -0
  48. sage/data/sources/agent_sft/dataset.yaml +9 -0
  49. sage/data/sources/agent_sft/fix_tool_ids.pyc +0 -0
  50. sage/data/sources/agent_sft/schemas.pyc +0 -0
  51. sage/data/sources/agent_sft/tests/test_agent_sft_loader.py +316 -0
  52. sage/data/sources/agent_tools/__init__.py +6 -0
  53. sage/data/sources/agent_tools/__init__.pyc +0 -0
  54. sage/data/sources/agent_tools/dataloader.pyc +0 -0
  55. sage/data/sources/agent_tools/dataset.yaml +9 -0
  56. sage/data/sources/agent_tools/generate_tools.pyc +0 -0
  57. sage/data/sources/agent_tools/schemas.pyc +0 -0
  58. sage/data/sources/agent_tools/test_integration.py +108 -0
  59. sage/data/sources/agent_tools/tests/test_agent_tools_loader.py +306 -0
  60. sage/data/sources/agent_tools/validate_data.pyc +0 -0
  61. sage/data/sources/bbh/__init__.py +5 -0
  62. sage/data/sources/bbh/__init__.pyc +0 -0
  63. sage/data/sources/bbh/dataloader.pyc +0 -0
  64. sage/data/sources/bbh/dataset.yaml +9 -0
  65. sage/data/sources/control_plane_benchmark/__init__.py +41 -0
  66. sage/data/sources/control_plane_benchmark/__init__.pyc +0 -0
  67. sage/data/sources/control_plane_benchmark/dataloader.pyc +0 -0
  68. sage/data/sources/control_plane_benchmark/dataset.yaml +101 -0
  69. sage/data/sources/gpqa/__init__.py +5 -0
  70. sage/data/sources/gpqa/__init__.pyc +0 -0
  71. sage/data/sources/gpqa/dataloader.pyc +0 -0
  72. sage/data/sources/gpqa/dataset.yaml +10 -0
  73. sage/data/sources/libamm_benchmark/__init__.py +10 -0
  74. sage/data/sources/libamm_benchmark/__init__.pyc +0 -0
  75. sage/data/sources/libamm_benchmark/dataset.yaml +9 -0
  76. sage/data/sources/locomo/__init__.py +5 -0
  77. sage/data/sources/locomo/__init__.pyc +0 -0
  78. sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc +0 -0
  79. sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc +0 -0
  80. sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc +0 -0
  81. sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc +0 -0
  82. sage/data/sources/locomo/__pycache__/download.cpython-311.pyc +0 -0
  83. sage/data/sources/locomo/dataloader.pyc +0 -0
  84. sage/data/sources/locomo/dataset.yaml +10 -0
  85. sage/data/sources/locomo/download.pyc +0 -0
  86. sage/data/sources/locomo/locomo10.json +66751 -0
  87. sage/data/sources/longmemeval/__init__.py +5 -0
  88. sage/data/sources/longmemeval/__init__.pyc +0 -0
  89. sage/data/sources/longmemeval/compose.pyc +0 -0
  90. sage/data/sources/longmemeval/config/longmemeval_groups.yaml +15 -0
  91. sage/data/sources/longmemeval/dataloader.pyc +0 -0
  92. sage/data/sources/longmemeval/dataset.yaml +9 -0
  93. sage/data/sources/longmemeval/download.pyc +0 -0
  94. sage/data/sources/memagentbench/Conflict_Resolution.parquet +0 -0
  95. sage/data/sources/memagentbench/__init__.py +16 -0
  96. sage/data/sources/memagentbench/__init__.pyc +0 -0
  97. sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc +0 -0
  98. sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc +0 -0
  99. sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc +0 -0
  100. sage/data/sources/memagentbench/conflict_resolution_loader.pyc +0 -0
  101. sage/data/sources/memagentbench/conflict_resolution_loader_test.py +169 -0
  102. sage/data/sources/memagentbench/dataset.yaml +10 -0
  103. sage/data/sources/memagentbench/download.pyc +0 -0
  104. sage/data/sources/mmlu/__init__.py +5 -0
  105. sage/data/sources/mmlu/__init__.pyc +0 -0
  106. sage/data/sources/mmlu/dataloader.pyc +0 -0
  107. sage/data/sources/mmlu/dataset.yaml +10 -0
  108. sage/data/sources/mmlu/download.pyc +0 -0
  109. sage/data/sources/orca_dpo/__init__.py +5 -0
  110. sage/data/sources/orca_dpo/__init__.pyc +0 -0
  111. sage/data/sources/orca_dpo/dataloader.pyc +0 -0
  112. sage/data/sources/qa_base/__init__.py +5 -0
  113. sage/data/sources/qa_base/__init__.pyc +0 -0
  114. sage/data/sources/qa_base/dataloader.pyc +0 -0
  115. sage/data/sources/qa_base/dataset.yaml +9 -0
  116. sage/data/sources/qa_base/qa_knowledge_base.txt +35 -0
  117. sage/data/sources/qa_base/qa_knowledge_chromaDB.txt +13 -0
  118. sage/data/sources/qa_base/sample/one_question.txt +1 -0
  119. sage/data/sources/qa_base/sample/question.txt +352 -0
  120. sage/data/sources/qa_base/sample/question1.txt +1 -0
  121. sage/data/usages/__init__.py +3 -0
  122. sage/data/usages/__init__.pyc +0 -0
  123. sage/data/usages/agent_eval/__init__.py +191 -0
  124. sage/data/usages/agent_eval/__init__.pyc +0 -0
  125. sage/data/usages/agent_eval/config.yaml +15 -0
  126. sage/data/usages/agent_eval/profiles/full_eval.yaml +15 -0
  127. sage/data/usages/agent_eval/profiles/quick_eval.yaml +11 -0
  128. sage/data/usages/agent_eval/profiles/sft_training.yaml +12 -0
  129. sage/data/usages/agent_eval/usage.yaml +8 -0
  130. sage/data/usages/libamm/config.yaml +13 -0
  131. sage/data/usages/neuromem/config.yaml +5 -0
  132. sage/data/usages/rag/config.yaml +9 -0
@@ -0,0 +1,306 @@
+ """
+ Unit tests for Agent Tools DataLoader
+
+ Tests cover:
+ - Data loading and validation
+ - Tool ID format validation
+ - Search and retrieval operations
+ - Category indexing
+ - Deduplication checks
+ - Coverage metrics
+ """
+
+ import re
+
+ import pytest
+
+ from sage.data.sources.agent_tools import AgentToolRecord, AgentToolsDataLoader
+
+
+ class TestAgentToolsDataLoader:
+     """Test suite for AgentToolsDataLoader."""
+
+     @pytest.fixture
+     def loader(self):
+         """Create a loader instance for testing."""
+         return AgentToolsDataLoader()
+
+     def test_loader_initialization(self, loader):
+         """Test that loader initializes successfully."""
+         assert loader is not None
+         assert len(loader) > 0
+         assert loader.get_total_tools() > 0
+
+     def test_minimum_tool_count(self, loader):
+         """Test that we have at least 1000 tools."""
+         assert len(loader) >= 1000, f"Expected >= 1000 tools, got {len(loader)}"
+
+     def test_tool_id_format(self, loader):
+         """Test that all tool_ids match required regex pattern."""
+         pattern = re.compile(r"^[a-z]+(_[a-z]+)*_[0-9]{3}$")
+
+         invalid_ids = []
+         for tool_id in loader.list_tool_ids():
+             if not pattern.match(tool_id):
+                 invalid_ids.append(tool_id)
+
+         assert len(invalid_ids) == 0, f"Invalid tool_ids found: {invalid_ids[:10]}"
+
+     def test_tool_id_uniqueness(self, loader):
+         """Test that all tool_ids are unique."""
+         tool_ids = loader.list_tool_ids()
+         assert len(tool_ids) == len(set(tool_ids)), "Duplicate tool_ids found"
+
+     def test_tool_name_uniqueness(self, loader):
+         """Test that all tool names are unique."""
+         names = [tool.name for tool in loader.tools.values()]
+         duplicates = [name for name in names if names.count(name) > 1]
+         assert len(duplicates) == 0, f"Duplicate names found: {set(duplicates)}"
+
+     def test_get_tool(self, loader):
+         """Test getting a tool by ID."""
+         # Get first tool_id
+         tool_ids = loader.list_tool_ids()
+         assert len(tool_ids) > 0
+
+         tool = loader.get_tool(tool_ids[0])
+         assert isinstance(tool, AgentToolRecord)
+         assert tool.tool_id == tool_ids[0]
+
+     def test_get_tool_invalid_id(self, loader):
+         """Test that getting invalid tool_id raises KeyError."""
+         with pytest.raises(KeyError):
+             loader.get_tool("nonexistent_tool_999")
+
+     def test_capabilities_non_empty(self, loader):
+         """Test that all tools have non-empty capabilities."""
+         for tool in loader.tools.values():
+             assert len(tool.capabilities) > 0, f"Tool {tool.tool_id} has empty capabilities"
+
+     def test_category_format(self, loader):
+         """Test that all categories follow path format."""
+         category_pattern = re.compile(r"^[a-z]+(/[a-z_]+)*$")
+
+         invalid_categories = []
+         for tool in loader.tools.values():
+             if not category_pattern.match(tool.category):
+                 invalid_categories.append(tool.category)
+
+         assert len(invalid_categories) == 0, f"Invalid categories: {set(invalid_categories)}"
+
+     def test_category_index(self, loader):
+         """Test that category index is built correctly."""
+         categories = loader.get_categories()
+         assert len(categories) > 0
+
+         # Test that each category has tools
+         for category in categories:
+             tools_in_cat = list(loader.iter_category(category))
+             assert len(tools_in_cat) > 0, f"Category {category} has no tools"
+
+     def test_search_by_capability(self, loader):
+         """Test capability-based search."""
+         # Search for common capability
+         results = loader.search_by_capability("forecast", top_k=10)
+         assert len(results) > 0, "No tools found with 'forecast' capability"
+
+         # Verify results have matching capability
+         for tool in results:
+             assert any("forecast" in cap for cap in tool.capabilities), \
+                 f"Tool {tool.tool_id} doesn't have 'forecast' in capabilities"
+
+     def test_search_top_k_limit(self, loader):
+         """Test that search respects top_k parameter."""
+         results = loader.search_by_capability("search", top_k=5)
+         assert len(results) <= 5, "Search returned more than top_k results"
+
+     def test_iter_category(self, loader):
+         """Test category iteration."""
+         categories = loader.get_categories()
+         assert len(categories) > 0
+
+         # Test first category
+         category = categories[0]
+         count = 0
+         for tool in loader.iter_category(category):
+             assert tool.category == category
+             count += 1
+
+         assert count > 0, f"No tools found in category {category}"
+
+     def test_iter_category_invalid(self, loader):
+         """Test that iterating invalid category raises ValueError."""
+         with pytest.raises(ValueError):
+             list(loader.iter_category("invalid/category"))
+
+     def test_load_taxonomy(self, loader):
+         """Test loading category taxonomy."""
+         taxonomy = loader.load_taxonomy()
+         assert taxonomy is not None
+         assert len(taxonomy.taxonomy) > 0
+         assert taxonomy.version is not None
+
+     def test_load_stats(self, loader):
+         """Test loading dataset statistics."""
+         stats = loader.load_stats()
+         assert stats is not None
+         assert stats.total_tools > 0
+         assert stats.total_categories > 0
+         assert len(stats.category_distribution) > 0
+
+     def test_stats_accuracy(self, loader):
+         """Test that stats match actual data."""
+         stats = loader.load_stats()
+         assert stats.total_tools == len(loader)
+
+         # Check category distribution
+         for category, count in stats.category_distribution.items():
+             actual_count = len(list(loader.iter_category(category)))
+             assert actual_count == count, \
+                 f"Category {category}: stats={count}, actual={actual_count}"
+
+     def test_reliability_scores(self, loader):
+         """Test that reliability scores are valid."""
+         for tool in loader.tools.values():
+             if tool.reliability_score is not None:
+                 assert 0.0 <= tool.reliability_score <= 1.0, \
+                     f"Tool {tool.tool_id} has invalid reliability: {tool.reliability_score}"
+
+     def test_latency_values(self, loader):
+         """Test that latency values are non-negative."""
+         for tool in loader.tools.values():
+             if tool.latency_ms_p50 is not None:
+                 assert tool.latency_ms_p50 >= 0, \
+                     f"Tool {tool.tool_id} has negative latency: {tool.latency_ms_p50}"
+
+     def test_filter_tools(self, loader):
+         """Test multi-criteria filtering."""
+         # Filter by category
+         results = loader.filter_tools(category="environment/weather")
+         assert all(t.category == "environment/weather" for t in results)
+
+         # Filter by reliability
+         results = loader.filter_tools(min_reliability=0.95)
+         assert all(
+             t.reliability_score is None or t.reliability_score >= 0.95
+             for t in results
+         )
+
+         # Filter by latency
+         results = loader.filter_tools(max_latency=200)
+         assert all(
+             t.latency_ms_p50 is None or t.latency_ms_p50 <= 200
+             for t in results
+         )
+
+     def test_get_tool_by_name(self, loader):
+         """Test getting tool by name."""
+         # Get first tool
+         tool_ids = loader.list_tool_ids()
+         original_tool = loader.get_tool(tool_ids[0])
+
+         # Get by name
+         tool_by_name = loader.get_tool_by_name(original_tool.name)
+         assert tool_by_name.tool_id == original_tool.tool_id
+
+     def test_search_by_name(self, loader):
+         """Test name-based search."""
+         results = loader.search_by_name("weather", top_k=5)
+         assert len(results) > 0
+         assert all("weather" in t.name.lower() for t in results)
+
+     def test_capability_index(self, loader):
+         """Test that capability index is comprehensive."""
+         capabilities = loader.get_capabilities()
+         assert len(capabilities) > 0
+
+         # Each capability should have tools
+         for cap in capabilities[:10]:  # Test first 10
+             tools_with_cap = [
+                 t for t in loader.tools.values()
+                 if cap in t.capabilities
+             ]
+             assert len(tools_with_cap) > 0, f"No tools with capability: {cap}"
+
+     def test_category_stats(self, loader):
+         """Test category statistics calculation."""
+         categories = loader.get_categories()
+         category = categories[0]
+
+         stats = loader.get_category_stats(category)
+         assert "total_tools" in stats
+         assert "avg_reliability" in stats
+         assert "avg_latency_ms" in stats
+         assert stats["total_tools"] > 0
+
+
+ class TestAgentToolRecord:
+     """Test suite for AgentToolRecord schema validation."""
+
+     def test_valid_tool_id(self):
+         """Test valid tool_id formats."""
+         valid_ids = [
+             "weather_query_001",
+             "calendar_event_create_042",
+             "environment_air_quality_015",
+             "a_001",
+             "test_tool_999"
+         ]
+
+         for tool_id in valid_ids:
+             tool = AgentToolRecord(
+                 tool_id=tool_id,
+                 name="Test Tool",
+                 category="test/category",
+                 capabilities=["test"]
+             )
+             assert tool.tool_id == tool_id
+
+     def test_invalid_tool_id(self):
+         """Test that invalid tool_ids raise validation error."""
+         invalid_ids = [
+             "WeatherQuery_001",    # uppercase
+             "weather-query_001",   # hyphen
+             "weather_query_1",     # not 3 digits
+             "weather_query_1234",  # too many digits
+             "001_weather_query",   # digits not at end
+         ]
+
+         from pydantic import ValidationError
+
+         for tool_id in invalid_ids:
+             with pytest.raises(ValidationError):
+                 AgentToolRecord(
+                     tool_id=tool_id,
+                     name="Test Tool",
+                     category="test/category",
+                     capabilities=["test"]
+                 )
+
+     def test_empty_capabilities(self):
+         """Test that empty capabilities raise error."""
+         from pydantic import ValidationError
+
+         with pytest.raises(ValidationError):
+             AgentToolRecord(
+                 tool_id="test_tool_001",
+                 name="Test Tool",
+                 category="test/category",
+                 capabilities=[]
+             )
+
+     def test_invalid_category(self):
+         """Test that invalid category format raises error."""
+         from pydantic import ValidationError
+
+         with pytest.raises(ValidationError):
+             AgentToolRecord(
+                 tool_id="test_tool_001",
+                 name="Test Tool",
+                 category="Invalid-Category",  # hyphen not allowed
+                 capabilities=["test"]
+             )
+
+
+ if __name__ == "__main__":
+     pytest.main([__file__, "-v"])
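
Taken together, the assertions above pin down the loader's public surface. The following is a minimal usage sketch reconstructed from the tests: every method name, the tool_id regex, and the "environment/weather" category path appear in the test file, while everything else (e.g. what gets printed) is illustrative.

from sage.data.sources.agent_tools import AgentToolsDataLoader

loader = AgentToolsDataLoader()   # loads the bundled tool catalog
print(len(loader))                # total tool count (>= 1000 per the tests)

# Look up one tool by id; ids match ^[a-z]+(_[a-z]+)*_[0-9]{3}$
tool = loader.get_tool(loader.list_tool_ids()[0])
print(tool.tool_id, tool.name, tool.category)

# Capability search returns at most top_k AgentToolRecord objects
for t in loader.search_by_capability("forecast", top_k=10):
    print(t.tool_id, t.capabilities)

# Multi-criteria filtering; tools with None-valued metrics pass the filter
fast_reliable = loader.filter_tools(min_reliability=0.95, max_latency=200)

# Per-category iteration and aggregate statistics
for category in loader.get_categories():
    stats = loader.get_category_stats(category)
    print(category, stats["total_tools"], stats["avg_reliability"])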
@@ -0,0 +1,5 @@
+ """BBH dataset source wrapper."""
+
+ from .dataloader import BBHDataLoader
+
+ __all__ = ["BBHDataLoader"]
Binary file
Binary file
@@ -0,0 +1,9 @@
+ name: "bbh"
+ description: "BIG-Bench Hard - 27 challenging reasoning tasks for language models"
+ type: "text"
+ format: "json"
+ maintainer: "sage-team"
+ tags: ["reasoning", "benchmark", "big-bench"]
+ size: "~5MB"
+ license: "Apache-2.0"
+ version: "1.0.0"
@@ -0,0 +1,41 @@
+ """
+ Control Plane Benchmark Dataset Module
+
+ This module provides data loaders for the Control Plane scheduling benchmark,
+ including LLM workloads, hybrid workloads (LLM + Embedding), and test prompts.
+
+ Usage:
+     from sage.data.sources.control_plane_benchmark import ControlPlaneBenchmarkDataLoader
+
+     loader = ControlPlaneBenchmarkDataLoader()
+
+     # List available workloads
+     print(loader.list_workloads())
+     print(loader.list_workloads(category="hybrid"))
+
+     # Load a workload configuration
+     workload = loader.load_workload("llm_medium")
+     print(f"Requests: {workload.request_count}, Rate: {workload.rate_per_second}")
+
+     # Load test prompts
+     llm_prompts = loader.load_prompts("llm")
+     embed_texts = loader.load_prompts("embedding")
+ """
+
+ from .dataloader import (
+     ControlPlaneBenchmarkDataLoader,
+     EmbeddingText,
+     HybridWorkloadConfig,
+     LLMPrompt,
+     LLMWorkloadConfig,
+     WorkloadConfig,
+ )
+
+ __all__ = [
+     "ControlPlaneBenchmarkDataLoader",
+     "WorkloadConfig",
+     "LLMWorkloadConfig",
+     "HybridWorkloadConfig",
+     "LLMPrompt",
+     "EmbeddingText",
+ ]
@@ -0,0 +1,101 @@
+ name: "control_plane_benchmark"
+ description: "Benchmark data for Control Plane scheduling policy evaluation"
+ type: "benchmark"
+ format: "jsonl"
+ version: "0.1.0"
+ maintainer: "SAGE Control Plane Team"
+ tags: ["control_plane", "scheduling", "benchmark", "llm", "embedding", "hybrid"]
+ license: "Apache-2.0"
+ size: "~5MB"
+
+ categories:
+   - name: "llm_workloads"
+     description: "Pure LLM request workload configurations"
+     files:
+       - name: "light"
+         description: "Light workload: 100 requests, 10 req/s, single model"
+       - name: "medium"
+         description: "Medium workload: 1000 requests, 100 req/s, multi-model"
+       - name: "heavy"
+         description: "Heavy workload: 5000 requests, 500 req/s, multi-priority"
+
+   - name: "hybrid_workloads"
+     description: "Mixed LLM + Embedding workload configurations"
+     files:
+       - name: "balanced"
+         description: "Balanced mix: 50% LLM, 50% Embedding"
+       - name: "llm_heavy"
+         description: "LLM heavy: 80% LLM, 20% Embedding"
+       - name: "embed_heavy"
+         description: "Embedding heavy: 20% LLM, 80% Embedding"
+       - name: "burst"
+         description: "Burst pattern with variable mix ratios"
+
+   - name: "prompts"
+     description: "Test data for benchmark requests"
+     files:
+       - name: "llm_prompts"
+         description: "200+ LLM test prompts with expected token counts"
+       - name: "embed_texts"
+         description: "200+ Embedding test texts with batch configurations"
+
+ statistics:
+   llm_workloads: 3
+   hybrid_workloads: 4
+   llm_prompts: 200
+   embed_texts: 200
+
+ schema:
+   llm_workload:
+     required:
+       - workload_id
+       - request_count
+       - rate_per_second
+     optional:
+       - arrival_pattern
+       - model_distribution
+       - priority_distribution
+       - prompt_len_range
+       - output_len_range
+       - slo_deadlines
+
+   hybrid_workload:
+     required:
+       - workload_id
+       - request_count
+       - rate_per_second
+       - llm_ratio
+       - embedding_ratio
+     optional:
+       - arrival_pattern
+       - llm_model_distribution
+       - embedding_model
+       - embedding_batch_sizes
+       - priority_distribution
+
+   llm_prompt:
+     required:
+       - prompt_id
+       - text
+     optional:
+       - expected_tokens
+       - priority
+       - category
+       - difficulty
+
+   embed_text:
+     required:
+       - text_id
+       - texts
+     optional:
+       - model
+       - batch_size
+       - category
+
+ citation: |
+   @dataset{control_plane_benchmark_2025,
+     title={Control Plane Benchmark: Evaluating LLM Scheduling Policies},
+     author={SAGE Team},
+     year={2025},
+     publisher={SAGE Framework}
+   }
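
For orientation, here is what one llm_workload record consistent with the schema above could look like, written as a Python dict serialized to one JSONL line. Only the field names and the "llm_medium" id are confirmed by the package (the id appears in the module docstring); every value below is illustrative.

import json

record = {
    "workload_id": "llm_medium",      # required
    "request_count": 1000,            # required; "medium" is 1000 requests
    "rate_per_second": 100,           # required; "medium" is 100 req/s
    "arrival_pattern": "poisson",                          # optional, value assumed
    "model_distribution": {"model_a": 0.7, "model_b": 0.3},  # optional, names assumed
    "priority_distribution": {"high": 0.2, "normal": 0.8},   # optional, assumed
    "prompt_len_range": [64, 512],    # optional, assumed token bounds
    "output_len_range": [32, 256],    # optional, assumed token bounds
    "slo_deadlines": {"high": 500, "normal": 2000},  # optional; ms units assumed
}
print(json.dumps(record))  # jsonl: one record per line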
@@ -0,0 +1,5 @@
+ """GPQA dataset source wrapper."""
+
+ from .dataloader import GPQADataLoader
+
+ __all__ = ["GPQADataLoader"]
Binary file
Binary file
@@ -0,0 +1,10 @@
+ name: "gpqa"
+ description: "GPQA (Graduate-Level Google-Proof Q&A) - Expert-level questions in Physics, Chemistry, Biology"
+ type: "text"
+ format: "huggingface-dataset"
+ maintainer: "sage-team"
+ tags: ["gpqa", "expert-level", "reasoning", "science"]
+ size: "~5MB (cached)"
+ license: "MIT"
+ version: "1.0.0"
+ source_url: "https://huggingface.co/datasets/Idavidrein/gpqa"
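
Given the "huggingface-dataset" format and the source_url above, GPQADataLoader presumably pulls from the Hugging Face Hub. A minimal sketch of fetching the same data directly with the datasets library; the "gpqa_main" config name and the login requirement are assumptions about the upstream repo, not confirmed by this package.

from datasets import load_dataset

# Gated dataset: may require accepting the terms and `huggingface-cli login`
ds = load_dataset("Idavidrein/gpqa", "gpqa_main")
print(ds["train"][0])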
@@ -0,0 +1,10 @@
+ """LibAMM benchmark dataset source.
+
+ This provides access to the LibAMM benchmark datasets located in the
+ libamm-benchmark directory.
+ """
+
+ # Note: The actual data is in ../libamm-benchmark/
+ # This wrapper provides a consistent interface through the sources layer
+
+ __all__ = []
@@ -0,0 +1,9 @@
+ name: "libamm_benchmark"
+ description: "LibAMM benchmark datasets for approximate matrix multiplication evaluation"
+ type: "matrix"
+ format: "binary/text"
+ maintainer: "libamm-team"
+ tags: ["matrix", "benchmark", "approximate-multiplication", "ann"]
+ size: "~325MB"
+ license: "Research Use"
+ version: "1.0.0"
@@ -0,0 +1,5 @@
+ """LoCoMo dataset source wrapper."""
+
+ from .dataloader import LocomoDataLoader
+
+ __all__ = ["LocomoDataLoader"]
Binary file
Binary file
@@ -0,0 +1,10 @@
+ name: "locomo"
+ description: "Long-Context Memory (LoCoMo) dataset for long-context conversation and QA evaluation"
+ type: "text"
+ format: "json"
+ maintainer: "sage-team"
+ tags: ["long-context", "memory", "conversation", "qa"]
+ size: "~2.68MB"
+ license: "MIT"
+ version: "1.0.0"
+ source_url: "https://github.com/your-repo/locomo"
Binary file
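
A brief sketch of consuming the LoCoMo source. Only the LocomoDataLoader name and the bundled locomo10.json file are confirmed by this diff, so the no-argument construction and the direct-file fallback below are assumptions.

import json
from importlib.resources import files

from sage.data.sources.locomo import LocomoDataLoader

loader = LocomoDataLoader()  # assumed: no-arg construction, as in the tests for other sources

# Fallback: read the packaged locomo10.json (+66751 lines per the manifest) directly
raw = json.loads(files("sage.data.sources.locomo").joinpath("locomo10.json").read_text())
print(len(raw))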