isage-data 0.2.1.8__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isage_data-0.2.1.8.dist-info/METADATA +135 -0
- isage_data-0.2.1.8.dist-info/RECORD +132 -0
- isage_data-0.2.1.8.dist-info/WHEEL +5 -0
- isage_data-0.2.1.8.dist-info/entry_points.txt +2 -0
- isage_data-0.2.1.8.dist-info/licenses/LICENSE +21 -0
- isage_data-0.2.1.8.dist-info/top_level.txt +1 -0
- sage/data/__init__.py +37 -0
- sage/data/__init__.pyc +0 -0
- sage/data/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/__pycache__/cli.cpython-311.pyc +0 -0
- sage/data/__pycache__/cli.cpython-312.pyc +0 -0
- sage/data/__pycache__/manager.cpython-311.pyc +0 -0
- sage/data/__pycache__/manager.cpython-312.pyc +0 -0
- sage/data/cli.pyc +0 -0
- sage/data/manager.pyc +0 -0
- sage/data/sources/__init__.py +13 -0
- sage/data/sources/__init__.pyc +0 -0
- sage/data/sources/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/sources/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/agent_benchmark/__init__.py +35 -0
- sage/data/sources/agent_benchmark/__init__.pyc +0 -0
- sage/data/sources/agent_benchmark/dataloader.pyc +0 -0
- sage/data/sources/agent_benchmark/dataset.yaml +44 -0
- sage/data/sources/agent_benchmark/external_benchmarks/__init__.py +32 -0
- sage/data/sources/agent_benchmark/external_benchmarks/__init__.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/converters.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_all.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_apibank.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_bfcl.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_toolalpaca.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/download_toolbench.pyc +0 -0
- sage/data/sources/agent_benchmark/external_benchmarks/loader.pyc +0 -0
- sage/data/sources/agent_benchmark/fix_tool_references.pyc +0 -0
- sage/data/sources/agent_benchmark/generate_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_planning_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_runtime_data.pyc +0 -0
- sage/data/sources/agent_benchmark/prepare_timing_data.pyc +0 -0
- sage/data/sources/agent_benchmark/test_integration.py +94 -0
- sage/data/sources/agent_benchmark/tests/test_agent_benchmark_loader.py +353 -0
- sage/data/sources/agent_benchmark/validate_cross_task.pyc +0 -0
- sage/data/sources/agent_benchmark/validate_data.pyc +0 -0
- sage/data/sources/agent_sft/__init__.py +10 -0
- sage/data/sources/agent_sft/__init__.pyc +0 -0
- sage/data/sources/agent_sft/data/generate_data.pyc +0 -0
- sage/data/sources/agent_sft/data/prompts_template.yaml +75 -0
- sage/data/sources/agent_sft/dataloader.pyc +0 -0
- sage/data/sources/agent_sft/dataset.yaml +9 -0
- sage/data/sources/agent_sft/fix_tool_ids.pyc +0 -0
- sage/data/sources/agent_sft/schemas.pyc +0 -0
- sage/data/sources/agent_sft/tests/test_agent_sft_loader.py +316 -0
- sage/data/sources/agent_tools/__init__.py +6 -0
- sage/data/sources/agent_tools/__init__.pyc +0 -0
- sage/data/sources/agent_tools/dataloader.pyc +0 -0
- sage/data/sources/agent_tools/dataset.yaml +9 -0
- sage/data/sources/agent_tools/generate_tools.pyc +0 -0
- sage/data/sources/agent_tools/schemas.pyc +0 -0
- sage/data/sources/agent_tools/test_integration.py +108 -0
- sage/data/sources/agent_tools/tests/test_agent_tools_loader.py +306 -0
- sage/data/sources/agent_tools/validate_data.pyc +0 -0
- sage/data/sources/bbh/__init__.py +5 -0
- sage/data/sources/bbh/__init__.pyc +0 -0
- sage/data/sources/bbh/dataloader.pyc +0 -0
- sage/data/sources/bbh/dataset.yaml +9 -0
- sage/data/sources/control_plane_benchmark/__init__.py +41 -0
- sage/data/sources/control_plane_benchmark/__init__.pyc +0 -0
- sage/data/sources/control_plane_benchmark/dataloader.pyc +0 -0
- sage/data/sources/control_plane_benchmark/dataset.yaml +101 -0
- sage/data/sources/gpqa/__init__.py +5 -0
- sage/data/sources/gpqa/__init__.pyc +0 -0
- sage/data/sources/gpqa/dataloader.pyc +0 -0
- sage/data/sources/gpqa/dataset.yaml +10 -0
- sage/data/sources/libamm_benchmark/__init__.py +10 -0
- sage/data/sources/libamm_benchmark/__init__.pyc +0 -0
- sage/data/sources/libamm_benchmark/dataset.yaml +9 -0
- sage/data/sources/locomo/__init__.py +5 -0
- sage/data/sources/locomo/__init__.pyc +0 -0
- sage/data/sources/locomo/__pycache__/__init__.cpython-311.pyc +0 -0
- sage/data/sources/locomo/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/locomo/__pycache__/dataloader.cpython-311.pyc +0 -0
- sage/data/sources/locomo/__pycache__/dataloader.cpython-312.pyc +0 -0
- sage/data/sources/locomo/__pycache__/download.cpython-311.pyc +0 -0
- sage/data/sources/locomo/dataloader.pyc +0 -0
- sage/data/sources/locomo/dataset.yaml +10 -0
- sage/data/sources/locomo/download.pyc +0 -0
- sage/data/sources/locomo/locomo10.json +66751 -0
- sage/data/sources/longmemeval/__init__.py +5 -0
- sage/data/sources/longmemeval/__init__.pyc +0 -0
- sage/data/sources/longmemeval/compose.pyc +0 -0
- sage/data/sources/longmemeval/config/longmemeval_groups.yaml +15 -0
- sage/data/sources/longmemeval/dataloader.pyc +0 -0
- sage/data/sources/longmemeval/dataset.yaml +9 -0
- sage/data/sources/longmemeval/download.pyc +0 -0
- sage/data/sources/memagentbench/Conflict_Resolution.parquet +0 -0
- sage/data/sources/memagentbench/__init__.py +16 -0
- sage/data/sources/memagentbench/__init__.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/__init__.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/conflict_resolution_loader.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/__pycache__/download.cpython-312.pyc +0 -0
- sage/data/sources/memagentbench/conflict_resolution_loader.pyc +0 -0
- sage/data/sources/memagentbench/conflict_resolution_loader_test.py +169 -0
- sage/data/sources/memagentbench/dataset.yaml +10 -0
- sage/data/sources/memagentbench/download.pyc +0 -0
- sage/data/sources/mmlu/__init__.py +5 -0
- sage/data/sources/mmlu/__init__.pyc +0 -0
- sage/data/sources/mmlu/dataloader.pyc +0 -0
- sage/data/sources/mmlu/dataset.yaml +10 -0
- sage/data/sources/mmlu/download.pyc +0 -0
- sage/data/sources/orca_dpo/__init__.py +5 -0
- sage/data/sources/orca_dpo/__init__.pyc +0 -0
- sage/data/sources/orca_dpo/dataloader.pyc +0 -0
- sage/data/sources/qa_base/__init__.py +5 -0
- sage/data/sources/qa_base/__init__.pyc +0 -0
- sage/data/sources/qa_base/dataloader.pyc +0 -0
- sage/data/sources/qa_base/dataset.yaml +9 -0
- sage/data/sources/qa_base/qa_knowledge_base.txt +35 -0
- sage/data/sources/qa_base/qa_knowledge_chromaDB.txt +13 -0
- sage/data/sources/qa_base/sample/one_question.txt +1 -0
- sage/data/sources/qa_base/sample/question.txt +352 -0
- sage/data/sources/qa_base/sample/question1.txt +1 -0
- sage/data/usages/__init__.py +3 -0
- sage/data/usages/__init__.pyc +0 -0
- sage/data/usages/agent_eval/__init__.py +191 -0
- sage/data/usages/agent_eval/__init__.pyc +0 -0
- sage/data/usages/agent_eval/config.yaml +15 -0
- sage/data/usages/agent_eval/profiles/full_eval.yaml +15 -0
- sage/data/usages/agent_eval/profiles/quick_eval.yaml +11 -0
- sage/data/usages/agent_eval/profiles/sft_training.yaml +12 -0
- sage/data/usages/agent_eval/usage.yaml +8 -0
- sage/data/usages/libamm/config.yaml +13 -0
- sage/data/usages/neuromem/config.yaml +5 -0
- sage/data/usages/rag/config.yaml +9 -0
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Configure LongMemEval task groups for composition (10 groups x 47 IDs = 470)
|
|
2
|
+
# Moved to benchmark_memory/experiment/config as requested.
|
|
3
|
+
|
|
4
|
+
groups:
|
|
5
|
+
- ["e47becba", "118b2229", "51a45a95", "58bf7951", "1e043500", "c5e8278d", "6ade9755", "6f9b354f", "58ef2f1c", "f8c5f88b", "5d3d2817", "7527f7e2", "c960da58", "3b6f954b", "726462e0", "94f70d80", "66f24dbb", "ad7109d1", "af8d2e46", "dccbc061", "c8c3f81d", "8ebdbe50", "6b168ec8", "75499fd8", "21436231", "95bcc1c8", "0862e8bf", "853b0a1d", "a06e4cfe", "37d43f65", "b86304ba", "d52b4f67", "25e5aa4f", "caf9ead2", "8550ddae", "60d45044", "3f1e9474", "86b68151", "577d4d32", "ec81a493", "15745da0", "e01b8e2f", "bc8a6e93", "ccb36322", "001be529", "b320f3f8"]
|
|
6
|
+
- ["19b5f2b3", "4fd1909e", "545bd2b5", "8a137a7f", "76d63226", "86f00804", "8e9d538c", "311778f1", "c19f7a0b", "4100d0a0", "29f2956b", "1faac195", "faba32e5", "f4f1d8a4", "c14c00dd", "36580ce8", "3d86fd0a", "a82c026e", "0a995998", "6d550036", "gpt4_59c863d7", "b5ef892d", "e831120c", "3a704032", "gpt4_d84a3211", "aae3761f", "gpt4_f2262a51", "dd2973ad", "c4a1ceb8", "gpt4_a56e767c", "6cb6f249", "46a3abf7", "36b9f61e", "28dc39ac", "gpt4_2f8be40d", "2e6d26dc", "gpt4_15e38248", "88432d0a", "80ec1f4f", "d23cf73b", "gpt4_7fce9456", "d682f1a2", "7024f17c", "gpt4_5501fe77", "gpt4_2ba83207", "2318644b", "2ce6a0f2"]
|
|
7
|
+
- ["gpt4_d12ceb0e", "00ca467f", "b3c15d39", "gpt4_31ff4165", "eeda8a6d", "2788b940", "60bf93ed", "9d25d4e0", "129d1232", "60472f9c", "gpt4_194be4b3", "a9f6b44c", "d851d5ba", "5a7937c8", "gpt4_ab202e7f", "gpt4_e05b82a6", "gpt4_731e37d7", "edced276", "10d9b85a", "e3038f8c", "2b8f3739", "1a8a66a6", "c2ac3c61", "bf659f65", "gpt4_372c3eed", "gpt4_2f91af09", "81507db6", "8a2466db", "06878be2", "75832dbd", "0edc2aef", "35a27287", "32260d93", "195a1a1b", "afdc33df", "caf03d32", "54026fce", "06f04340", "6b7dfb22", "1a1907b4", "09d032c9", "38146c39", "d24813b1", "57f827a0", "95228167", "505af2f5"]
|
|
8
|
+
- ["75f70248", "d6233ab6", "1da05512", "fca70973", "b6025781", "a89d7624", "b0479f84", "1d4e3b97", "07b6f563", "1c0ddc50", "0a34ad58", "d3ab962e", "2311e44b", "cc06de0d", "a11281a2", "4f54b7c9", "85fa3a3f", "9aaed6a3", "1f2b8d4f", "e6041065", "51c32626", "d905b33f", "7405e8b1", "f35224e0", "6456829e", "a4996e51", "3c1045c8", "60036106", "681a1674", "e25c3b8d", "4adc0475", "4bc144e2", "ef66a6e5", "5025383b", "a1cc6108", "9ee3ecd6", "3fdac837", "91b15a6e", "27016adc", "720133ac", "77eafa52", "8979f9ec", "0100672e", "a96c20ee", "92a0aa75", "3fe836c9"]
|
|
9
|
+
- ["1c549ce4", "6c49646a", "1192316e", "0ea62687", "67e0d0f2", "bb7c3b45", "ba358f49", "61f8c8f8", "60159905", "ef9cf60a", "73d42213", "bc149d6b", "099778bb", "09ba9854", "d6062bb9", "157a136e", "c18a7dc8", "a3332713", "55241a1f", "a08a253f", "f0e564bc", "078150f1", "8cf4d046", "a346bb18", "37f165cf", "8e91e7d9", "87f22b4a", "e56a43b9", "efc3f7c2", "21d02d0d", "gpt4_59149c77", "gpt4_f49edff3", "71017276", "b46e15ed", "gpt4_fa19884c", "0bc8ad92", "af082822", "gpt4_4929293a", "gpt4_b5700ca9", "9a707b81", "gpt4_1d4ab0c9", "gpt4_e072b769", "0db4c65d", "gpt4_1d80365e", "gpt4_7f6b06db", "gpt4_6dc9b45b", "gpt4_8279ba02"]
|
|
10
|
+
- ["gpt4_18c2b244", "gpt4_a1b77f9c", "gpt4_1916e0ea", "gpt4_7a0daae1", "gpt4_468eb063", "gpt4_7abb270c", "gpt4_1e4a8aeb", "gpt4_4fc4f797", "4dfccbf7", "gpt4_61e13b3c", "gpt4_45189cb4", "2ebe6c90", "gpt4_e061b84f", "370a8ff4", "gpt4_d6585ce8", "gpt4_4ef30696", "gpt4_ec93e27f", "6e984301", "8077ef71", "gpt4_f420262c", "gpt4_8e165409", "gpt4_74aed68e", "bcbe585f", "gpt4_21adecb5", "5e1b23de", "gpt4_98f46fc6", "gpt4_af6db32f", "eac54adc", "gpt4_7ddcf75f", "gpt4_a2d1d1f6", "gpt4_85da3956", "gpt4_b0863698", "gpt4_68e94287", "gpt4_e414231e", "gpt4_7ca326fa", "gpt4_7bc6cf22", "2ebe6c92", "gpt4_e061b84g", "71017277", "b46e15ee", "gpt4_d6585ce9", "gpt4_1e4a8aec", "gpt4_f420262d", "gpt4_59149c78", "gpt4_e414231f", "gpt4_4929293b"]
|
|
11
|
+
- ["gpt4_468eb064", "gpt4_fa19884d", "9a707b82", "eac54add", "4dfccbf8", "0bc8ad93", "6e984302", "gpt4_8279ba03", "gpt4_b5700ca0", "gpt4_68e94288", "gpt4_2655b836", "gpt4_2487a7cb", "gpt4_76048e76", "gpt4_2312f94c", "0bb5a684", "08f4fc43", "2c63a862", "gpt4_385a5000", "2a1811e2", "bbf86515", "gpt4_5dcc0aab", "gpt4_0b2f1d21", "f0853d11", "gpt4_6ed717ea", "gpt4_70e84552", "a3838d2b", "gpt4_93159ced", "gpt4_2d58bcd6", "gpt4_65aabe59", "982b5123", "b9cfe692", "gpt4_4edbafa2", "c8090214", "gpt4_483dd43c", "e4e14d04", "c9f37c46", "gpt4_2c50253f", "dcfa8644", "gpt4_b4a80587", "gpt4_9a159967", "cc6d1ec1", "gpt4_8c8961ae", "gpt4_d9af6064", "gpt4_7de946e7", "d01c6aa8", "993da5e2"]
|
|
12
|
+
- ["a3045048", "gpt4_d31cdae3", "gpt4_cd90e484", "gpt4_88806d6e", "gpt4_4cd9eba1", "gpt4_93f6379c", "b29f3365", "gpt4_2f56ae70", "6613b389", "gpt4_78cf46a3", "gpt4_0a05b494", "gpt4_1a1dc16d", "gpt4_2f584639", "gpt4_213fd887", "gpt4_5438fa52", "gpt4_c27434e8", "gpt4_fe651585", "8c18457d", "6a1eabeb", "6aeb4375", "830ce83f", "852ce960", "945e3d21", "d7c942c3", "71315a70", "89941a93", "ce6d2d27", "9ea5eabc", "07741c44", "a1eacc2a", "184da446", "031748ae", "4d6b87c8", "0f05491a", "08e075c7", "f9e8c073", "41698283", "2698e78f", "b6019101", "45dc21b6", "5a4f22c0", "6071bd76", "e493bb7c", "618f13b2", "72e3ee87"]
|
|
13
|
+
- ["c4ea545c", "01493427", "6a27ffc2", "2133c1b5", "18bc8abd", "db467c8c", "7a87bd0c", "e61a7584", "1cea1afa", "ed4ddc30", "8fb83627", "b01defab", "22d2cb42", "0e4e4c46", "4b24c848", "7e974930", "603deb26", "59524333", "5831f84d", "eace081b", "affe2881", "50635ada", "e66b632c", "0ddfec37", "f685340e", "cc5ded98", "dfde3500", "69fee5aa", "7401057b", "cf22b7bf", "a2f3aa27", "c7dc5443", "06db6396", "3ba21379", "9bbe84a2", "10e09553", "dad224aa", "ba61f0b9", "42ec0761", "5c40ec5b", "c6853660", "26bdc477", "0977f2af", "89941a94", "07741c45"]
|
|
14
|
+
- ["7161e7e2", "c4f10528", "89527b6b", "e9327a54", "4c36ccef", "6ae235be", "7e00a6cb", "1903aded", "ceb54acb", "f523d9fe", "0e5e2d1a", "fea54f57", "cc539528", "dc439ea3", "18dcd5a5", "488d3006", "58470ed2", "8cf51dda", "1d4da289", "8464fc84", "8aef76bc", "71a3fd6b", "2bf43736", "70b3e69b", "8752c811", "3249768e", "1b9b7252", "1568498a", "6222b6eb", "e8a79c70", "d596882b", "e3fc4d6e", "51b23612", "3e321797", "e982271f", "352ab8bd", "fca762bc", "7a8d0b71", "a40e080f", "8b9d4367", "5809eb10", "41275add", "4388e9dd", "4baee567", "561fabcd", "b759caee"]
|
|
15
|
+
- ["ac031881", "28bcfaac", "16c90bf4", "c8f1aeed", "eaca4986", "c7cf7dfd", "e48988bc", "1de5cff2", "65240037", "778164c6"]
|
|
Binary file
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
name: "longmemeval"
|
|
2
|
+
description: "LongMemEval benchmark for testing long-term interactive memory of chat assistants"
|
|
3
|
+
type: "text"
|
|
4
|
+
format: "json"
|
|
5
|
+
maintainer: "sage-team"
|
|
6
|
+
tags: ["long-context", "memory", "conversation", "qa", "timestamped"]
|
|
7
|
+
license: "Apache-2.0"
|
|
8
|
+
version: "1.0.0"
|
|
9
|
+
source_url: "https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned"
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MemoryAgentBench data source package
|
|
3
|
+
|
|
4
|
+
This package provides data loaders for the MemoryAgentBench dataset.
|
|
5
|
+
|
|
6
|
+
Available loaders:
|
|
7
|
+
- ConflictResolutionDataLoader: 4-task version with cumulative question visibility (6K each)
|
|
8
|
+
- ConflictResolutionDataLoaderV1: 8 independent tasks version (future)
|
|
9
|
+
- ConflictResolutionDataLoaderV2: Cumulative task version (future)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .conflict_resolution_loader import ConflictResolutionDataLoader
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
'ConflictResolutionDataLoader',
|
|
16
|
+
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Conflict Resolution Dataset Loader - TEST VERSION (only load first 500 facts for quick testing)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from typing import Any, Dict, Generator, List
|
|
8
|
+
|
|
9
|
+
import pyarrow.parquet as pq
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConflictResolutionDataLoader:
    """Conflict Resolution dataset loader - TEST VERSION with limited facts.

    Loads a single record from the Conflict_Resolution parquet file, keeps
    only the first 500 facts and first 25 question/answer pairs, and exposes
    them as one virtual sample named ``"task_all"``.
    """

    @staticmethod
    def _convert_to_native_types(obj):
        """Recursively convert numpy types to native Python types for JSON serialization."""
        import numpy as np

        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (np.integer, np.floating)):
            return obj.item()
        convert = ConflictResolutionDataLoader._convert_to_native_types
        if isinstance(obj, list):
            return [convert(element) for element in obj]
        if isinstance(obj, dict):
            return {key: convert(value) for key, value in obj.items()}
        return obj

    def __init__(self, filename="Conflict_Resolution.parquet"):
        """Initialize the dataloader - TEST VERSION (only load first 500 facts).

        Raises:
            FileNotFoundError: if the parquet file is not next to this module.
        """
        self.filepath = os.path.join(os.path.dirname(__file__), filename)
        if not os.path.exists(self.filepath):
            raise FileNotFoundError(f"Conflict Resolution file not found: {self.filepath}")

        # Load the parquet table and work with it as a pandas frame.
        df = pq.read_table(self.filepath).to_pandas()

        # TEST MODE: only record 0 is loaded.
        self.records = []
        for idx in [0]:
            row = df.iloc[idx]

            # Parse every numbered fact out of the context, then cap at 500
            # facts for quick testing.
            facts = self._parse_facts(row['context'])[:500]

            # Keep the first 25 question/answer pairs (proportional to 500
            # facts); answers are normalized to native Python types first.
            questions = list(row['questions'])[:25]
            answers = self._convert_to_native_types(list(row['answers']))[:25]

            self.records.append({
                'original_idx': idx,
                'facts': facts,
                'questions': questions,
                'answers': answers,
                'metadata': row['metadata'],
            })

        # Flatten all records into one virtual task: a single fact stream with
        # per-record question boundaries (start index, end index, qa pairs).
        self.all_facts = []
        self.question_boundaries = []
        offset = 0
        for record in self.records:
            facts = record['facts']
            qa_pairs = list(zip(record['questions'], record['answers']))
            self.all_facts.extend(facts)
            self.question_boundaries.append((offset, offset + len(facts) - 1, qa_pairs))
            offset += len(facts)

        total_questions = sum(len(boundary[2]) for boundary in self.question_boundaries)
        print(f"[TEST MODE] Loaded {len(self.all_facts)} facts and {total_questions} questions")

        self.sample_index = {
            "task_all": {
                'facts': self.all_facts,
                'question_boundaries': self.question_boundaries,
            }
        }

    def _parse_facts(self, context: str) -> List[str]:
        """Parse the numbered facts list (``"1. text"`` lines) from context."""
        numbered = re.compile(r'^\d+\.\s+(.+)$')
        return [
            hit.group(1)
            for raw_line in context.split('\n')
            if (hit := numbered.match(raw_line.strip()))
        ]

    def get_sample_id(self) -> List[str]:
        """Return all sample_id list."""
        return [*self.sample_index]

    def get_sample(self, sample_id: str) -> Dict[str, Any]:
        """Get a single sample dict by sample_id.

        Raises:
            KeyError: if sample_id is unknown.
        """
        if sample_id in self.sample_index:
            return self.sample_index[sample_id]
        raise KeyError(f"sample_id '{sample_id}' not found.")

    def iter_qa(self, sample_id: str) -> Generator[Dict[str, Any], None, None]:
        """Iterate all qa dicts in given sample_id."""
        boundaries = self.get_sample(sample_id)['question_boundaries']
        for _start, _end, qa_pairs in boundaries:
            for question, answer in qa_pairs:
                yield {
                    "question": question,
                    "answer": answer,
                    "evidence": None,
                    "category": None,
                }

    def get_question_list(self, sample_id: str) -> List[Dict[str, Any]]:
        """Return the question list of given sample_id."""
        return list(self.iter_qa(sample_id))

    def get_dialog(self, sample_id: str, index: int) -> str:
        """Return single fact at index.

        Raises:
            IndexError: if index is outside [0, number of facts).
        """
        facts = self.get_sample(sample_id)['facts']
        if 0 <= index < len(facts):
            return facts[index]
        raise IndexError(f"index {index} out of range [0, {len(facts)})")

    def get_visible_questions(self, sample_id: str, fact_index: int) -> List[Dict[str, Any]]:
        """Get questions that should be visible after inserting fact at fact_index.

        A record's questions become visible once fact_index reaches the
        record's last fact index (end of its boundary span).
        """
        visible = []
        for _start, end_idx, qa_pairs in self.get_sample(sample_id)['question_boundaries']:
            if fact_index >= end_idx:
                visible.extend(qa_pairs)
        return visible

    def get_total_dialogs(self, sample_id: str) -> int:
        """Return total number of facts."""
        return len(self.get_sample(sample_id)['facts'])

    def get_turn(self, sample_id: str) -> List[tuple]:
        """Return turn information for compatibility: one turn spanning all facts."""
        fact_count = len(self.get_sample(sample_id)['facts'])
        return [(0, fact_count - 1)]

    def get_total_valid_questions(self, sample_id: str) -> int:
        """Return total number of questions."""
        return len(self.get_question_list(sample_id))
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
name: "memagentbench"
|
|
2
|
+
description: "MemoryAgentBench dataset for evaluating agent memory capabilities in conflict resolution tasks"
|
|
3
|
+
type: "text"
|
|
4
|
+
format: "parquet"
|
|
5
|
+
maintainer: "sage-team"
|
|
6
|
+
tags: ["memory", "agent", "conflict-resolution", "qa"]
|
|
7
|
+
size: "~1.5MB"
|
|
8
|
+
license: "Unknown"
|
|
9
|
+
version: "1.0.0"
|
|
10
|
+
source_url: "https://huggingface.co/datasets/ai-hyz/MemoryAgentBench"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
name: "mmlu"
|
|
2
|
+
description: "MMLU (Massive Multitask Language Understanding) - 57 subjects across STEM, humanities, social sciences"
|
|
3
|
+
type: "text"
|
|
4
|
+
format: "huggingface-dataset"
|
|
5
|
+
maintainer: "sage-team"
|
|
6
|
+
tags: ["mmlu", "multitask", "evaluation", "knowledge"]
|
|
7
|
+
size: "~160MB (cached)"
|
|
8
|
+
license: "MIT"
|
|
9
|
+
version: "1.0.0"
|
|
10
|
+
source_url: "https://huggingface.co/datasets/cais/mmlu"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
name: "qa_base"
|
|
2
|
+
description: "Question-Answering knowledge base for RAG systems and retrieval evaluation"
|
|
3
|
+
type: "text"
|
|
4
|
+
format: "txt+jsonl"
|
|
5
|
+
maintainer: "sage-team"
|
|
6
|
+
tags: ["qa", "rag", "knowledge-base", "retrieval"]
|
|
7
|
+
size: "~1MB"
|
|
8
|
+
license: "MIT"
|
|
9
|
+
version: "1.0.0"
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
ChromaDB是一个开源的向量数据库,专门为AI应用程序设计。它具有以下特点:
|
|
2
|
+
1. 高性能向量检索和相似性搜索
|
|
3
|
+
2. 支持多种嵌入模型
|
|
4
|
+
3. 简单易用的API接口
|
|
5
|
+
4. 支持本地部署和云端部署
|
|
6
|
+
5. 与LangChain等AI框架深度集成
|
|
7
|
+
|
|
8
|
+
DenseRetriever是SAGE框架中的密集向量检索组件,主要功能包括:
|
|
9
|
+
1. 支持多种向量数据库后端,包括ChromaDB和FAISS
|
|
10
|
+
2. 自动化的文档嵌入和索引
|
|
11
|
+
3. 高效的相似性搜索算法
|
|
12
|
+
4. 可配置的检索参数(top-k、阈值等)
|
|
13
|
+
5. 支持批量检索和实时查询
|
|
14
|
+
|
|
15
|
+
RAG(Retrieval-Augmented Generation)系统的主要优势:
|
|
16
|
+
1. 结合了检索和生成的优点
|
|
17
|
+
2. 能够访问外部知识库,提供更准确的答案
|
|
18
|
+
3. 减少大语言模型的幻觉问题
|
|
19
|
+
4. 支持动态知识更新
|
|
20
|
+
5. 提高生成内容的可信度和相关性
|
|
21
|
+
|
|
22
|
+
向量数据库在AI应用中的作用:
|
|
23
|
+
1. 存储和管理高维向量数据
|
|
24
|
+
2. 提供快速的相似性搜索功能
|
|
25
|
+
3. 支持语义搜索和推荐系统
|
|
26
|
+
4. 为RAG系统提供知识检索能力
|
|
27
|
+
5. 实现多模态数据的统一检索
|
|
28
|
+
|
|
29
|
+
优化检索系统性能的方法:
|
|
30
|
+
1. 选择合适的嵌入模型
|
|
31
|
+
2. 调整向量维度和索引参数
|
|
32
|
+
3. 使用分布式部署提高并发能力
|
|
33
|
+
4. 实施缓存策略减少重复计算
|
|
34
|
+
5. 优化数据预处理和后处理流程
|
|
35
|
+
6. 定期更新和维护向量索引
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
ChromaDB是一个开源的向量数据库,专门为AI应用程序设计。它具有以下特点:
|
|
2
|
+
1. 高性能向量检索和相似性搜索
|
|
3
|
+
2. 支持多种嵌入模型
|
|
4
|
+
3. 简单易用的API接口
|
|
5
|
+
4. 支持本地部署和云端部署
|
|
6
|
+
5. 与LangChain等AI框架深度集成
|
|
7
|
+
|
|
8
|
+
DenseRetriever是SAGE框架中的密集向量检索组件,主要功能包括:
|
|
9
|
+
1. 支持多种向量数据库后端,包括ChromaDB和FAISS
|
|
10
|
+
2. 自动化的文档嵌入和索引
|
|
11
|
+
3. 高效的相似性搜索算法
|
|
12
|
+
4. 可配置的检索参数(top-k、阈值等)
|
|
13
|
+
5. 支持批量检索和实时查询
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Answer with True or False: Meiosis produces four genetically identical daughter cells.
|