@remnic/cli 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/dist/assets/download-datasets.sh +647 -0
- package/dist/index.js +5013 -452
- package/package.json +50 -5
- package/dist/chunk-U4MQO3IF.js +0 -1144
- package/dist/dist-B67STFFX.js +0 -48
package/README.md
CHANGED
|
@@ -37,7 +37,14 @@ remnic query "hello" --explain # Test query with tier breakdown
|
|
|
37
37
|
| `remnic sync` | Diff-aware sync with external sources |
|
|
38
38
|
| `remnic spaces` | Manage memory namespaces |
|
|
39
39
|
| `remnic bench list` | List published benchmark packs |
|
|
40
|
+
| `remnic bench datasets status/download` | Check or download local benchmark datasets |
|
|
41
|
+
| `remnic bench runs list/show/delete` | Manage stored benchmark result files |
|
|
40
42
|
| `remnic bench run` | Run one or more published benchmark packs |
|
|
43
|
+
| `remnic bench compare` | Compare two stored benchmark results |
|
|
44
|
+
| `remnic bench baseline` | Save or list named benchmark baselines |
|
|
45
|
+
| `remnic bench export` | Export a stored benchmark result as JSON, CSV, or HTML |
|
|
46
|
+
| `remnic bench providers discover` | Auto-detect local provider backends |
|
|
47
|
+
| `remnic bench publish --target remnic-ai` | Build the Remnic.ai benchmark feed from stored results |
|
|
41
48
|
|
|
42
49
|
Run `remnic --help` for the full command list.
|
|
43
50
|
|
|
@@ -48,8 +55,26 @@ kept as a compatibility alias.
|
|
|
48
55
|
|
|
49
56
|
```bash
|
|
50
57
|
remnic bench list
|
|
58
|
+
remnic bench run --quick longmemeval --runtime-profile baseline
|
|
59
|
+
remnic bench datasets status
|
|
60
|
+
remnic bench datasets download longmemeval
|
|
61
|
+
remnic bench datasets download --all
|
|
62
|
+
remnic bench runs list
|
|
63
|
+
remnic bench runs show candidate-run --detail
|
|
64
|
+
remnic bench runs delete candidate-run
|
|
51
65
|
remnic bench run --quick longmemeval
|
|
52
66
|
remnic bench run longmemeval --dataset-dir ~/datasets/longmemeval
|
|
67
|
+
remnic bench run longmemeval --runtime-profile real --remnic-config ~/.config/remnic/config.json
|
|
68
|
+
remnic bench run longmemeval --runtime-profile real --system-provider openai --system-model gpt-5.4-mini
|
|
69
|
+
remnic bench run longmemeval --runtime-profile openclaw-chain --openclaw-config ~/.openclaw/openclaw.json --gateway-agent-id memory-primary
|
|
70
|
+
remnic bench run longmemeval --matrix baseline,real,openclaw-chain
|
|
71
|
+
remnic bench compare base-run candidate-run
|
|
72
|
+
remnic bench baseline save main candidate-run
|
|
73
|
+
remnic bench baseline list
|
|
74
|
+
remnic bench export candidate-run --format csv --output ./candidate.csv
|
|
75
|
+
remnic bench export candidate-run --format html --output ./report.html
|
|
76
|
+
remnic bench providers discover
|
|
77
|
+
remnic bench publish --target remnic-ai
|
|
53
78
|
remnic benchmark run --quick longmemeval
|
|
54
79
|
```
|
|
55
80
|
|
|
@@ -60,6 +85,17 @@ full runs need a real benchmark dataset. In a repo checkout the CLI will use
|
|
|
60
85
|
`evals/datasets/<benchmark>` automatically; in packaged installs pass
|
|
61
86
|
`--dataset-dir <path>` explicitly.
|
|
62
87
|
|
|
88
|
+
Package-backed benchmark runs also write `MANIFEST.json` in the results
|
|
89
|
+
directory. The manifest records result artifact hashes, dataset file hashes,
|
|
90
|
+
fixed seeds, runtime profile/model configuration, git state, QMD collection
|
|
91
|
+
names, selected benchmark environment keys, and config-file hashes. Secret
|
|
92
|
+
argument values are redacted.
|
|
93
|
+
|
|
94
|
+
`remnic bench datasets download` currently manages the published benchmark
|
|
95
|
+
datasets for `ama-bench`, `memory-arena`, `amemgym`, `longmemeval`, `locomo`,
|
|
96
|
+
`beam`, `personamem`, `membench`, and `memoryagentbench`. Internal Remnic
|
|
97
|
+
benchmarks keep their bundled or repo-managed fixtures.
|
|
98
|
+
|
|
63
99
|
## Connecting agents
|
|
64
100
|
|
|
65
101
|
Once the daemon is running, connect any supported agent:
|
|
@@ -0,0 +1,647 @@
|
|
|
1
|
+
#!/usr/bin/env bash
set -euo pipefail

# Absolute path of the directory that holds this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Where datasets are stored. A non-empty DATASETS_DIR from the environment
# wins, which lets packaged CLI installs point downloads at a user-writable
# location (e.g. ~/.remnic/bench/datasets). Otherwise fall back to a
# "datasets" directory next to the script's parent directory.
if [[ -z "${DATASETS_DIR:-}" ]]; then
  DATASETS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)/datasets"
fi
|
|
9
|
+
|
|
10
|
+
# Print the help text and exit.
#
# Arguments:
#   $1 - optional exit status (default 0). Argument-parsing errors pass a
#        non-zero status so callers can tell "help requested" apart from
#        "bad invocation".
usage() {
  echo "Usage: $0 [--benchmark <name>]"
  echo ""
  echo "Downloads benchmark datasets for the Remnic bench suite."
  echo ""
  echo "Benchmarks: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all"
  echo ""
  echo "Options:"
  echo " --benchmark <name> Download only the specified benchmark (default: all)"
  echo " --help Show this help"
  exit "${1:-0}"
}

# Parse command-line options. BENCHMARK selects what the dispatch at the
# bottom of the script downloads; it defaults to every benchmark.
BENCHMARK="all"
while [[ $# -gt 0 ]]; do
  case $1 in
    --benchmark)
      # With `set -u`, reading "$2" when no value was supplied aborts with an
      # opaque "unbound variable" message, so check for the value explicitly.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --benchmark requires a value" >&2
        usage 1 >&2
      fi
      BENCHMARK="$2"
      shift 2
      ;;
    --help) usage ;;
    *)
      # An unrecognised option is an error: report it on stderr and exit
      # non-zero instead of printing help and reporting success.
      echo "Unknown option: $1" >&2
      usage 1 >&2
      ;;
  esac
done
|
|
31
|
+
|
|
32
|
+
# Verify that the external tools this script shells out to are installed.
#
# Exits the script with status 1 (message on stderr) as soon as one of
# `git` or `curl` cannot be found on PATH.
check_deps() {
  # Keep the loop variable local so it does not leak into the script's
  # global scope.
  local tool
  for tool in git curl; do
    if ! command -v "$tool" &>/dev/null; then
      echo "ERROR: $tool is required but not found" >&2
      exit 1
    fi
  done
}
|
|
40
|
+
|
|
41
|
+
# Interpreter selected by resolve_python_bin; empty until one has been chosen.
PYTHON_BIN=""

# Check whether an interpreter can locate every listed module.
#
# Arguments:
#   $1   - Python interpreter to run
#   $2.. - module names to look up
# Returns the interpreter's exit status: 0 when all modules are found,
# 1 (with an error on stderr) when at least one is missing.
python_has_modules() {
  local interpreter="$1"
  shift

  "$interpreter" - "$@" <<'PY'
import sys

try:
    import importlib.util as importlib_util
except Exception:  # pragma: no cover - Python 2 fallback
    importlib_util = None
    import pkgutil


def is_missing(name):
    # Look the module up without importing it.
    if importlib_util is None:
        return pkgutil.find_loader(name) is None
    return importlib_util.find_spec(name) is None


missing = [name for name in sys.argv[1:] if is_missing(name)]
if missing:
    sys.stderr.write(
        "ERROR: missing required Python module(s): {}. Install them before downloading this dataset.\n".format(
            ", ".join(missing)
        )
    )
    sys.exit(1)
PY
}
|
|
74
|
+
|
|
75
|
+
# Print the name of a usable Python interpreter on stdout.
#
# Arguments:
#   $@ - zero or more module names the interpreter must be able to find
#
# The cached PYTHON_BIN is reused when it satisfies the request. Otherwise
# python3 and then python are probed, and the first match is stored in
# PYTHON_BIN (the assignment only persists when the function is not called
# inside a command substitution).
#
# Exits with status 1 when no interpreter qualifies. Diagnostics are written
# to stderr: callers capture stdout with "$(resolve_python_bin)", so an error
# printed to stdout would be swallowed into the variable and never shown.
resolve_python_bin() {
  if [[ -n "$PYTHON_BIN" ]]; then
    if [[ $# -eq 0 ]] || python_has_modules "$PYTHON_BIN" "$@" >/dev/null 2>&1; then
      printf '%s\n' "$PYTHON_BIN"
      return 0
    fi
  fi

  local candidate
  local found_any=0
  for candidate in python3 python; do
    if ! command -v "$candidate" &>/dev/null; then
      continue
    fi
    found_any=1
    if [[ $# -gt 0 ]] && ! python_has_modules "$candidate" "$@" >/dev/null 2>&1; then
      continue
    fi
    PYTHON_BIN="$candidate"
    printf '%s\n' "$PYTHON_BIN"
    return 0
  done

  # At least one interpreter exists, but none could find the modules.
  if [[ $found_any -eq 1 && $# -gt 0 ]]; then
    local names
    names=$(printf '%s, ' "$@")
    names=${names%, }
    echo "ERROR: missing required Python module(s): $names. Install them before downloading this dataset." >&2
    exit 1
  fi

  echo "ERROR: python or python3 is required but not found" >&2
  exit 1
}
|
|
109
|
+
|
|
110
|
+
# Ensure some Python interpreter can find every module named in "$@".
#
# Delegates to resolve_python_bin, which exits the script when no interpreter
# qualifies. The call runs in the current shell (not a command substitution),
# so the interpreter it stores in PYTHON_BIN stays set for later lookups;
# only the printed interpreter name is discarded.
require_python_modules() {
  resolve_python_bin "$@" 1>/dev/null
}
|
|
113
|
+
|
|
114
|
+
# Fetch the AMA-bench open-ended QA set into $DATASETS_DIR/ama-bench.
#
# Skips the download when open_end_qa_set.jsonl is already present.
# Returns 1 when the clone fails or the expected file is not in the clone.
download_ama_bench() {
  local dir="$DATASETS_DIR/ama-bench"
  local target="$dir/open_end_qa_set.jsonl"
  if [[ -f "$target" ]]; then
    echo "[ama-bench] Already downloaded at $dir"
    return
  fi
  echo "[ama-bench] Downloading from HuggingFace (AMA-bench/AMA-bench)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench "$tmpdir/repo" 2>/dev/null || {
    echo "[ama-bench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench /tmp/amabench"
    echo " cp /tmp/amabench/test/open_end_qa_set.jsonl $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must be reported as a failure: ignoring it would announce
  # a successful download even though the dataset file does not exist.
  if ! cp "$tmpdir/repo/test/open_end_qa_set.jsonl" "$target" 2>/dev/null; then
    echo "[ama-bench] ERROR: test/open_end_qa_set.jsonl was not found in the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[ama-bench] Downloaded to $dir ($(wc -l < "$target") episodes)"
}
|
|
135
|
+
|
|
136
|
+
# Fetch the LongMemEval oracle file into $DATASETS_DIR/longmemeval.
#
# Skips the download when longmemeval_oracle.json is already present.
# Returns 1 (after printing a manual fallback command) when the download
# fails or produces an empty file.
download_longmemeval() {
  local dir="$DATASETS_DIR/longmemeval"
  local target="$dir/longmemeval_oracle.json"
  local url="https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_oracle.json"
  if [[ -f "$target" ]]; then
    echo "[longmemeval] Already downloaded at $dir"
    return
  fi
  echo "[longmemeval] Downloading from HuggingFace (xiaowu0162/longmemeval-cleaned)..."
  mkdir -p "$dir"

  # Download to a side file and rename on success, so an interrupted
  # transfer never leaves a truncated file at the path the "already
  # downloaded" check looks at.
  local partial="$target.partial"

  # --fail makes curl exit non-zero on HTTP errors instead of saving the
  # error page as if it were the dataset. Running curl as an `if` condition
  # keeps `set -e` from aborting before the fallback hint is printed.
  if ! curl -fsL "$url" -o "$partial" || [[ ! -s "$partial" ]]; then
    echo "[longmemeval] ERROR: Download failed. Try manually:"
    echo " curl -sL $url -o $target"
    rm -f "$partial"
    return 1
  fi
  mv "$partial" "$target"
  echo "[longmemeval] Downloaded to $dir ($(du -h "$target" | cut -f1))"
}
|
|
154
|
+
|
|
155
|
+
# Fetch the AMemGym v1 base data into $DATASETS_DIR/amemgym.
#
# Skips the download when amemgym-v1-base.json is already present.
# Returns 1 when the clone fails or the expected file is not in the clone.
download_amemgym() {
  local dir="$DATASETS_DIR/amemgym"
  local target="$dir/amemgym-v1-base.json"
  if [[ -f "$target" ]]; then
    echo "[amemgym] Already downloaded at $dir"
    return
  fi
  echo "[amemgym] Downloading from HuggingFace (AGI-Eval/AMemGym)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym "$tmpdir/repo" 2>/dev/null || {
    echo "[amemgym] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym /tmp/amemgym"
    echo " cp /tmp/amemgym/v1.base/data.json $dir/amemgym-v1-base.json"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must be reported as a failure: ignoring it would announce
  # a successful download even though the dataset file does not exist.
  if ! cp "$tmpdir/repo/v1.base/data.json" "$target" 2>/dev/null; then
    echo "[amemgym] ERROR: v1.base/data.json was not found in the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[amemgym] Downloaded to $dir"
}
|
|
176
|
+
|
|
177
|
+
# Fetch the LoCoMo dataset file into $DATASETS_DIR/locomo.
#
# Skips the download when locomo10.json is already present.
# Returns 1 when the clone fails or the expected file is not in the clone.
download_locomo() {
  local dir="$DATASETS_DIR/locomo"
  local target="$dir/locomo10.json"
  if [[ -f "$target" ]]; then
    echo "[locomo] Already downloaded at $dir"
    return
  fi
  echo "[locomo] Downloading from GitHub (snap-research/locomo)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/snap-research/locomo.git "$tmpdir/repo" 2>/dev/null || {
    echo "[locomo] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/snap-research/locomo.git /tmp/locomo"
    echo " cp /tmp/locomo/data/locomo10.json $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must be reported as a failure: ignoring it would announce
  # a successful download even though the dataset file does not exist.
  if ! cp "$tmpdir/repo/data/locomo10.json" "$target" 2>/dev/null; then
    echo "[locomo] ERROR: data/locomo10.json was not found in the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[locomo] Downloaded to $dir ($(du -h "$target" | cut -f1))"
}
|
|
198
|
+
|
|
199
|
+
# Fetch the MemoryArena domain files into $DATASETS_DIR/memory-arena.
#
# Each top-level directory of the cloned repository that contains a
# data.jsonl is copied to <dir>/<directory-name>.jsonl. Skips the download
# when at least one *.jsonl file is already present.
# Returns 1 when the clone fails or no domain file could be copied.
download_memory_arena() {
  local dir="$DATASETS_DIR/memory-arena"
  if [[ -d "$dir" ]] && ls "$dir"/*.jsonl &>/dev/null; then
    echo "[memory-arena] Already downloaded at $dir"
    return
  fi
  echo "[memory-arena] Downloading from HuggingFace (ZexueHe/memoryarena)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena "$tmpdir/repo" 2>/dev/null || {
    echo "[memory-arena] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena /tmp/memoryarena"
    echo " for d in /tmp/memoryarena/*/; do cp \"\$d/data.jsonl\" \"$dir/\$(basename \$d).jsonl\"; done"
    rm -rf "$tmpdir"
    return 1
  }

  local d name
  local copied=0
  for d in "$tmpdir/repo"/*/; do
    name=$(basename "$d")
    if [[ -f "$d/data.jsonl" ]]; then
      cp "$d/data.jsonl" "$dir/${name}.jsonl"
      copied=$((copied + 1))
    fi
  done
  rm -rf "$tmpdir"

  # Without this guard, an unexpected repository layout reaches the `ls`
  # pipeline below with no matching files; under `pipefail` + `errexit`
  # that aborts the whole script without printing any explanation.
  if [[ $copied -eq 0 ]]; then
    echo "[memory-arena] ERROR: no <domain>/data.jsonl files were found in the cloned repository"
    return 1
  fi

  local count
  count=$(ls "$dir"/*.jsonl 2>/dev/null | wc -l | tr -d ' ')
  echo "[memory-arena] Downloaded to $dir ($count domains)"
}
|
|
228
|
+
|
|
229
|
+
# Fetch the BEAM parquet shards and convert them to JSON files in
# $DATASETS_DIR/beam (beam_100k.json, beam_500k.json, beam_1m.json,
# beam_10m.json).
#
# Skips the download when all four JSON files are present. Requires a Python
# interpreter with the huggingface_hub and pyarrow modules; the script exits
# when none is available.
download_beam() {
  local dir="$DATASETS_DIR/beam"
  if [[ -f "$dir/beam_100k.json" && -f "$dir/beam_500k.json" && -f "$dir/beam_1m.json" && -f "$dir/beam_10m.json" ]]; then
    echo "[beam] Already downloaded at $dir"
    return
  fi
  echo "[beam] Downloading from Hugging Face parquet sources (Mohammadta/BEAM, Mohammadta/BEAM-10M)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY'
from __future__ import annotations

import json
import os
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (dataset repo, parquet shards to concatenate, output file name)
targets = [
    ("Mohammadta/BEAM", ["data/100K-00000-of-00001.parquet"], "beam_100k.json"),
    ("Mohammadta/BEAM", ["data/500K-00000-of-00001.parquet"], "beam_500k.json"),
    ("Mohammadta/BEAM", ["data/1M-00000-of-00001.parquet"], "beam_1m.json"),
    (
        "Mohammadta/BEAM-10M",
        ["data/10M-00000-of-00002.parquet", "data/10M-00001-of-00002.parquet"],
        "beam_10m.json",
    ),
]

for repo_id, parquet_files, output_name in targets:
    output_path = out_dir / output_name
    if output_path.exists() and output_path.stat().st_size > 0:
        print(f"[beam] Reusing {output_name}")
        continue

    rows: list[dict] = []
    for parquet_file in parquet_files:
        parquet_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=parquet_file,
        )
        rows.extend(pq.read_table(parquet_path).to_pylist())

    # Write to a side file and rename it into place. Writing the final path
    # directly would let an interrupted run leave a truncated, non-empty file
    # that the reuse checks accept as a finished download.
    partial_path = output_path.with_name(output_name + ".partial")
    with partial_path.open("w", encoding="utf-8") as handle:
        json.dump(rows, handle, ensure_ascii=False)
    os.replace(partial_path, output_path)
    print(f"[beam] Wrote {output_name} ({len(rows)} conversations)")
PY
  echo "[beam] Downloaded to $dir"
}
|
|
285
|
+
|
|
286
|
+
# Mirror the PersonaMem-v2 benchmark CSV and the 32k chat histories it
# references into $DATASETS_DIR/personamem.
#
# Skips the download when both benchmark.csv and the completion marker are
# present. Requires a Python interpreter with the huggingface_hub module; the
# script exits when none is available. HF_TOKEN or HUGGINGFACE_HUB_TOKEN, if
# set, is passed to the Hugging Face download calls.
download_personamem() {
  local dir="$DATASETS_DIR/personamem"
  local marker_dir="$dir/data/chat_history_32k"
  if [[ -f "$dir/benchmark/text/benchmark.csv" ]] \
    && [[ -f "$marker_dir/.download-complete" ]]; then
    echo "[personamem] Already downloaded at $dir"
    return
  fi
  echo "[personamem] Downloading from Hugging Face (bowen-upenn/PersonaMem-v2)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY'
from __future__ import annotations

import csv
import os
import shutil
import sys
import time
from pathlib import Path, PurePosixPath

from huggingface_hub import hf_hub_download

REPO_ID = "bowen-upenn/PersonaMem-v2"
BENCHMARK_PATH = "benchmark/text/benchmark.csv"
MAX_ATTEMPTS = 5

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)
out_dir_root = out_dir.resolve()
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")


def resolve_dataset_destination(relative_path: str) -> tuple[str, Path]:
    """Validate a repo-relative path and map it into the output directory.

    Returns the cleaned POSIX path and the absolute local destination.
    Raises ValueError for empty, absolute, or parent-escaping paths so that
    a path taken from the CSV can never write outside the dataset root.
    """
    normalized = relative_path.strip().replace("\\", "/")
    if not normalized:
        raise ValueError("dataset path cannot be empty")

    posix_path = PurePosixPath(normalized)
    if posix_path.is_absolute():
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        )

    safe_parts = []
    for part in posix_path.parts:
        if part in ("", "."):
            continue
        if part == "..":
            raise ValueError(
                f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
            )
        safe_parts.append(part)

    if not safe_parts:
        raise ValueError("dataset path cannot resolve to the dataset root")

    # Resolve symlinks as a second line of defence against escaping the root.
    destination = (out_dir / Path(*safe_parts)).resolve()
    try:
        destination.relative_to(out_dir_root)
    except ValueError as exc:
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        ) from exc

    return PurePosixPath(*safe_parts).as_posix(), destination


def copy_dataset_file(relative_path: str) -> Path:
    """Download one repo file and place it at its local destination."""
    safe_relative_path, destination = resolve_dataset_destination(relative_path)
    source = Path(
        hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=safe_relative_path,
            token=token,
        )
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Copy to a side file and rename: the resume logic treats any existing
    # destination file as complete, so a half-copied file must never appear
    # under the final name.
    partial = destination.with_name(destination.name + ".partial")
    shutil.copy2(source, partial)
    os.replace(partial, destination)
    return destination


def copy_with_retries(relative_path: str) -> None:
    """Fetch one chat history, retrying with exponential backoff."""
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            copy_dataset_file(relative_path)
            return
        except Exception as exc:  # noqa: BLE001
            if attempt == MAX_ATTEMPTS:
                raise SystemExit(
                    f"failed to download PersonaMem asset {relative_path}: {exc}"
                ) from exc
            delay_seconds = min(30, 2 ** attempt)
            print(
                f"[personamem] Retry {attempt}/{MAX_ATTEMPTS} for {relative_path} after error: {exc}. "
                f"Sleeping {delay_seconds}s..."
            )
            time.sleep(delay_seconds)


benchmark_destination = copy_dataset_file(BENCHMARK_PATH)

with benchmark_destination.open("r", encoding="utf8", newline="") as handle:
    reader = csv.DictReader(handle)
    history_paths = sorted(
        {
            (row.get("chat_history_32k_link") or "").strip()
            for row in reader
            if (row.get("chat_history_32k_link") or "").strip()
        }
    )

if not history_paths:
    raise SystemExit("PersonaMem benchmark.csv did not contain any chat_history_32k_link values")

completed = 0
for index, relative_path in enumerate(history_paths, start=1):
    _, destination = resolve_dataset_destination(relative_path)
    # Files left by an earlier run are kept, which makes the download resumable.
    if not destination.is_file():
        copy_with_retries(relative_path)
    completed += 1

    # Report progress for reused files too, so a resumed run still prints
    # its periodic and final counts.
    if index % 100 == 0 or index == len(history_paths):
        print(f"[personamem] Downloaded {completed}/{len(history_paths)} chat histories")

print(
    f"[personamem] Mirrored benchmark.csv and {completed} chat histories into {out_dir}"
)
PY
  # The marker lives at a fixed path, while the history files go wherever the
  # CSV links point; create the directory so `touch` cannot fail if the two
  # ever differ.
  mkdir -p "$marker_dir"
  touch "$marker_dir/.download-complete"
  echo "[personamem] Downloaded to $dir"
}
|
|
418
|
+
|
|
419
|
+
# Clone the MemBench repository and normalize its MemData JSON files into a
# single $DATASETS_DIR/membench/membench.json case list.
#
# Skips the work when membench.json is already present.
# Returns non-zero when the clone or the normalization step fails; the
# temporary clone is removed on every path.
download_membench() {
  local dir="$DATASETS_DIR/membench"
  if [[ -f "$dir/membench.json" ]]; then
    echo "[membench] Already downloaded at $dir"
    return
  fi
  echo "[membench] Downloading and normalizing from GitHub (import-myself/Membench)..."
  mkdir -p "$dir"

  # Resolve the interpreter before creating the temp directory: a failure
  # here exits the script, and nothing should be left behind when it does.
  local python_bin
  python_bin="$(resolve_python_bin)"

  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/import-myself/Membench.git "$tmpdir/repo" 2>/dev/null || {
    echo "[membench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/import-myself/Membench.git /tmp/membench"
    rm -rf "$tmpdir"
    return 1
  }

  # Capture the exit status instead of letting `set -e` abort immediately,
  # so the temporary clone is always cleaned up.
  local status=0
  "$python_bin" - "$tmpdir/repo" "$dir/membench.json" <<'PY' || status=$?
from __future__ import annotations

import json
import os
import re
import sys
from pathlib import Path

repo_root = Path(sys.argv[1])
output_path = Path(sys.argv[2])


def normalize_text(value):
    """Reduce a string, list, None, or other value to a stripped string.

    For a list, the first element that normalizes to non-empty text wins.
    """
    if isinstance(value, str):
        return value.strip()
    if isinstance(value, list):
        for item in value:
            text = normalize_text(item)
            if text:
                return text
        return ""
    if value is None:
        return ""
    return str(value).strip()


def sanitize_case_id(value: str) -> str:
    """Lower-case the id and collapse non-alphanumeric runs into dashes."""
    return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")


def iter_qa_entries(value):
    """Return the QA payload as a list of dicts (a lone dict is wrapped)."""
    if isinstance(value, dict):
        return [value]
    if isinstance(value, list):
        return [entry for entry in value if isinstance(entry, dict)]
    return []


def build_turns(message_list):
    """Flatten sessions of user/assistant steps into role-tagged turns."""
    turns = []
    if not isinstance(message_list, list):
        return turns
    for session in message_list:
        # A session may be a single step dict or a list of step dicts.
        if isinstance(session, dict):
            session = [session]
        if not isinstance(session, list):
            continue
        for step in session:
            if not isinstance(step, dict):
                continue
            user = normalize_text(step.get("user"))
            assistant = normalize_text(step.get("assistant"))
            if user:
                turns.append({"role": "user", "content": user})
            if assistant:
                turns.append({"role": "assistant", "content": assistant})
    return turns


cases = []
# (MemData subdirectory, scenario label recorded on each case)
source_roots = [
    ("FirstAgent", "participant"),
    ("ThirdAgent", "observation"),
]

for source_root, scenario in source_roots:
    for dataset_path in sorted((repo_root / "MemData" / source_root).glob("*.json")):
        label = dataset_path.stem.lower()
        # Files with "highlevel" in their name are tagged reflective/high_level.
        memory_type = "reflective" if "highlevel" in label else "factual"
        level = "high_level" if memory_type == "reflective" else "low_level"
        document = json.loads(dataset_path.read_text(encoding="utf-8"))

        if not isinstance(document, dict):
            continue

        for group_name, entries in document.items():
            if not isinstance(entries, list):
                continue
            for entry_index, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    continue

                turns = build_turns(entry.get("message_list") or entry.get("messages"))
                if not turns:
                    continue

                qa_entries = iter_qa_entries(
                    entry.get("QA")
                    or entry.get("qa")
                    or entry.get("qas")
                    or entry.get("question_answers")
                )
                for qa_index, qa in enumerate(qa_entries):
                    question = normalize_text(qa.get("question") or qa.get("query"))
                    answer = normalize_text(qa.get("answer"))
                    if not question or not answer:
                        continue

                    qid = normalize_text(
                        qa.get("qid") or qa.get("id") or qa.get("question_id") or qa_index
                    )
                    raw_id = (
                        f"{source_root}-{dataset_path.stem}-{group_name}-"
                        f"{entry_index}-{qid}"
                    )
                    case_id = sanitize_case_id(raw_id)
                    cases.append(
                        {
                            "id": case_id,
                            "memoryType": memory_type,
                            "scenario": scenario,
                            "level": level,
                            "turns": turns,
                            "question": question,
                            "answer": answer,
                        }
                    )

if not cases:
    raise SystemExit("MemBench normalization produced no runnable cases.")

output_path.parent.mkdir(parents=True, exist_ok=True)
# Write to a side file and rename it into place, so an interrupted run never
# leaves a truncated membench.json that later counts as "already downloaded".
partial_path = output_path.with_name(output_path.name + ".partial")
with partial_path.open("w", encoding="utf-8") as handle:
    json.dump(cases, handle, ensure_ascii=False)
os.replace(partial_path, output_path)

print(f"[membench] Wrote {output_path.name} ({len(cases)} cases)")
PY
  rm -rf "$tmpdir"
  if [[ $status -ne 0 ]]; then
    echo "[membench] ERROR: normalization failed"
    return "$status"
  fi
  echo "[membench] Downloaded to $dir"
}
|
|
562
|
+
|
|
563
|
+
# Fetch the MemoryAgentBench parquet files and convert each one to a JSON
# file in $DATASETS_DIR/memoryagentbench.
#
# Skips the download when all four JSON files are present. Requires a Python
# interpreter with the huggingface_hub and pyarrow modules; the script exits
# when none is available.
download_memoryagentbench() {
  local dir="$DATASETS_DIR/memoryagentbench"
  if [[ -f "$dir/Accurate_Retrieval.json" && -f "$dir/Test_Time_Learning.json" && -f "$dir/Long_Range_Understanding.json" && -f "$dir/Conflict_Resolution.json" ]]; then
    echo "[memoryagentbench] Already downloaded at $dir"
    return
  fi
  echo "[memoryagentbench] Downloading from Hugging Face parquet sources (ai-hyz/MemoryAgentBench)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY'
from __future__ import annotations

import json
import os
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (parquet file in the dataset repo, output file name)
targets = [
    ("data/Accurate_Retrieval-00000-of-00001.parquet", "Accurate_Retrieval.json"),
    ("data/Test_Time_Learning-00000-of-00001.parquet", "Test_Time_Learning.json"),
    ("data/Long_Range_Understanding-00000-of-00001.parquet", "Long_Range_Understanding.json"),
    ("data/Conflict_Resolution-00000-of-00001.parquet", "Conflict_Resolution.json"),
]

for parquet_file, output_name in targets:
    output_path = out_dir / output_name
    if output_path.exists() and output_path.stat().st_size > 0:
        print(f"[memoryagentbench] Reusing {output_name}")
        continue

    parquet_path = hf_hub_download(
        repo_id="ai-hyz/MemoryAgentBench",
        repo_type="dataset",
        filename=parquet_file,
    )
    rows = pq.read_table(parquet_path).to_pylist()

    # Write to a side file and rename it into place. Writing the final path
    # directly would let an interrupted run leave a truncated, non-empty file
    # that the reuse checks accept as a finished download.
    partial_path = output_path.with_name(output_name + ".partial")
    with partial_path.open("w", encoding="utf-8") as handle:
        json.dump(rows, handle, ensure_ascii=False)
    os.replace(partial_path, output_path)
    print(f"[memoryagentbench] Wrote {output_name} ({len(rows)} samples)")
PY
  echo "[memoryagentbench] Downloaded to $dir"
}
|
|
612
|
+
|
|
613
|
+
# ── Main ──
|
|
614
|
+
|
|
615
|
+
# Entry point: verify the required tools, make sure the dataset root exists,
# then run the downloader(s) selected by BENCHMARK.
check_deps
mkdir -p "$DATASETS_DIR"

case "$BENCHMARK" in
  all)
    # Run every downloader in a fixed order; `set -e` stops at the first
    # one that fails.
    for downloader in \
      download_ama_bench \
      download_longmemeval \
      download_amemgym \
      download_locomo \
      download_memory_arena \
      download_beam \
      download_personamem \
      download_membench \
      download_memoryagentbench; do
      "$downloader"
    done
    ;;
  ama-bench | longmemeval | amemgym | locomo | memory-arena | beam | personamem | membench | memoryagentbench)
    # Each benchmark maps to download_<name>, with dashes turned into
    # underscores (e.g. memory-arena -> download_memory_arena).
    "download_${BENCHMARK//-/_}"
    ;;
  *)
    echo "Unknown benchmark: $BENCHMARK"
    echo "Available: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all"
    exit 1
    ;;
esac

echo ""
echo "Done. Datasets at: $DATASETS_DIR"
|