@remnic/cli 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,7 +37,14 @@ remnic query "hello" --explain # Test query with tier breakdown
37
37
  | `remnic sync` | Diff-aware sync with external sources |
38
38
  | `remnic spaces` | Manage memory namespaces |
39
39
  | `remnic bench list` | List published benchmark packs |
40
+ | `remnic bench datasets status/download` | Check or download local benchmark datasets |
41
+ | `remnic bench runs list/show/delete` | Manage stored benchmark result files |
40
42
  | `remnic bench run` | Run one or more published benchmark packs |
43
+ | `remnic bench compare` | Compare two stored benchmark results |
44
+ | `remnic bench baseline` | Save or list named benchmark baselines |
45
+ | `remnic bench export` | Export a stored benchmark result as JSON, CSV, or HTML |
46
+ | `remnic bench providers discover` | Auto-detect local provider backends |
47
+ | `remnic bench publish --target remnic-ai` | Build the Remnic.ai benchmark feed from stored results |
41
48
 
42
49
  Run `remnic --help` for the full command list.
43
50
 
@@ -48,8 +55,26 @@ kept as a compatibility alias.
48
55
 
49
56
  ```bash
50
57
  remnic bench list
58
+ remnic bench run --quick longmemeval --runtime-profile baseline
59
+ remnic bench datasets status
60
+ remnic bench datasets download longmemeval
61
+ remnic bench datasets download --all
62
+ remnic bench runs list
63
+ remnic bench runs show candidate-run --detail
64
+ remnic bench runs delete candidate-run
51
65
  remnic bench run --quick longmemeval
52
66
  remnic bench run longmemeval --dataset-dir ~/datasets/longmemeval
67
+ remnic bench run longmemeval --runtime-profile real --remnic-config ~/.config/remnic/config.json
68
+ remnic bench run longmemeval --runtime-profile real --system-provider openai --system-model gpt-5.4-mini
69
+ remnic bench run longmemeval --runtime-profile openclaw-chain --openclaw-config ~/.openclaw/openclaw.json --gateway-agent-id memory-primary
70
+ remnic bench run longmemeval --matrix baseline,real,openclaw-chain
71
+ remnic bench compare base-run candidate-run
72
+ remnic bench baseline save main candidate-run
73
+ remnic bench baseline list
74
+ remnic bench export candidate-run --format csv --output ./candidate.csv
75
+ remnic bench export candidate-run --format html --output ./report.html
76
+ remnic bench providers discover
77
+ remnic bench publish --target remnic-ai
53
78
  remnic benchmark run --quick longmemeval
54
79
  ```
55
80
 
@@ -60,6 +85,17 @@ full runs need a real benchmark dataset. In a repo checkout the CLI will use
60
85
  `evals/datasets/<benchmark>` automatically; in packaged installs pass
61
86
  `--dataset-dir <path>` explicitly.
62
87
 
88
+ Package-backed benchmark runs also write `MANIFEST.json` in the results
89
+ directory. The manifest records result artifact hashes, dataset file hashes,
90
+ fixed seeds, runtime profile/model configuration, git state, QMD collection
91
+ names, selected benchmark environment keys, and config-file hashes. Secret
92
+ argument values are redacted.
93
+
94
+ `remnic bench datasets download` currently manages the published benchmark
95
+ datasets for `ama-bench`, `memory-arena`, `amemgym`, `longmemeval`, `locomo`,
96
+ `beam`, `personamem`, `membench`, and `memoryagentbench`. Internal Remnic
97
+ benchmarks keep their bundled or repo-managed fixtures.
98
+
63
99
  ## Connecting agents
64
100
 
65
101
  Once the daemon is running, connect any supported agent:
@@ -0,0 +1,647 @@
1
#!/usr/bin/env bash
#
# Downloads benchmark datasets for the Remnic bench suite.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Root directory that receives the datasets. A DATASETS_DIR already present
# in the environment takes precedence, which lets packaged CLI installs send
# downloads to a user-writable location (e.g. ~/.remnic/bench/datasets);
# otherwise fall back to the "datasets" directory next to the script's parent.
if [[ -z "${DATASETS_DIR:-}" ]]; then
  DATASETS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)/datasets"
fi
9
+
10
#######################################
# Print the help text and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 0)
# Outputs:   help text on stdout (callers may redirect it)
#######################################
usage() {
  echo "Usage: $0 [--benchmark <name>]"
  echo ""
  echo "Downloads benchmark datasets for the Remnic bench suite."
  echo ""
  echo "Benchmarks: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all"
  echo ""
  echo "Options:"
  echo " --benchmark <name> Download only the specified benchmark (default: all)"
  echo " --help Show this help"
  exit "${1:-0}"
}

# Benchmark selected on the command line; "all" downloads every dataset.
BENCHMARK="all"

#######################################
# Parse command-line options into BENCHMARK.
# Globals:   BENCHMARK (written)
# Arguments: the script's command-line arguments
# Outputs:   diagnostics and usage on stderr for invalid input
# Returns:   0 on success; exits 2 on a usage error, 0 after --help
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --benchmark)
        # Guard the lookahead: with `set -u` a bare "$2" would abort with an
        # opaque "unbound variable" error when the value is missing.
        if [[ -z "${2:-}" ]]; then
          echo "ERROR: --benchmark requires a value" >&2
          exit 2
        fi
        BENCHMARK="$2"
        shift 2
        ;;
      --benchmark=*)
        BENCHMARK="${1#*=}"
        shift
        ;;
      --help | -h)
        usage
        ;;
      *)
        # An unrecognized option is an error: report on stderr and exit
        # non-zero so calling tools do not mistake it for success.
        echo "Unknown option: $1" >&2
        usage 2 >&2
        ;;
    esac
  done
}

parse_args "$@"
31
+
32
# Confirm that the command-line tools used by the downloaders (git, curl)
# can be found. Reports the first missing tool and exits with status 1.
check_deps() {
  local tool
  for tool in git curl; do
    command -v "$tool" &>/dev/null && continue
    echo "ERROR: $tool is required but not found"
    exit 1
  done
}
40
+
41
# Interpreter command chosen by resolve_python_bin; empty until one is picked.
PYTHON_BIN=""

#######################################
# Check that a Python interpreter can locate every named module.
# Arguments: $1 - interpreter command to run
#            $2... - module names to look up
# Outputs:   on failure, one error line listing the missing modules (stderr)
# Returns:   0 when every module is found, 1 otherwise
#######################################
python_has_modules() {
  local python_bin="$1"
  shift

  "$python_bin" - "$@" <<'PY'
import sys

try:
    import importlib.util as importlib_util
except Exception:  # pragma: no cover - Python 2 fallback
    importlib_util = None
    import pkgutil


def has_module(name):
    try:
        if importlib_util is not None:
            return importlib_util.find_spec(name) is not None
        return pkgutil.find_loader(name) is not None
    except (ImportError, ValueError, AttributeError):
        # find_spec() imports the parent package of a dotted name and raises
        # when that parent is absent; report the module as missing instead of
        # crashing with a traceback.
        return False


missing = [name for name in sys.argv[1:] if not has_module(name)]
if missing:
    names = ", ".join(missing)
    sys.stderr.write(
        "ERROR: missing required Python module(s): {}. Install them before downloading this dataset.\n".format(
            names
        )
    )
    sys.exit(1)
PY
}
74
+
75
#######################################
# Select a Python interpreter that provides the requested modules.
# Tries the cached PYTHON_BIN first, then python3, then python.
# Globals:   PYTHON_BIN (read; written when a new interpreter is selected.
#            The write only persists when this function is NOT run inside a
#            command substitution or other subshell.)
# Arguments: $@ - module names the interpreter must provide (may be empty)
# Outputs:   the interpreter command on stdout; diagnostics on stderr
# Returns:   0 on success; exits 1 when no suitable interpreter exists
#######################################
resolve_python_bin() {
  if [[ -n "${PYTHON_BIN:-}" ]]; then
    if [[ $# -eq 0 ]] || python_has_modules "$PYTHON_BIN" "$@" >/dev/null 2>&1; then
      printf '%s\n' "$PYTHON_BIN"
      return 0
    fi
  fi

  local candidate
  local found_any=0
  for candidate in python3 python; do
    if ! command -v "$candidate" &>/dev/null; then
      continue
    fi
    found_any=1
    if [[ $# -gt 0 ]] && ! python_has_modules "$candidate" "$@" >/dev/null 2>&1; then
      continue
    fi
    PYTHON_BIN="$candidate"
    printf '%s\n' "$PYTHON_BIN"
    return 0
  done

  # Diagnostics go to stderr: stdout is reserved for the interpreter name and
  # callers either capture it with $(...) or discard it with >/dev/null, so an
  # error printed there would never reach the user.
  if [[ $found_any -eq 1 && $# -gt 0 ]]; then
    local names
    names=$(printf '%s, ' "$@")
    names=${names%, }
    echo "ERROR: missing required Python module(s): $names. Install them before downloading this dataset." >&2
    exit 1
  fi

  echo "ERROR: python or python3 is required but not found" >&2
  exit 1
}
109
+
110
# Make sure some Python interpreter offers every module named in "$@".
# Delegates to resolve_python_bin and throws away the interpreter name it
# prints. The call runs in the current shell (only stdout is redirected), so
# the PYTHON_BIN chosen by resolve_python_bin stays set for later calls.
require_python_modules() {
  resolve_python_bin "$@" 1>/dev/null
}
113
+
114
#######################################
# Download the AMA-bench open-ended QA set into $DATASETS_DIR/ama-bench.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 when the dataset file is present afterwards, 1 on failure
#######################################
download_ama_bench() {
  local dir="$DATASETS_DIR/ama-bench"
  local target="$dir/open_end_qa_set.jsonl"
  if [[ -f "$target" ]]; then
    echo "[ama-bench] Already downloaded at $dir"
    return
  fi
  echo "[ama-bench] Downloading from HuggingFace (AMA-bench/AMA-bench)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench "$tmpdir/repo" 2>/dev/null || {
    echo "[ama-bench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench /tmp/amabench"
    echo " cp /tmp/amabench/test/open_end_qa_set.jsonl $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # Fail loudly when the expected file is absent or cannot be copied instead
  # of reporting a successful download with nothing on disk.
  local src="$tmpdir/repo/test/open_end_qa_set.jsonl"
  if [[ ! -f "$src" ]] || ! cp "$src" "$target"; then
    echo "[ama-bench] ERROR: test/open_end_qa_set.jsonl is missing from the clone or could not be copied to $dir"
    rm -f "$target"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  local episodes
  # Some wc implementations pad the count with spaces; strip them.
  episodes=$(wc -l < "$target" | tr -d ' ')
  echo "[ama-bench] Downloaded to $dir ($episodes episodes)"
}
135
+
136
#######################################
# Download the LongMemEval oracle file into $DATASETS_DIR/longmemeval.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 when the dataset file is present afterwards, 1 on failure
#######################################
download_longmemeval() {
  local dir="$DATASETS_DIR/longmemeval"
  local target="$dir/longmemeval_oracle.json"
  local url="https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_oracle.json"
  if [[ -f "$target" ]]; then
    echo "[longmemeval] Already downloaded at $dir"
    return
  fi
  echo "[longmemeval] Downloading from HuggingFace (xiaowu0162/longmemeval-cleaned)..."
  mkdir -p "$dir"
  # Download to a side file and rename on success, so an interrupted transfer
  # is never mistaken for a finished dataset on the next run. -f makes curl
  # fail on HTTP errors instead of saving the error page as the dataset, and
  # testing curl inside `if` keeps `set -e` from aborting before the hint.
  local partial="$target.partial"
  if ! curl -fsSL "$url" -o "$partial" || [[ ! -s "$partial" ]]; then
    echo "[longmemeval] ERROR: Download failed. Try manually:"
    echo " curl -sL $url -o $target"
    rm -f "$partial"
    return 1
  fi
  mv "$partial" "$target"
  echo "[longmemeval] Downloaded to $dir ($(du -h "$target" | cut -f1))"
}
154
+
155
#######################################
# Download the AMemGym v1 base data into $DATASETS_DIR/amemgym.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 when the dataset file is present afterwards, 1 on failure
#######################################
download_amemgym() {
  local dir="$DATASETS_DIR/amemgym"
  local target="$dir/amemgym-v1-base.json"
  if [[ -f "$target" ]]; then
    echo "[amemgym] Already downloaded at $dir"
    return
  fi
  echo "[amemgym] Downloading from HuggingFace (AGI-Eval/AMemGym)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym "$tmpdir/repo" 2>/dev/null || {
    echo "[amemgym] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym /tmp/amemgym"
    echo " cp /tmp/amemgym/v1.base/data.json $dir/amemgym-v1-base.json"
    rm -rf "$tmpdir"
    return 1
  }
  # Report a missing or uncopyable source file as a failure rather than
  # announcing a download that left nothing behind.
  local src="$tmpdir/repo/v1.base/data.json"
  if [[ ! -f "$src" ]] || ! cp "$src" "$target"; then
    echo "[amemgym] ERROR: v1.base/data.json is missing from the clone or could not be copied to $dir"
    rm -f "$target"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[amemgym] Downloaded to $dir"
}
176
+
177
#######################################
# Download the LoCoMo dataset file into $DATASETS_DIR/locomo.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 when the dataset file is present afterwards, 1 on failure
#######################################
download_locomo() {
  local dir="$DATASETS_DIR/locomo"
  local target="$dir/locomo10.json"
  if [[ -f "$target" ]]; then
    echo "[locomo] Already downloaded at $dir"
    return
  fi
  echo "[locomo] Downloading from GitHub (snap-research/locomo)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/snap-research/locomo.git "$tmpdir/repo" 2>/dev/null || {
    echo "[locomo] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/snap-research/locomo.git /tmp/locomo"
    echo " cp /tmp/locomo/data/locomo10.json $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # Treat a missing or uncopyable source file as a failure instead of
  # printing a success line for a dataset that is not there.
  local src="$tmpdir/repo/data/locomo10.json"
  if [[ ! -f "$src" ]] || ! cp "$src" "$target"; then
    echo "[locomo] ERROR: data/locomo10.json is missing from the clone or could not be copied to $dir"
    rm -f "$target"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[locomo] Downloaded to $dir ($(du -h "$target" | cut -f1))"
}
198
+
199
#######################################
# Download the MemoryArena domains into $DATASETS_DIR/memory-arena, one
# <domain>.jsonl per top-level directory of the source repository that
# contains a data.jsonl.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 when at least one domain file is present, 1 on failure
#######################################
download_memory_arena() {
  local dir="$DATASETS_DIR/memory-arena"
  # Glob instead of parsing `ls`; an unmatched pattern stays literal and
  # simply fails the -e test.
  local existing
  for existing in "$dir"/*.jsonl; do
    if [[ -e "$existing" ]]; then
      echo "[memory-arena] Already downloaded at $dir"
      return
    fi
  done
  echo "[memory-arena] Downloading from HuggingFace (ZexueHe/memoryarena)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena "$tmpdir/repo" 2>/dev/null || {
    echo "[memory-arena] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena /tmp/memoryarena"
    echo " for d in /tmp/memoryarena/*/; do cp \"\$d/data.jsonl\" \"$dir/\$(basename \$d).jsonl\"; done"
    rm -rf "$tmpdir"
    return 1
  }
  local d name
  local count=0
  for d in "$tmpdir/repo"/*/; do
    d="${d%/}"
    [[ -f "$d/data.jsonl" ]] || continue
    name=$(basename "$d")
    if ! cp "$d/data.jsonl" "$dir/${name}.jsonl"; then
      echo "[memory-arena] ERROR: could not copy $name/data.jsonl to $dir"
      rm -rf "$tmpdir"
      return 1
    fi
    count=$((count + 1))
  done
  rm -rf "$tmpdir"
  # Counting copied files directly avoids `ls | wc -l`, which under
  # `set -o pipefail` aborts the script silently when nothing matched.
  if [[ $count -eq 0 ]]; then
    echo "[memory-arena] ERROR: no <domain>/data.jsonl files were found in the clone"
    return 1
  fi
  echo "[memory-arena] Downloaded to $dir ($count domains)"
}
228
+
229
#######################################
# Download the BEAM parquet sources and convert each one to a JSON file in
# $DATASETS_DIR/beam (beam_100k.json, beam_500k.json, beam_1m.json,
# beam_10m.json).
# Globals:   DATASETS_DIR (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 when the conversion script fails
#######################################
download_beam() {
  local dir="$DATASETS_DIR/beam"
  if [[ -f "$dir/beam_100k.json" && -f "$dir/beam_500k.json" && -f "$dir/beam_1m.json" && -f "$dir/beam_10m.json" ]]; then
    echo "[beam] Already downloaded at $dir"
    return
  fi
  echo "[beam] Downloading from Hugging Face parquet sources (Mohammadta/BEAM, Mohammadta/BEAM-10M)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY' || return 1
from __future__ import annotations

import json
import os
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (source repository, parquet shards to merge, output file name)
targets = [
    ("Mohammadta/BEAM", ["data/100K-00000-of-00001.parquet"], "beam_100k.json"),
    ("Mohammadta/BEAM", ["data/500K-00000-of-00001.parquet"], "beam_500k.json"),
    ("Mohammadta/BEAM", ["data/1M-00000-of-00001.parquet"], "beam_1m.json"),
    (
        "Mohammadta/BEAM-10M",
        ["data/10M-00000-of-00002.parquet", "data/10M-00001-of-00002.parquet"],
        "beam_10m.json",
    ),
]

for repo_id, parquet_files, output_name in targets:
    output_path = out_dir / output_name
    if output_path.exists() and output_path.stat().st_size > 0:
        print(f"[beam] Reusing {output_name}")
        continue

    rows: list[dict] = []
    for parquet_file in parquet_files:
        parquet_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=parquet_file,
        )
        rows.extend(pq.read_table(parquet_path).to_pylist())

    # Write to a side file and rename it into place: a run interrupted
    # mid-write must not leave a truncated, non-empty JSON file that the
    # "Reusing" check above would accept on the next run.
    partial_path = output_path.with_name(output_name + ".partial")
    with partial_path.open("w", encoding="utf-8") as handle:
        json.dump(rows, handle, ensure_ascii=False)
    os.replace(partial_path, output_path)
    print(f"[beam] Wrote {output_name} ({len(rows)} conversations)")
PY
  echo "[beam] Downloaded to $dir"
}
285
+
286
#######################################
# Mirror the PersonaMem-v2 benchmark CSV and every 32k chat history it
# references into $DATASETS_DIR/personamem, then drop a completion marker.
# Globals:   DATASETS_DIR (read); HF_TOKEN / HUGGINGFACE_HUB_TOKEN (read by
#            the embedded Python and passed to hf_hub_download)
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 when the mirroring script fails
#######################################
download_personamem() {
  local dir="$DATASETS_DIR/personamem"
  local marker_dir="$dir/data/chat_history_32k"
  if [[ -f "$dir/benchmark/text/benchmark.csv" ]] \
    && [[ -f "$marker_dir/.download-complete" ]]; then
    echo "[personamem] Already downloaded at $dir"
    return
  fi
  echo "[personamem] Downloading from Hugging Face (bowen-upenn/PersonaMem-v2)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY' || return 1
from __future__ import annotations

import csv
import os
import shutil
import sys
import time
from pathlib import Path, PurePosixPath

from huggingface_hub import hf_hub_download

REPO_ID = "bowen-upenn/PersonaMem-v2"
BENCHMARK_PATH = "benchmark/text/benchmark.csv"

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)
out_dir_root = out_dir.resolve()
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")


def resolve_dataset_destination(relative_path: str) -> tuple[str, Path]:
    """Map a repository-relative path to (sanitized posix path, local path).

    Raises ValueError for empty, absolute, or parent-traversing references so
    that nothing is ever written outside the dataset root.
    """
    normalized = relative_path.strip().replace("\\", "/")
    if not normalized:
        raise ValueError("dataset path cannot be empty")

    posix_path = PurePosixPath(normalized)
    if posix_path.is_absolute():
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        )

    safe_parts = []
    for part in posix_path.parts:
        if part in ("", "."):
            continue
        if part == "..":
            raise ValueError(
                f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
            )
        safe_parts.append(part)

    if not safe_parts:
        raise ValueError("dataset path cannot resolve to the dataset root")

    destination = (out_dir / Path(*safe_parts)).resolve()
    try:
        destination.relative_to(out_dir_root)
    except ValueError as exc:
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        ) from exc

    return PurePosixPath(*safe_parts).as_posix(), destination


def copy_dataset_file(relative_path: str) -> Path:
    """Fetch one repository file and place it under the dataset root."""
    safe_relative_path, destination = resolve_dataset_destination(relative_path)
    source = Path(
        hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=safe_relative_path,
            token=token,
        )
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Copy to a side file and rename into place: the resume logic below skips
    # any destination that already exists, so a copy interrupted half-way must
    # never be visible under the final name.
    partial = destination.with_name(destination.name + ".partial")
    shutil.copy2(source, partial)
    os.replace(partial, destination)
    return destination


benchmark_destination = copy_dataset_file(BENCHMARK_PATH)

with benchmark_destination.open("r", encoding="utf8", newline="") as handle:
    reader = csv.DictReader(handle)
    history_paths = sorted(
        {
            (row.get("chat_history_32k_link") or "").strip()
            for row in reader
            if (row.get("chat_history_32k_link") or "").strip()
        }
    )

if not history_paths:
    raise SystemExit("PersonaMem benchmark.csv did not contain any chat_history_32k_link values")

completed = 0
for index, relative_path in enumerate(history_paths, start=1):
    _, destination = resolve_dataset_destination(relative_path)
    if destination.is_file():
        # Already mirrored by an earlier run.
        completed += 1
        continue

    # Up to five attempts per file, backing off 2, 4, 8, 16 seconds.
    for attempt in range(1, 6):
        try:
            copy_dataset_file(relative_path)
            completed += 1
            break
        except Exception as exc:  # noqa: BLE001
            if attempt == 5:
                raise SystemExit(
                    f"failed to download PersonaMem asset {relative_path}: {exc}"
                ) from exc
            delay_seconds = min(30, 2 ** attempt)
            print(
                f"[personamem] Retry {attempt}/5 for {relative_path} after error: {exc}. "
                f"Sleeping {delay_seconds}s..."
            )
            time.sleep(delay_seconds)

    if index % 100 == 0 or index == len(history_paths):
        print(f"[personamem] Downloaded {completed}/{len(history_paths)} chat histories")

print(
    f"[personamem] Mirrored benchmark.csv and {completed} chat histories into {out_dir}"
)
PY
  # The marker directory only exists if the CSV's history links happened to
  # point into it; create it so writing the marker cannot fail.
  mkdir -p "$marker_dir"
  touch "$marker_dir/.download-complete"
  echo "[personamem] Downloaded to $dir"
}
418
+
419
#######################################
# Clone the MemBench repository and normalize its MemData JSON files into a
# single $DATASETS_DIR/membench/membench.json list of benchmark cases.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 on success, 1 on clone or normalization failure
#######################################
download_membench() {
  local dir="$DATASETS_DIR/membench"
  if [[ -f "$dir/membench.json" ]]; then
    echo "[membench] Already downloaded at $dir"
    return
  fi
  echo "[membench] Downloading and normalizing from GitHub (import-myself/Membench)..."
  mkdir -p "$dir"
  # Resolve the interpreter before cloning so a missing Python is detected
  # without first fetching (and then abandoning) the repository.
  local python_bin
  python_bin="$(resolve_python_bin)" || return 1
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/import-myself/Membench.git "$tmpdir/repo" 2>/dev/null || {
    echo "[membench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/import-myself/Membench.git /tmp/membench"
    rm -rf "$tmpdir"
    return 1
  }
  # Capture the status instead of letting `set -e` abort here, so the
  # temporary clone is always removed.
  local status=0
  "$python_bin" - "$tmpdir/repo" "$dir/membench.json" <<'PY' || status=$?
from __future__ import annotations

import json
import os
import re
import sys
from pathlib import Path

repo_root = Path(sys.argv[1])
output_path = Path(sys.argv[2])


def normalize_text(value):
    """Return value as stripped text; for lists, the first non-empty item."""
    if isinstance(value, str):
        return value.strip()
    if isinstance(value, list):
        for item in value:
            text = normalize_text(item)
            if text:
                return text
        return ""
    if value is None:
        return ""
    return str(value).strip()


def sanitize_case_id(value: str) -> str:
    """Lower-case value and collapse runs of non [a-z0-9] into single dashes."""
    return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")


def iter_qa_entries(value):
    """Return the QA field as a list of dicts (a lone dict becomes one item)."""
    if isinstance(value, dict):
        return [value]
    if isinstance(value, list):
        return [entry for entry in value if isinstance(entry, dict)]
    return []


def build_turns(message_list):
    """Flatten sessions of {"user", "assistant"} steps into role/content turns."""
    turns = []
    if not isinstance(message_list, list):
        return turns
    for session in message_list:
        if isinstance(session, dict):
            session = [session]
        if not isinstance(session, list):
            continue
        for step in session:
            if not isinstance(step, dict):
                continue
            user = normalize_text(step.get("user"))
            assistant = normalize_text(step.get("assistant"))
            if user:
                turns.append({"role": "user", "content": user})
            if assistant:
                turns.append({"role": "assistant", "content": assistant})
    return turns


cases = []
# (directory under MemData, scenario label recorded on each case)
source_roots = [
    ("FirstAgent", "participant"),
    ("ThirdAgent", "observation"),
]

for source_root, scenario in source_roots:
    for dataset_path in sorted((repo_root / "MemData" / source_root).glob("*.json")):
        label = dataset_path.stem.lower()
        memory_type = "reflective" if "highlevel" in label else "factual"
        level = "high_level" if memory_type == "reflective" else "low_level"
        document = json.loads(dataset_path.read_text(encoding="utf-8"))

        if not isinstance(document, dict):
            continue

        for group_name, entries in document.items():
            if not isinstance(entries, list):
                continue
            for entry_index, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    continue

                turns = build_turns(entry.get("message_list") or entry.get("messages"))
                if not turns:
                    continue

                qa_entries = iter_qa_entries(
                    entry.get("QA")
                    or entry.get("qa")
                    or entry.get("qas")
                    or entry.get("question_answers")
                )
                for qa_index, qa in enumerate(qa_entries):
                    question = normalize_text(qa.get("question") or qa.get("query"))
                    answer = normalize_text(qa.get("answer"))
                    if not question or not answer:
                        continue

                    qid = normalize_text(
                        qa.get("qid") or qa.get("id") or qa.get("question_id") or qa_index
                    )
                    raw_id = (
                        f"{source_root}-{dataset_path.stem}-{group_name}-"
                        f"{entry_index}-{qid}"
                    )
                    case_id = sanitize_case_id(raw_id)
                    cases.append(
                        {
                            "id": case_id,
                            "memoryType": memory_type,
                            "scenario": scenario,
                            "level": level,
                            "turns": turns,
                            "question": question,
                            "answer": answer,
                        }
                    )

if not cases:
    raise SystemExit("MemBench normalization produced no runnable cases.")

output_path.parent.mkdir(parents=True, exist_ok=True)
# Write to a side file and rename into place so an interrupted run cannot
# leave a truncated membench.json that later counts as "already downloaded".
partial_path = output_path.with_name(output_path.name + ".partial")
with partial_path.open("w", encoding="utf-8") as handle:
    json.dump(cases, handle, ensure_ascii=False)
os.replace(partial_path, output_path)

print(f"[membench] Wrote {output_path.name} ({len(cases)} cases)")
PY
  rm -rf "$tmpdir"
  if [[ $status -ne 0 ]]; then
    echo "[membench] ERROR: normalization failed"
    return 1
  fi
  echo "[membench] Downloaded to $dir"
}
562
+
563
#######################################
# Download the MemoryAgentBench parquet splits and convert each one to a
# JSON file in $DATASETS_DIR/memoryagentbench.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 when the conversion script fails
#######################################
download_memoryagentbench() {
  local dir="$DATASETS_DIR/memoryagentbench"
  if [[ -f "$dir/Accurate_Retrieval.json" && -f "$dir/Test_Time_Learning.json" && -f "$dir/Long_Range_Understanding.json" && -f "$dir/Conflict_Resolution.json" ]]; then
    echo "[memoryagentbench] Already downloaded at $dir"
    return
  fi
  echo "[memoryagentbench] Downloading from Hugging Face parquet sources (ai-hyz/MemoryAgentBench)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY' || return 1
from __future__ import annotations

import json
import os
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (parquet file inside the dataset repository, output file name)
targets = [
    ("data/Accurate_Retrieval-00000-of-00001.parquet", "Accurate_Retrieval.json"),
    ("data/Test_Time_Learning-00000-of-00001.parquet", "Test_Time_Learning.json"),
    ("data/Long_Range_Understanding-00000-of-00001.parquet", "Long_Range_Understanding.json"),
    ("data/Conflict_Resolution-00000-of-00001.parquet", "Conflict_Resolution.json"),
]

for parquet_file, output_name in targets:
    output_path = out_dir / output_name
    if output_path.exists() and output_path.stat().st_size > 0:
        print(f"[memoryagentbench] Reusing {output_name}")
        continue

    parquet_path = hf_hub_download(
        repo_id="ai-hyz/MemoryAgentBench",
        repo_type="dataset",
        filename=parquet_file,
    )
    rows = pq.read_table(parquet_path).to_pylist()
    # Write to a side file and rename it into place: an interrupted run must
    # not leave a truncated, non-empty JSON file that the "Reusing" check
    # above would accept on the next run.
    partial_path = output_path.with_name(output_name + ".partial")
    with partial_path.open("w", encoding="utf-8") as handle:
        json.dump(rows, handle, ensure_ascii=False)
    os.replace(partial_path, output_path)
    print(f"[memoryagentbench] Wrote {output_name} ({len(rows)} samples)")
PY
  echo "[memoryagentbench] Downloaded to $dir"
}
612
+
613
# ── Main ──
# Verify the required tools, make sure the dataset root exists, then run the
# downloader(s) selected through BENCHMARK.

check_deps
mkdir -p "$DATASETS_DIR"

case "$BENCHMARK" in
  ama-bench | longmemeval | amemgym | locomo | memory-arena | beam | personamem | membench | memoryagentbench)
    # Downloader functions are named download_<benchmark>, with dashes in the
    # benchmark name replaced by underscores.
    "download_${BENCHMARK//-/_}"
    ;;
  all)
    for selected in ama_bench longmemeval amemgym locomo memory_arena beam personamem membench memoryagentbench; do
      "download_${selected}"
    done
    ;;
  *)
    echo "Unknown benchmark: $BENCHMARK"
    echo "Available: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all"
    exit 1
    ;;
esac

echo ""
echo "Done. Datasets at: $DATASETS_DIR"