@remnic/cli 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/assets/download-datasets.sh +662 -0
- package/bin/engram.cjs +29 -11
- package/bin/remnic.cjs +29 -10
- package/dist/assets/download-datasets.sh +17 -2
- package/dist/index.js +2751 -602
- package/package.json +11 -7
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# @remnic/cli
|
|
2
2
|
|
|
3
|
-
CLI for Remnic memory -- init, query, daemon management, connectors, curation, and more.
|
|
3
|
+
CLI for Remnic memory and context -- init, query, daemon management, connectors, curation, and more.
|
|
4
4
|
|
|
5
|
-
Part of [Remnic](https://github.com/joshuaswarren/remnic),
|
|
5
|
+
Part of [Remnic](https://github.com/joshuaswarren/remnic), open-source memory and context for user-aware agents.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -35,6 +35,7 @@ remnic query "hello" --explain # Test query with tier breakdown
|
|
|
35
35
|
| `remnic curate` | Interactive memory curation |
|
|
36
36
|
| `remnic dedup` | Find and merge duplicate memories |
|
|
37
37
|
| `remnic sync` | Diff-aware sync with external sources |
|
|
38
|
+
| `remnic offline prepare/sync/status/watch` | Use a local memory cache and sync with a remote Remnic daemon |
|
|
38
39
|
| `remnic spaces` | Manage memory namespaces |
|
|
39
40
|
| `remnic bench list` | List published benchmark packs |
|
|
40
41
|
| `remnic bench datasets status/download` | Check or download local benchmark datasets |
|
|
@@ -48,6 +49,11 @@ remnic query "hello" --explain # Test query with tier breakdown
|
|
|
48
49
|
|
|
49
50
|
Run `remnic --help` for the full command list.
|
|
50
51
|
|
|
52
|
+
Offline mode is intended for laptops that need Remnic on flights, cruises, or
|
|
53
|
+
other disconnected stretches. Point agents at the laptop daemon, then run
|
|
54
|
+
`remnic offline watch` to sync with the home daemon whenever it is reachable.
|
|
55
|
+
See [Offline Mode](../../docs/guides/offline-mode.md).
|
|
56
|
+
|
|
51
57
|
## Benchmarks
|
|
52
58
|
|
|
53
59
|
The phase-1 benchmark surface is exposed through `remnic bench`, with `remnic benchmark`
|
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Downloads benchmark datasets for the Remnic bench suite.
set -euo pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# A non-empty DATASETS_DIR supplied through the environment takes precedence,
# which lets packaged CLI installs send downloads to a user-writable location
# (e.g. ~/.remnic/bench/datasets). Otherwise fall back to a "datasets"
# directory next to the script's parent directory.
if [[ -z "${DATASETS_DIR:-}" ]]; then
  DATASETS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)/datasets"
fi
|
|
9
|
+
|
|
10
|
+
#######################################
# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits with status 0
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [--benchmark <name>]" \
    "" \
    "Downloads benchmark datasets for the Remnic bench suite." \
    "" \
    "Benchmarks: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all" \
    "" \
    "Options:" \
    " --benchmark <name> Download only the specified benchmark (default: all)" \
    " --help Show this help"
  exit 0
}
|
|
22
|
+
|
|
23
|
+
# Benchmark selected on the command line; "all" unless --benchmark overrides it.
BENCHMARK="all"
while [[ $# -gt 0 ]]; do
  case "$1" in
    --benchmark)
      # With `set -u`, reading "$2" when the value is missing aborts with an
      # opaque "unbound variable" message, so check the argument count first.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --benchmark requires a value" >&2
        exit 1
      fi
      BENCHMARK="$2"
      shift 2
      ;;
    --help)
      usage
      ;;
    *)
      echo "Unknown option: $1" >&2
      # usage exits with status 0, so run it in a subshell: the help text is
      # still shown, but an invalid invocation ends with a failure status.
      (usage) >&2
      exit 1
      ;;
  esac
done
|
|
31
|
+
|
|
32
|
+
#######################################
# Verify that the external tools used by the downloaders are available.
# Arguments: none
# Outputs:   an error line on stdout naming the first missing tool
# Returns:   0 when git and curl both resolve; otherwise exits with status 1
#######################################
check_deps() {
  local tool
  for tool in git curl; do
    command -v "$tool" >/dev/null 2>&1 && continue
    echo "ERROR: $tool is required but not found"
    exit 1
  done
}
|
|
40
|
+
|
|
41
|
+
# Interpreter chosen by resolve_python_bin; empty until one has been selected.
PYTHON_BIN=""

#######################################
# Check that a Python interpreter can locate every named module.
# Arguments: $1 - interpreter command to run
#            $2... - module names to look up
# Outputs:   on failure, one error line on stderr listing the missing modules
# Returns:   0 when every module is found, 1 when at least one is missing
#######################################
python_has_modules() {
  local interpreter="$1"
  shift

  # The module names are handed to the embedded program as its argv.
  "$interpreter" - "$@" <<'PY'
import sys

try:
    import importlib.util as importlib_util
except Exception:  # pragma: no cover - Python 2 fallback
    importlib_util = None
    import pkgutil


def has_module(name):
    # Use importlib when it imported; otherwise fall back to pkgutil.
    if importlib_util is None:
        return pkgutil.find_loader(name) is not None
    return importlib_util.find_spec(name) is not None


missing = []
for requested in sys.argv[1:]:
    if not has_module(requested):
        missing.append(requested)

if missing:
    names = ", ".join(missing)
    sys.stderr.write(
        "ERROR: missing required Python module(s): {}. Install them before downloading this dataset.\n".format(
            names
        )
    )
    sys.exit(1)
PY
}
|
|
74
|
+
|
|
75
|
+
#######################################
# Select a Python interpreter, optionally one that provides given modules.
# A previously selected interpreter (PYTHON_BIN) is reused when it satisfies
# the request; otherwise python3 and then python are tried in that order.
# Globals:   PYTHON_BIN (read; written when a new interpreter is selected)
# Arguments: $@ - optional module names the interpreter must provide
# Outputs:   the interpreter command on stdout; diagnostics on stderr
# Returns:   0 on success. On failure it calls `exit 1`, which ends the
#            current shell (or only the subshell when invoked through a
#            command substitution).
#######################################
resolve_python_bin() {
  if [[ -n "${PYTHON_BIN:-}" ]]; then
    if [[ $# -eq 0 ]] || python_has_modules "$PYTHON_BIN" "$@" >/dev/null 2>&1; then
      printf '%s\n' "$PYTHON_BIN"
      return 0
    fi
  fi

  local candidate
  local found_any=0
  for candidate in python3 python; do
    if ! command -v "$candidate" &>/dev/null; then
      continue
    fi
    found_any=1
    if [[ $# -gt 0 ]] && ! python_has_modules "$candidate" "$@" >/dev/null 2>&1; then
      continue
    fi
    PYTHON_BIN="$candidate"
    printf '%s\n' "$PYTHON_BIN"
    return 0
  done

  # Diagnostics must go to stderr: callers either capture stdout to obtain the
  # interpreter name or redirect it to /dev/null, so a message written to
  # stdout would never reach the user.
  if [[ $found_any -eq 1 && $# -gt 0 ]]; then
    local names
    names=$(printf '%s, ' "$@")
    names=${names%, }
    echo "ERROR: missing required Python module(s): $names. Install them before downloading this dataset." >&2
    exit 1
  fi

  echo "ERROR: python or python3 is required but not found" >&2
  exit 1
}
|
|
109
|
+
|
|
110
|
+
#######################################
# Ensure an interpreter providing the named modules can be resolved.
# resolve_python_bin is called directly rather than through a command
# substitution, so its PYTHON_BIN assignment stays visible to later calls;
# only the interpreter name it prints on stdout is discarded.
# Arguments: $@ - module names that must be available
# Returns:   the status of resolve_python_bin
#######################################
require_python_modules() {
  resolve_python_bin "$@" 1>/dev/null
}
|
|
113
|
+
|
|
114
|
+
#######################################
# Fetch the AMA-bench question set into $DATASETS_DIR/ama-bench.
# Clones the dataset repository into a temporary directory and copies
# test/open_end_qa_set.jsonl out of it. Skips when the file already exists.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; 1 when the clone or the
#            copy fails
#######################################
download_ama_bench() {
  local dir="$DATASETS_DIR/ama-bench"
  if [[ -f "$dir/open_end_qa_set.jsonl" ]]; then
    echo "[ama-bench] Already downloaded at $dir"
    return
  fi
  echo "[ama-bench] Downloading from HuggingFace (AMA-bench/AMA-bench)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench "$tmpdir/repo" 2>/dev/null || {
    echo "[ama-bench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AMA-bench/AMA-bench /tmp/amabench"
    echo " cp /tmp/amabench/test/open_end_qa_set.jsonl $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must not be reported as a successful download.
  if ! cp "$tmpdir/repo/test/open_end_qa_set.jsonl" "$dir/"; then
    echo "[ama-bench] ERROR: Could not copy test/open_end_qa_set.jsonl from the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[ama-bench] Downloaded to $dir ($(wc -l < "$dir/open_end_qa_set.jsonl") episodes)"
}
|
|
135
|
+
|
|
136
|
+
#######################################
# Fetch longmemeval_oracle.json into $DATASETS_DIR/longmemeval using curl.
# The body is written to a ".part" file and renamed only after a successful,
# non-empty transfer, so an interrupted or rejected download never satisfies
# the "already downloaded" check on a later run.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; 1 when the download fails
#######################################
download_longmemeval() {
  local dir="$DATASETS_DIR/longmemeval"
  local target="$dir/longmemeval_oracle.json"
  local url="https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_oracle.json"
  if [[ -f "$target" ]]; then
    echo "[longmemeval] Already downloaded at $dir"
    return
  fi
  echo "[longmemeval] Downloading from HuggingFace (xiaowu0162/longmemeval-cleaned)..."
  mkdir -p "$dir"
  local partial="$target.part"
  # -f turns HTTP errors into a failure instead of saving the error page;
  # -S still shows curl's own error message despite -s. Running curl inside
  # the condition keeps `set -e` from ending the script before the hint below.
  if ! curl -fsSL "$url" -o "$partial" || [[ ! -s "$partial" ]]; then
    echo "[longmemeval] ERROR: Download failed. Try manually:"
    echo " curl -sL $url -o $target"
    rm -f "$partial"
    return 1
  fi
  mv -f "$partial" "$target"
  echo "[longmemeval] Downloaded to $dir ($(du -h "$target" | cut -f1))"
}
|
|
154
|
+
|
|
155
|
+
#######################################
# Fetch the AMemGym base data into $DATASETS_DIR/amemgym/amemgym-v1-base.json.
# Clones the dataset repository into a temporary directory and copies
# v1.base/data.json out of it. Skips when the file already exists.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; 1 when the clone or the
#            copy fails
#######################################
download_amemgym() {
  local dir="$DATASETS_DIR/amemgym"
  if [[ -f "$dir/amemgym-v1-base.json" ]]; then
    echo "[amemgym] Already downloaded at $dir"
    return
  fi
  echo "[amemgym] Downloading from HuggingFace (AGI-Eval/AMemGym)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym "$tmpdir/repo" 2>/dev/null || {
    echo "[amemgym] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/AGI-Eval/AMemGym /tmp/amemgym"
    echo " cp /tmp/amemgym/v1.base/data.json $dir/amemgym-v1-base.json"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must not be reported as a successful download.
  if ! cp "$tmpdir/repo/v1.base/data.json" "$dir/amemgym-v1-base.json"; then
    echo "[amemgym] ERROR: Could not copy v1.base/data.json from the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[amemgym] Downloaded to $dir"
}
|
|
176
|
+
|
|
177
|
+
#######################################
# Fetch locomo10.json into $DATASETS_DIR/locomo.
# Clones the snap-research/locomo repository into a temporary directory and
# copies data/locomo10.json out of it. Skips when the file already exists.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; 1 when the clone or the
#            copy fails
#######################################
download_locomo() {
  local dir="$DATASETS_DIR/locomo"
  if [[ -f "$dir/locomo10.json" ]]; then
    echo "[locomo] Already downloaded at $dir"
    return
  fi
  echo "[locomo] Downloading from GitHub (snap-research/locomo)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/snap-research/locomo.git "$tmpdir/repo" 2>/dev/null || {
    echo "[locomo] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/snap-research/locomo.git /tmp/locomo"
    echo " cp /tmp/locomo/data/locomo10.json $dir/"
    rm -rf "$tmpdir"
    return 1
  }
  # A failed copy must not be reported as a successful download.
  if ! cp "$tmpdir/repo/data/locomo10.json" "$dir/"; then
    echo "[locomo] ERROR: Could not copy data/locomo10.json from the cloned repository"
    rm -rf "$tmpdir"
    return 1
  fi
  rm -rf "$tmpdir"
  echo "[locomo] Downloaded to $dir ($(du -h "$dir/locomo10.json" | cut -f1))"
}
|
|
198
|
+
|
|
199
|
+
#######################################
# Fetch the MemoryArena domains into $DATASETS_DIR/memory-arena.
# Clones the dataset repository into a temporary directory and copies each
# <domain>/data.jsonl to <domain>.jsonl. Skips when any .jsonl file is
# already present in the target directory.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; 1 when the clone fails,
#            a copy fails, or the clone contains no <domain>/data.jsonl
#######################################
download_memory_arena() {
  local dir="$DATASETS_DIR/memory-arena"
  # Glob into an array instead of parsing `ls`; when nothing matches, the
  # pattern stays literal and the -e test below is false.
  local -a existing=("$dir"/*.jsonl)
  if [[ -e "${existing[0]:-}" ]]; then
    echo "[memory-arena] Already downloaded at $dir"
    return
  fi
  echo "[memory-arena] Downloading from HuggingFace (ZexueHe/memoryarena)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena "$tmpdir/repo" 2>/dev/null || {
    echo "[memory-arena] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://huggingface.co/datasets/ZexueHe/memoryarena /tmp/memoryarena"
    echo " for d in /tmp/memoryarena/*/; do cp \"\$d/data.jsonl\" \"$dir/\$(basename \$d).jsonl\"; done"
    rm -rf "$tmpdir"
    return 1
  }
  local domain_dir name
  local count=0
  for domain_dir in "$tmpdir/repo"/*/; do
    [[ -f "${domain_dir}data.jsonl" ]] || continue
    name=$(basename "$domain_dir")
    if ! cp "${domain_dir}data.jsonl" "$dir/${name}.jsonl"; then
      echo "[memory-arena] ERROR: Could not copy ${name}/data.jsonl from the cloned repository"
      rm -rf "$tmpdir"
      return 1
    fi
    count=$((count + 1))
  done
  rm -rf "$tmpdir"
  # An empty result is a failure, not a download of "0 domains".
  if [[ $count -eq 0 ]]; then
    echo "[memory-arena] ERROR: No <domain>/data.jsonl files were found in the cloned repository"
    return 1
  fi
  echo "[memory-arena] Downloaded to $dir ($count domains)"
}
|
|
228
|
+
|
|
229
|
+
#######################################
# Fetch the BEAM datasets into $DATASETS_DIR/beam as four JSON files.
# Skips when all four output files already exist; otherwise requires the
# huggingface_hub and pyarrow Python modules and runs the embedded program,
# which converts the parquet sources to JSON.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress lines on stdout
# Returns:   0 on success or when already present
#######################################
download_beam() {
  local dir="$DATASETS_DIR/beam"
  local artifact
  local have_all=1
  for artifact in beam_100k.json beam_500k.json beam_1m.json beam_10m.json; do
    [[ -f "$dir/$artifact" ]] || have_all=0
  done
  if [[ $have_all -eq 1 ]]; then
    echo "[beam] Already downloaded at $dir"
    return
  fi
  echo "[beam] Downloading from Hugging Face parquet sources (Mohammadta/BEAM, Mohammadta/BEAM-10M)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY'
from __future__ import annotations

import json
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (repository id, parquet shards concatenated in order, output file name)
targets = [
    ("Mohammadta/BEAM", ["data/100K-00000-of-00001.parquet"], "beam_100k.json"),
    ("Mohammadta/BEAM", ["data/500K-00000-of-00001.parquet"], "beam_500k.json"),
    ("Mohammadta/BEAM", ["data/1M-00000-of-00001.parquet"], "beam_1m.json"),
    (
        "Mohammadta/BEAM-10M",
        ["data/10M-00000-of-00002.parquet", "data/10M-00001-of-00002.parquet"],
        "beam_10m.json",
    ),
]


def read_shards(repo_id, shard_names):
    """Fetch every shard and return the rows of all of them as one list."""
    collected: list[dict] = []
    for shard_name in shard_names:
        shard_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=shard_name,
        )
        collected.extend(pq.read_table(shard_path).to_pylist())
    return collected


for repo_id, parquet_files, output_name in targets:
    output_path = out_dir / output_name
    # A non-empty output from an earlier run is kept as-is.
    has_content = output_path.exists() and output_path.stat().st_size > 0
    if has_content:
        print(f"[beam] Reusing {output_name}")
        continue

    rows = read_shards(repo_id, parquet_files)
    with output_path.open("w", encoding="utf-8") as sink:
        json.dump(rows, sink, ensure_ascii=False)
    print(f"[beam] Wrote {output_name} ({len(rows)} conversations)")
PY
  echo "[beam] Downloaded to $dir"
}
|
|
285
|
+
|
|
286
|
+
#######################################
# Mirror the PersonaMem-v2 benchmark CSV and the chat histories it references
# into $DATASETS_DIR/personamem.
# Skips when both benchmark/text/benchmark.csv and the completion marker
# data/chat_history_32k/.download-complete exist. The marker is written only
# after the embedded Python program finishes successfully.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress lines on stdout
# Returns:   0 on success or when already present; the Python program's
#            status when it fails
#######################################
download_personamem() {
  local dir="$DATASETS_DIR/personamem"
  if [[ -f "$dir/benchmark/text/benchmark.csv" ]] \
    && [[ -f "$dir/data/chat_history_32k/.download-complete" ]]; then
    echo "[personamem] Already downloaded at $dir"
    return
  fi
  echo "[personamem] Downloading from Hugging Face (bowen-upenn/PersonaMem-v2)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub
  local python_bin
  python_bin="$(resolve_python_bin)"
  # Capture the status explicitly so the completion marker is never written
  # after a failed run, even where errexit is suppressed.
  local status=0
  "$python_bin" - "$dir" <<'PY' || status=$?
from __future__ import annotations

import csv
import os
import shutil
import sys
import time
from pathlib import Path, PurePosixPath

from huggingface_hub import hf_hub_download

REPO_ID = "bowen-upenn/PersonaMem-v2"
BENCHMARK_PATH = "benchmark/text/benchmark.csv"

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)
out_dir_root = out_dir.resolve()
# Optional access token, taken from either environment variable.
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")


def resolve_dataset_destination(relative_path: str) -> tuple[str, Path]:
    """Validate a repository-relative path and map it below out_dir.

    Returns the cleaned POSIX-style relative path and the resolved local
    destination. Raises ValueError for empty, absolute, or parent-escaping
    paths, and for paths that resolve outside the output directory.
    """
    normalized = relative_path.strip().replace("\\", "/")
    if not normalized:
        raise ValueError("dataset path cannot be empty")

    posix_path = PurePosixPath(normalized)
    if posix_path.is_absolute():
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        )

    safe_parts = []
    for part in posix_path.parts:
        if part in ("", "."):
            continue
        if part == "..":
            raise ValueError(
                f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
            )
        safe_parts.append(part)

    if not safe_parts:
        raise ValueError("dataset path cannot resolve to the dataset root")

    # Resolve the final location and re-check containment as a second guard.
    destination = (out_dir / Path(*safe_parts)).resolve()
    try:
        destination.relative_to(out_dir_root)
    except ValueError as exc:
        raise ValueError(
            f'PersonaMem dataset file reference "{relative_path}" must stay within dataset root.'
        ) from exc

    return PurePosixPath(*safe_parts).as_posix(), destination


def copy_dataset_file(relative_path: str) -> Path:
    """Download one repository file and copy it to its place below out_dir."""
    safe_relative_path, destination = resolve_dataset_destination(relative_path)
    source = Path(
        hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=safe_relative_path,
            token=token,
        )
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, destination)
    return destination


benchmark_destination = copy_dataset_file(BENCHMARK_PATH)

# Collect the distinct, non-empty chat history references from the CSV.
with benchmark_destination.open("r", encoding="utf8", newline="") as handle:
    reader = csv.DictReader(handle)
    history_paths = sorted(
        {
            (row.get("chat_history_32k_link") or "").strip()
            for row in reader
            if (row.get("chat_history_32k_link") or "").strip()
        }
    )

if not history_paths:
    raise SystemExit("PersonaMem benchmark.csv did not contain any chat_history_32k_link values")

completed = 0
for index, relative_path in enumerate(history_paths, start=1):
    _, destination = resolve_dataset_destination(relative_path)
    if destination.is_file():
        # Already mirrored by an earlier run.
        completed += 1
        continue

    # Up to five attempts, sleeping min(30, 2**attempt) seconds between them.
    for attempt in range(1, 6):
        try:
            copy_dataset_file(relative_path)
            completed += 1
            break
        except Exception as exc:  # noqa: BLE001
            if attempt == 5:
                raise SystemExit(
                    f"failed to download PersonaMem asset {relative_path}: {exc}"
                ) from exc
            delay_seconds = min(30, 2 ** attempt)
            print(
                f"[personamem] Retry {attempt}/5 for {relative_path} after error: {exc}. "
                f"Sleeping {delay_seconds}s..."
            )
            time.sleep(delay_seconds)

    if index % 100 == 0 or index == len(history_paths):
        print(f"[personamem] Downloaded {completed}/{len(history_paths)} chat histories")

print(
    f"[personamem] Mirrored benchmark.csv and {completed} chat histories into {out_dir}"
)
PY
  if [[ $status -ne 0 ]]; then
    return "$status"
  fi
  # The marker directory exists only if the CSV referenced files below it, so
  # create it explicitly rather than letting `touch` fail after a full mirror.
  mkdir -p "$dir/data/chat_history_32k"
  touch "$dir/data/chat_history_32k/.download-complete"
  echo "[personamem] Downloaded to $dir"
}
|
|
418
|
+
|
|
419
|
+
#######################################
# Build $DATASETS_DIR/membench/membench.json from the Membench repository.
# Clones the repository into a temporary directory and runs the embedded
# Python program, which flattens MemData/FirstAgent and MemData/ThirdAgent
# JSON files into a single list of cases. The temporary clone is removed on
# every exit path, and a partially written output is discarded on failure.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress and error lines on stdout
# Returns:   0 on success or when already present; non-zero when the clone,
#            interpreter lookup, or normalization fails
#######################################
download_membench() {
  local dir="$DATASETS_DIR/membench"
  if [[ -f "$dir/membench.json" ]]; then
    echo "[membench] Already downloaded at $dir"
    return
  fi
  echo "[membench] Downloading and normalizing from GitHub (import-myself/Membench)..."
  mkdir -p "$dir"
  local tmpdir
  tmpdir=$(mktemp -d)
  git clone --depth 1 https://github.com/import-myself/Membench.git "$tmpdir/repo" 2>/dev/null || {
    echo "[membench] ERROR: Could not clone. Try manually:"
    echo " git clone --depth 1 https://github.com/import-myself/Membench.git /tmp/membench"
    rm -rf "$tmpdir"
    return 1
  }
  local python_bin
  python_bin="$(resolve_python_bin)" || {
    rm -rf "$tmpdir"
    return 1
  }
  # Capture the status instead of letting errexit end the script here, so the
  # temporary clone is always removed.
  local status=0
  "$python_bin" - "$tmpdir/repo" "$dir/membench.json" <<'PY' || status=$?
from __future__ import annotations

import json
import re
import sys
from pathlib import Path

repo_root = Path(sys.argv[1])
output_path = Path(sys.argv[2])


def normalize_text(value):
    """Reduce a string, list, None, or other value to a stripped string.

    For a list, the first element that normalizes to non-empty text wins.
    """
    if isinstance(value, str):
        return value.strip()
    if isinstance(value, list):
        for item in value:
            text = normalize_text(item)
            if text:
                return text
        return ""
    if value is None:
        return ""
    return str(value).strip()


def sanitize_case_id(value: str) -> str:
    """Lower-case and collapse every run of other characters to one hyphen."""
    return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")


def iter_qa_entries(value):
    """Return the QA dictionaries held by a dict, a list, or anything else."""
    if isinstance(value, dict):
        return [value]
    if isinstance(value, list):
        return [entry for entry in value if isinstance(entry, dict)]
    return []


def build_turns(message_list):
    """Flatten sessions of {"user", "assistant"} steps into role/content turns."""
    turns = []
    if not isinstance(message_list, list):
        return turns
    for session in message_list:
        # A bare step is treated as a one-step session.
        if isinstance(session, dict):
            session = [session]
        if not isinstance(session, list):
            continue
        for step in session:
            if not isinstance(step, dict):
                continue
            user = normalize_text(step.get("user"))
            assistant = normalize_text(step.get("assistant"))
            if user:
                turns.append({"role": "user", "content": user})
            if assistant:
                turns.append({"role": "assistant", "content": assistant})
    return turns


cases = []
# (directory below MemData, scenario label written to each case)
source_roots = [
    ("FirstAgent", "participant"),
    ("ThirdAgent", "observation"),
]

for source_root, scenario in source_roots:
    for dataset_path in sorted((repo_root / "MemData" / source_root).glob("*.json")):
        label = dataset_path.stem.lower()
        # Files with "highlevel" in their name are labelled reflective.
        memory_type = "reflective" if "highlevel" in label else "factual"
        level = "high_level" if memory_type == "reflective" else "low_level"
        document = json.loads(dataset_path.read_text(encoding="utf-8"))

        if not isinstance(document, dict):
            continue

        for group_name, entries in document.items():
            if not isinstance(entries, list):
                continue
            for entry_index, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    continue

                turns = build_turns(entry.get("message_list") or entry.get("messages"))
                if not turns:
                    continue

                qa_entries = iter_qa_entries(
                    entry.get("QA")
                    or entry.get("qa")
                    or entry.get("qas")
                    or entry.get("question_answers")
                )
                for qa_index, qa in enumerate(qa_entries):
                    question = normalize_text(qa.get("question") or qa.get("query"))
                    answer = normalize_text(qa.get("answer"))
                    if not question or not answer:
                        continue

                    # Falls back to the position in the QA list when no id is given.
                    qid = normalize_text(
                        qa.get("qid") or qa.get("id") or qa.get("question_id") or qa_index
                    )
                    raw_id = (
                        f"{source_root}-{dataset_path.stem}-{group_name}-"
                        f"{entry_index}-{qid}"
                    )
                    case_id = sanitize_case_id(raw_id)
                    cases.append(
                        {
                            "id": case_id,
                            "memoryType": memory_type,
                            "scenario": scenario,
                            "level": level,
                            "turns": turns,
                            "question": question,
                            "answer": answer,
                        }
                    )

if not cases:
    raise SystemExit("MemBench normalization produced no runnable cases.")

output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w", encoding="utf-8") as handle:
    json.dump(cases, handle, ensure_ascii=False)

print(f"[membench] Wrote {output_path.name} ({len(cases)} cases)")
PY
  rm -rf "$tmpdir"
  if [[ $status -ne 0 ]]; then
    # membench.json did not exist when this run started, so anything there now
    # is an incomplete write that must not count as downloaded next time.
    rm -f "$dir/membench.json"
    echo "[membench] ERROR: Normalization failed (exit status $status)"
    return "$status"
  fi
  echo "[membench] Downloaded to $dir"
}
|
|
562
|
+
|
|
563
|
+
#######################################
# Fetch MemoryAgentBench into $DATASETS_DIR/memoryagentbench.
# Skips when the four split files and entity2id.json already exist; otherwise
# requires the huggingface_hub and pyarrow Python modules and runs the
# embedded program, which converts each parquet split to JSON and copies the
# entity mapping.
# Globals:   DATASETS_DIR (read)
# Outputs:   progress lines on stdout
# Returns:   0 on success or when already present
#######################################
download_memoryagentbench() {
  local dir="$DATASETS_DIR/memoryagentbench"
  local artifact
  local have_all=1
  for artifact in \
    Accurate_Retrieval.json \
    Test_Time_Learning.json \
    Long_Range_Understanding.json \
    Conflict_Resolution.json \
    entity2id.json; do
    [[ -f "$dir/$artifact" ]] || have_all=0
  done
  if [[ $have_all -eq 1 ]]; then
    echo "[memoryagentbench] Already downloaded at $dir"
    return
  fi
  echo "[memoryagentbench] Downloading from Hugging Face sources (ai-hyz/MemoryAgentBench)..."
  mkdir -p "$dir"
  require_python_modules huggingface_hub pyarrow
  local python_bin
  python_bin="$(resolve_python_bin)"
  "$python_bin" - "$dir" <<'PY'
from __future__ import annotations

import json
import sys
from pathlib import Path

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download

out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

# (parquet file in the dataset repository, JSON file written locally)
targets = [
    ("data/Accurate_Retrieval-00000-of-00001.parquet", "Accurate_Retrieval.json"),
    ("data/Test_Time_Learning-00000-of-00001.parquet", "Test_Time_Learning.json"),
    ("data/Long_Range_Understanding-00000-of-00001.parquet", "Long_Range_Understanding.json"),
    ("data/Conflict_Resolution-00000-of-00001.parquet", "Conflict_Resolution.json"),
]


def fetch(filename):
    """Download one file of the dataset repository and return its local path."""
    return hf_hub_download(
        repo_id="ai-hyz/MemoryAgentBench",
        repo_type="dataset",
        filename=filename,
    )


def has_content(path):
    """True when the path exists and is not empty."""
    return path.exists() and path.stat().st_size > 0


for parquet_file, output_name in targets:
    output_path = out_dir / output_name
    if has_content(output_path):
        print(f"[memoryagentbench] Reusing {output_name}")
        continue

    rows = pq.read_table(fetch(parquet_file)).to_pylist()
    with output_path.open("w", encoding="utf-8") as sink:
        json.dump(rows, sink, ensure_ascii=False)
    print(f"[memoryagentbench] Wrote {output_name} ({len(rows)} samples)")

entity_output_path = out_dir / "entity2id.json"
if has_content(entity_output_path):
    print("[memoryagentbench] Reusing entity2id.json")
else:
    # The mapping is parsed and re-serialized rather than copied verbatim.
    with open(fetch("entity2id.json"), "r", encoding="utf-8") as source:
        entity_mapping = json.load(source)
    with entity_output_path.open("w", encoding="utf-8") as sink:
        json.dump(entity_mapping, sink, ensure_ascii=False)
    print(f"[memoryagentbench] Wrote entity2id.json ({len(entity_mapping)} mappings)")
PY
  echo "[memoryagentbench] Downloaded to $dir"
}
|
|
627
|
+
|
|
628
|
+
# ── Main ──
# Verify the external tools, make sure the dataset root exists, then run the
# downloader for the selected benchmark, or every downloader in the listed
# order for "all". With errexit active, the first failing downloader ends
# the script.

check_deps
mkdir -p "$DATASETS_DIR"

# Every benchmark name accepted by --benchmark, in the order used for "all".
# Each name maps to a function called download_<name>, with "-" replaced by "_".
benchmark_names=(
  ama-bench
  longmemeval
  amemgym
  locomo
  memory-arena
  beam
  personamem
  membench
  memoryagentbench
)

selected=()
if [[ "$BENCHMARK" == "all" ]]; then
  selected=("${benchmark_names[@]}")
else
  for known_name in "${benchmark_names[@]}"; do
    if [[ "$known_name" == "$BENCHMARK" ]]; then
      selected=("$known_name")
    fi
  done
fi

if [[ ${#selected[@]} -eq 0 ]]; then
  echo "Unknown benchmark: $BENCHMARK"
  echo "Available: ama-bench, longmemeval, amemgym, locomo, memory-arena, beam, personamem, membench, memoryagentbench, all"
  exit 1
fi

for chosen_name in "${selected[@]}"; do
  "download_${chosen_name//-/_}"
done

echo ""
echo "Done. Datasets at: $DATASETS_DIR"
|