rMatch 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rmatch-0.2.0/.gitignore +9 -0
- rmatch-0.2.0/.pre-commit-config.yaml +21 -0
- rmatch-0.2.0/PKG-INFO +291 -0
- rmatch-0.2.0/README.md +254 -0
- rmatch-0.2.0/imports/import_cyoa.py +80 -0
- rmatch-0.2.0/imports/import_filmfest.py +150 -0
- rmatch-0.2.0/imports/import_memsearch.py +313 -0
- rmatch-0.2.0/imports/import_nfrd.py +286 -0
- rmatch-0.2.0/pyproject.toml +52 -0
- rmatch-0.2.0/scripts/2026-ccn/ccn_dataset_stats.py +128 -0
- rmatch-0.2.0/scripts/2026-ccn/ccn_example_matrices.py +247 -0
- rmatch-0.2.0/scripts/2026-ccn/ccn_plots.py +555 -0
- rmatch-0.2.0/scripts/2026-ccn/ccn_plots_pearsonr.py +612 -0
- rmatch-0.2.0/scripts/create_benchmark.py +302 -0
- rmatch-0.2.0/src/rmatch/__init__.py +53 -0
- rmatch-0.2.0/src/rmatch/evaluate.py +736 -0
- rmatch-0.2.0/src/rmatch/load.py +163 -0
- rmatch-0.2.0/src/rmatch/match.py +381 -0
- rmatch-0.2.0/src/rmatch/matchers/__init__.py +13 -0
- rmatch-0.2.0/src/rmatch/matchers/matcher.py +71 -0
- rmatch-0.2.0/src/rmatch/matchers/matcher_anthropic.py +163 -0
- rmatch-0.2.0/src/rmatch/matchers/matcher_huggingface.py +176 -0
- rmatch-0.2.0/src/rmatch/matchers/matcher_openai.py +137 -0
- rmatch-0.2.0/src/rmatch/matchers/matcher_reranker.py +76 -0
- rmatch-0.2.0/src/rmatch/prompt.py +195 -0
- rmatch-0.2.0/src/rmatch/utils.py +73 -0
- rmatch-0.2.0/uv.lock +2538 -0
rmatch-0.2.0/.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
# list of supported hooks: https://pre-commit.com/hooks.html
|
|
6
|
+
- id: trailing-whitespace
|
|
7
|
+
- id: end-of-file-fixer
|
|
8
|
+
- id: check-yaml
|
|
9
|
+
- id: check-added-large-files
|
|
10
|
+
- id: debug-statements
|
|
11
|
+
- id: detect-private-key
|
|
12
|
+
|
|
13
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
14
|
+
# Ruff version.
|
|
15
|
+
rev: v0.14.11
|
|
16
|
+
hooks:
|
|
17
|
+
# Run the linter.
|
|
18
|
+
- id: ruff
|
|
19
|
+
args: [--fix]
|
|
20
|
+
# Run the formatter.
|
|
21
|
+
- id: ruff-format
|
rmatch-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rMatch
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Match recall segments with story segments.
|
|
5
|
+
Author-email: Gabriel Kressin Palacios <gkressi1@jhu.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: <3.14,>=3.12
|
|
8
|
+
Requires-Dist: accelerate<2,>=1.10.1
|
|
9
|
+
Requires-Dist: anthropic>=0.84.0
|
|
10
|
+
Requires-Dist: bitsandbytes>=0.45.0
|
|
11
|
+
Requires-Dist: chardet>=5.2.0
|
|
12
|
+
Requires-Dist: codecarbon>=2.8.3
|
|
13
|
+
Requires-Dist: einops>=0.8.1
|
|
14
|
+
Requires-Dist: huggingface>=0.0.1
|
|
15
|
+
Requires-Dist: kaleido>=1.2.0
|
|
16
|
+
Requires-Dist: krippendorff>=0.8.2
|
|
17
|
+
Requires-Dist: matplotlib<4,>=3.10.7
|
|
18
|
+
Requires-Dist: nltk>=3.9.2
|
|
19
|
+
Requires-Dist: numpy<3,>=2.3.3
|
|
20
|
+
Requires-Dist: openai>=2.15.0
|
|
21
|
+
Requires-Dist: openpyxl<4,>=3.1.5
|
|
22
|
+
Requires-Dist: pandas<3,>=2.3.3
|
|
23
|
+
Requires-Dist: plotly>=6.6.0
|
|
24
|
+
Requires-Dist: pre-commit<5,>=4.3.0
|
|
25
|
+
Requires-Dist: python-dotenv<2,>=1.1.1
|
|
26
|
+
Requires-Dist: rich<15,>=14.2.0
|
|
27
|
+
Requires-Dist: ruff<0.15,>=0.14.0
|
|
28
|
+
Requires-Dist: scikit-learn>=1.7.2
|
|
29
|
+
Requires-Dist: sentence-transformers<6,>=5.1.1
|
|
30
|
+
Requires-Dist: spacy<4,>=3.8.7
|
|
31
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
32
|
+
Requires-Dist: torch<2.10,>=2.9
|
|
33
|
+
Requires-Dist: torchvision<0.25,>=0.24
|
|
34
|
+
Requires-Dist: tqdm<5,>=4.67.1
|
|
35
|
+
Requires-Dist: transformers<4.57.0
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<h1 align="center">rMatch</h1>
|
|
39
|
+
|
|
40
|
+
<p align="center">Automatic recall & story matching tool.</p>
|
|
41
|
+
|
|
42
|
+
<p align="center">
|
|
43
|
+
<a href="https://www.python.org/"><img alt="" src="https://img.shields.io/badge/code-Python-blue?logo=Python"></a>
|
|
44
|
+
<a href="https://docs.astral.sh/ruff/"><img alt="Ruff" src="https://img.shields.io/badge/code%20style-Ruff-green?logo=Ruff"></a>
|
|
45
|
+
<a href="https://docs.astral.sh/uv/"><img alt="packaging framework: uv" src="https://img.shields.io/badge/packaging-uv-lightblue?logo=uv"></a>
|
|
46
|
+
<a href="https://pre-commit.com/"><img alt="pre-commit" src="https://img.shields.io/badge/tool-Pre%20Commit-yellow?logo=Pre-Commit"></a>
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
|
|
51
|
+
### Command line
|
|
52
|
+
|
|
53
|
+
```sh
|
|
54
|
+
pip install rmatch
|
|
55
|
+
|
|
56
|
+
# single recall file
|
|
57
|
+
rmatch story.txt recall.txt --matcher anthropic
|
|
58
|
+
|
|
59
|
+
# directory of recall files (one per subject)
|
|
60
|
+
rmatch story.txt recalls/ --matcher anthropic
|
|
61
|
+
|
|
62
|
+
# estimate API cost without sending requests
|
|
63
|
+
rmatch story.txt recalls/ --matcher openai --dry-run
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Python API
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from rmatch import Matcher
|
|
70
|
+
|
|
71
|
+
matcher = Matcher(matcher_name="anthropic", api_key="your_api_key")
|
|
72
|
+
matches = matcher.match(
|
|
73
|
+
story_segments=["The cat sat on the mat.", "It purred softly."],
|
|
74
|
+
recall_segments=["A cat was on a mat."],
|
|
75
|
+
)
|
|
76
|
+
# [(0, [0])] — recall segment 0 matched story segment 0
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Or use `run_matching` to load files, run matching, and save results in one call:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from rmatch.match import run_matching
|
|
83
|
+
|
|
84
|
+
results = run_matching(
|
|
85
|
+
story_file="story.txt",
|
|
86
|
+
recall_file="recalls/",
|
|
87
|
+
matcher_name="anthropic",
|
|
88
|
+
api_key="your_api_key",
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Setup API keys
|
|
93
|
+
|
|
94
|
+
API keys are resolved in this order (first match wins):
|
|
95
|
+
|
|
96
|
+
1. **`api_key` argument** passed directly in Python
|
|
97
|
+
2. **`.env` file** in the current working directory
|
|
98
|
+
3. **Environment variables** already set in your shell
|
|
99
|
+
|
|
100
|
+
Set them as environment variables:
|
|
101
|
+
|
|
102
|
+
```sh
|
|
103
|
+
export ANTHROPIC_API_KEY="your_api_key" # for --matcher anthropic (default)
|
|
104
|
+
export OPENAI_API_KEY="your_api_key" # for --matcher openai
|
|
105
|
+
export HF_TOKEN="your_hf_token" # for --matcher huggingface
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Or put a `.env` file in your working directory:
|
|
109
|
+
|
|
110
|
+
```sh
|
|
111
|
+
ANTHROPIC_API_KEY="your_api_key"
|
|
112
|
+
OPENAI_API_KEY="your_api_key"
|
|
113
|
+
HF_TOKEN="your_hf_token"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Output format
|
|
117
|
+
|
|
118
|
+
A JSON file with:
|
|
119
|
+
|
|
120
|
+
```json
|
|
121
|
+
{
|
|
122
|
+
"matcher_name": "anthropic",
|
|
123
|
+
"story_name": "story",
|
|
124
|
+
"story_segmentation": "lines",
|
|
125
|
+
"recall_segmentation": "lines",
|
|
126
|
+
"matches": {
|
|
127
|
+
"sub-001": [[0, [3, 7]], [1, [12]]],
|
|
128
|
+
"sub-002": [[0, [1]], [1, [5, 6]]]
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Each entry in `matches` maps a subject ID to a list of `[recall_segment_id, [matched_story_segment_ids...]]` pairs.
|
|
134
|
+
|
|
135
|
+
## Benchmarking
|
|
136
|
+
|
|
137
|
+
Requires [rBench](https://github.com/GabrielKP/rBench):
|
|
138
|
+
|
|
139
|
+
```sh
|
|
140
|
+
# outside of this dir
|
|
141
|
+
git clone git@github.com:GabrielKP/rBench.git
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Add to `.env` or environment:
|
|
145
|
+
```sh
|
|
146
|
+
BENCHMARK_ROOT="path/to/rBench"
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Run:
|
|
150
|
+
```sh
|
|
151
|
+
uv run src/rmatch/evaluate.py {alice,monthiversary,memsearch}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## API / Documentation
|
|
157
|
+
|
|
158
|
+
### Input formats
|
|
159
|
+
|
|
160
|
+
**Story file** — a `.txt` or `.json` file containing the story segments to match against.
|
|
161
|
+
|
|
162
|
+
- **`.txt`**: one segment per line (blank lines are ignored).
|
|
163
|
+
- **`.json`**: must contain a `"segments"` array of strings. Optionally includes `"segmentation_method"`.
|
|
164
|
+
|
|
165
|
+
```json
|
|
166
|
+
{
|
|
167
|
+
"segmentation_method": "sentences",
|
|
168
|
+
"segments": [
|
|
169
|
+
"The cat sat on the mat.",
|
|
170
|
+
"It purred softly."
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Recall file** — a `.txt` file, a `.json` file, or a **directory** of either.
|
|
176
|
+
|
|
177
|
+
- **`.txt` file**: one recall segment per line. The filename stem is used as the subject ID.
|
|
178
|
+
- **`.json` file**: must contain a `"recalls"` object mapping subject IDs to segment arrays.
|
|
179
|
+
- **Directory**: all `.txt` or all `.json` files inside are loaded (mixing formats is not allowed). Each `.txt` file becomes one subject; `.json` files are merged.
|
|
180
|
+
|
|
181
|
+
```json
|
|
182
|
+
{
|
|
183
|
+
"segmentation_method": "clauses",
|
|
184
|
+
"recalls": {
|
|
185
|
+
"sub-001": ["A cat was on a mat.", "It was purring."],
|
|
186
|
+
"sub-002": ["There was a cat on something."]
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### CLI reference
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
rmatch STORY_FILE RECALL_FILE [options]
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
#### General options
|
|
198
|
+
|
|
199
|
+
- **`STORY_FILE`** *(positional, required)* — Path to the story `.txt` or `.json` file.
|
|
200
|
+
- **`RECALL_FILE`** *(positional, required)* — Path to a recall `.txt`/`.json` file or a directory of them.
|
|
201
|
+
- **`-M`, `--matcher`** *(str)* — Which matcher backend to use. One of: `anthropic`, `openai`, `reranker`, `huggingface`. Default: `anthropic`.
|
|
202
|
+
- **`-m`, `--model-name`** *(str)* — Override the matcher's default model (see defaults below).
|
|
203
|
+
- **`--track-emissions`** — Enable [CodeCarbon](https://github.com/mlco2/codecarbon) carbon-emissions tracking. Results are saved beside the output file.
|
|
204
|
+
- **`-f`, `--overwrite`** — Overwrite the output file if it already exists.
|
|
205
|
+
|
|
206
|
+
#### LLM matcher options (anthropic, openai, huggingface)
|
|
207
|
+
|
|
208
|
+
- **`--window-size`** *(int)* — Number of surrounding recall segments (before and after) to include as context for each target segment. Set to `0` to disable context. Default: `5`.
|
|
209
|
+
- **`--dry-run`** — *anthropic & openai only.* Estimate token usage and cost without making API calls.
|
|
210
|
+
|
|
211
|
+
#### Self-hosted / HuggingFace options
|
|
212
|
+
|
|
213
|
+
- **`-q`, `--quantization`** *(str)* — Load the model in reduced precision: `4bit` (NF4) or `8bit`. Requires `bitsandbytes`.
|
|
214
|
+
- **`-bs`, `--batch-size`** *(int)* — Number of prompts to process in parallel. Default: `4`.
|
|
215
|
+
- **`--max-new-tokens`** *(int)* — Maximum tokens the model may generate per prompt. Default: `64`.
|
|
216
|
+
- **`--verbose-errors`** — Print the raw model output when parsing fails. Useful for debugging prompt issues.
|
|
217
|
+
|
|
218
|
+
#### Reranker options
|
|
219
|
+
|
|
220
|
+
- **`--device`** *(str)* — PyTorch device for the reranker model (e.g. `cpu`, `cuda`, `mps`). Default: auto.
|
|
221
|
+
- **`--threshold`** *(float)* — Minimum similarity score for a story segment to be considered a match. Default: `0.09`.
|
|
222
|
+
- **`--top-k`** *(int)* — Number of top-scoring story candidates to evaluate per recall segment. Default: `5`.
|
|
223
|
+
|
|
224
|
+
### Default models
|
|
225
|
+
|
|
226
|
+
- **anthropic** — `claude-opus-4-6`
|
|
227
|
+
- **openai** — `gpt-4.1`
|
|
228
|
+
- **reranker** — `BAAI/bge-reranker-v2-m3`
|
|
229
|
+
- **huggingface** — `meta-llama/Llama-3.2-1B-Instruct`
|
|
230
|
+
|
|
231
|
+
### Python API
|
|
232
|
+
|
|
233
|
+
#### `Matcher` (main entry point)
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
from rmatch import Matcher
|
|
237
|
+
|
|
238
|
+
matcher = Matcher(matcher_name="anthropic", model_name=None, **kwargs)
|
|
239
|
+
matches = matcher.match(story_segments, recall_segments)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
`Matcher(matcher_name, **kwargs)` is a factory — it returns the appropriate subclass based on `matcher_name`. All keyword arguments are forwarded to the subclass constructor.
|
|
243
|
+
|
|
244
|
+
**Constructor arguments:**
|
|
245
|
+
|
|
246
|
+
- **`model_name`** *(str)* — Override the default model. Applies to all matchers.
|
|
247
|
+
- **`window_size`** *(int)* — Context window radius around the target recall segment. Default: `5`. Applies to: `anthropic`, `openai`, `huggingface`.
|
|
248
|
+
- **`dry_run`** *(bool)* — Estimate cost without calling the API. Applies to: `anthropic`, `openai`.
|
|
249
|
+
- **`api_key`** *(str)* — API key. Falls back to `.env`, then environment variables. Applies to: `anthropic`, `openai`, `huggingface`.
|
|
250
|
+
- **`device`** *(str)* — PyTorch device string. Applies to: `reranker`.
|
|
251
|
+
- **`threshold`** *(float)* — Score threshold for matches. Default: `0.09`. Applies to: `reranker`.
|
|
252
|
+
- **`top_k`** *(int)* — Top-k candidates per recall segment. Default: `5`. Applies to: `reranker`.
|
|
253
|
+
- **`quantization`** *(str)* — `"4bit"` or `"8bit"`. Applies to: `huggingface`.
|
|
254
|
+
- **`batch_size`** *(int)* — Batch size for inference. Default: `4`. Applies to: `huggingface`.
|
|
255
|
+
- **`max_new_tokens`** *(int)* — Max generated tokens. Default: `64`. Applies to: `huggingface`.
|
|
256
|
+
- **`verbose_errors`** *(bool)* — Log raw output on parse failures. Applies to: `huggingface`.
|
|
257
|
+
|
|
258
|
+
**`matcher.match(story_segments, recall_segments)`**
|
|
259
|
+
|
|
260
|
+
- **`story_segments`** *(list[str])* — Ordered list of story segments (the ground-truth story elements).
|
|
261
|
+
- **`recall_segments`** *(list[str])* — Ordered list of a single participant's recall segments.
|
|
262
|
+
|
|
263
|
+
Returns `list[tuple[int, list[int]]]` — one entry per recall segment:
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
[
|
|
267
|
+
(0, [2, 5]), # recall segment 0 matched story segments 2 and 5
|
|
268
|
+
(1, []), # recall segment 1 had no matches
|
|
269
|
+
(2, [0]), # recall segment 2 matched story segment 0
|
|
270
|
+
]
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
#### `run_matching` (file-level convenience)
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
from rmatch.match import run_matching
|
|
277
|
+
|
|
278
|
+
results = run_matching(
|
|
279
|
+
story_file, # Path — story .txt or .json
|
|
280
|
+
recall_file, # Path — recall file or directory
|
|
281
|
+
matcher_name, # str — "anthropic", "openai", "reranker", "huggingface"
|
|
282
|
+
track_emissions, # bool — enable CodeCarbon tracking
|
|
283
|
+
story_name=None, # str | None — override auto-detected story name
|
|
284
|
+
story_segmentation=None, # str | None — override detected segmentation method
|
|
285
|
+
recall_segmentation=None, # str | None — override detected segmentation method
|
|
286
|
+
overwrite=False, # bool — overwrite existing output file
|
|
287
|
+
**kwargs, # forwarded to the Matcher constructor (model_name, window_size, etc.)
|
|
288
|
+
)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Loads story and recall files, runs matching for every subject, and saves a JSON results file. Returns the output dictionary.
|
rmatch-0.2.0/README.md
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
<h1 align="center">rMatch</h1>
|
|
2
|
+
|
|
3
|
+
<p align="center">Automatic recall & story matching tool.</p>
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<a href="https://www.python.org/"><img alt="" src="https://img.shields.io/badge/code-Python-blue?logo=Python"></a>
|
|
7
|
+
<a href="https://docs.astral.sh/ruff/"><img alt="Ruff" src="https://img.shields.io/badge/code%20style-Ruff-green?logo=Ruff"></a>
|
|
8
|
+
<a href="https://docs.astral.sh/uv/"><img alt="packaging framework: uv" src="https://img.shields.io/badge/packaging-uv-lightblue?logo=uv"></a>
|
|
9
|
+
<a href="https://pre-commit.com/"><img alt="pre-commit" src="https://img.shields.io/badge/tool-Pre%20Commit-yellow?logo=Pre-Commit"></a>
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## Quick start
|
|
13
|
+
|
|
14
|
+
### Command line
|
|
15
|
+
|
|
16
|
+
```sh
|
|
17
|
+
pip install rmatch
|
|
18
|
+
|
|
19
|
+
# single recall file
|
|
20
|
+
rmatch story.txt recall.txt --matcher anthropic
|
|
21
|
+
|
|
22
|
+
# directory of recall files (one per subject)
|
|
23
|
+
rmatch story.txt recalls/ --matcher anthropic
|
|
24
|
+
|
|
25
|
+
# estimate API cost without sending requests
|
|
26
|
+
rmatch story.txt recalls/ --matcher openai --dry-run
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Python API
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from rmatch import Matcher
|
|
33
|
+
|
|
34
|
+
matcher = Matcher(matcher_name="anthropic", api_key="your_api_key")
|
|
35
|
+
matches = matcher.match(
|
|
36
|
+
story_segments=["The cat sat on the mat.", "It purred softly."],
|
|
37
|
+
recall_segments=["A cat was on a mat."],
|
|
38
|
+
)
|
|
39
|
+
# [(0, [0])] — recall segment 0 matched story segment 0
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Or use `run_matching` to load files, run matching, and save results in one call:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from rmatch.match import run_matching
|
|
46
|
+
|
|
47
|
+
results = run_matching(
|
|
48
|
+
story_file="story.txt",
|
|
49
|
+
recall_file="recalls/",
|
|
50
|
+
matcher_name="anthropic",
|
|
51
|
+
api_key="your_api_key",
|
|
52
|
+
)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Setup API keys
|
|
56
|
+
|
|
57
|
+
API keys are resolved in this order (first match wins):
|
|
58
|
+
|
|
59
|
+
1. **`api_key` argument** passed directly in Python
|
|
60
|
+
2. **`.env` file** in the current working directory
|
|
61
|
+
3. **Environment variables** already set in your shell
|
|
62
|
+
|
|
63
|
+
Set them as environment variables:
|
|
64
|
+
|
|
65
|
+
```sh
|
|
66
|
+
export ANTHROPIC_API_KEY="your_api_key" # for --matcher anthropic (default)
|
|
67
|
+
export OPENAI_API_KEY="your_api_key" # for --matcher openai
|
|
68
|
+
export HF_TOKEN="your_hf_token" # for --matcher huggingface
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Or put a `.env` file in your working directory:
|
|
72
|
+
|
|
73
|
+
```sh
|
|
74
|
+
ANTHROPIC_API_KEY="your_api_key"
|
|
75
|
+
OPENAI_API_KEY="your_api_key"
|
|
76
|
+
HF_TOKEN="your_hf_token"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Output format
|
|
80
|
+
|
|
81
|
+
A JSON file with:
|
|
82
|
+
|
|
83
|
+
```json
|
|
84
|
+
{
|
|
85
|
+
"matcher_name": "anthropic",
|
|
86
|
+
"story_name": "story",
|
|
87
|
+
"story_segmentation": "lines",
|
|
88
|
+
"recall_segmentation": "lines",
|
|
89
|
+
"matches": {
|
|
90
|
+
"sub-001": [[0, [3, 7]], [1, [12]]],
|
|
91
|
+
"sub-002": [[0, [1]], [1, [5, 6]]]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Each entry in `matches` maps a subject ID to a list of `[recall_segment_id, [matched_story_segment_ids...]]` pairs.
|
|
97
|
+
|
|
98
|
+
## Benchmarking
|
|
99
|
+
|
|
100
|
+
Requires [rBench](https://github.com/GabrielKP/rBench):
|
|
101
|
+
|
|
102
|
+
```sh
|
|
103
|
+
# outside of this dir
|
|
104
|
+
git clone git@github.com:GabrielKP/rBench.git
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Add to `.env` or environment:
|
|
108
|
+
```sh
|
|
109
|
+
BENCHMARK_ROOT="path/to/rBench"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Run:
|
|
113
|
+
```sh
|
|
114
|
+
uv run src/rmatch/evaluate.py {alice,monthiversary,memsearch}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## API / Documentation
|
|
120
|
+
|
|
121
|
+
### Input formats
|
|
122
|
+
|
|
123
|
+
**Story file** — a `.txt` or `.json` file containing the story segments to match against.
|
|
124
|
+
|
|
125
|
+
- **`.txt`**: one segment per line (blank lines are ignored).
|
|
126
|
+
- **`.json`**: must contain a `"segments"` array of strings. Optionally includes `"segmentation_method"`.
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"segmentation_method": "sentences",
|
|
131
|
+
"segments": [
|
|
132
|
+
"The cat sat on the mat.",
|
|
133
|
+
"It purred softly."
|
|
134
|
+
]
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Recall file** — a `.txt` file, a `.json` file, or a **directory** of either.
|
|
139
|
+
|
|
140
|
+
- **`.txt` file**: one recall segment per line. The filename stem is used as the subject ID.
|
|
141
|
+
- **`.json` file**: must contain a `"recalls"` object mapping subject IDs to segment arrays.
|
|
142
|
+
- **Directory**: all `.txt` or all `.json` files inside are loaded (mixing formats is not allowed). Each `.txt` file becomes one subject; `.json` files are merged.
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"segmentation_method": "clauses",
|
|
147
|
+
"recalls": {
|
|
148
|
+
"sub-001": ["A cat was on a mat.", "It was purring."],
|
|
149
|
+
"sub-002": ["There was a cat on something."]
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### CLI reference
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
rmatch STORY_FILE RECALL_FILE [options]
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
#### General options
|
|
161
|
+
|
|
162
|
+
- **`STORY_FILE`** *(positional, required)* — Path to the story `.txt` or `.json` file.
|
|
163
|
+
- **`RECALL_FILE`** *(positional, required)* — Path to a recall `.txt`/`.json` file or a directory of them.
|
|
164
|
+
- **`-M`, `--matcher`** *(str)* — Which matcher backend to use. One of: `anthropic`, `openai`, `reranker`, `huggingface`. Default: `anthropic`.
|
|
165
|
+
- **`-m`, `--model-name`** *(str)* — Override the matcher's default model (see defaults below).
|
|
166
|
+
- **`--track-emissions`** — Enable [CodeCarbon](https://github.com/mlco2/codecarbon) carbon-emissions tracking. Results are saved beside the output file.
|
|
167
|
+
- **`-f`, `--overwrite`** — Overwrite the output file if it already exists.
|
|
168
|
+
|
|
169
|
+
#### LLM matcher options (anthropic, openai, huggingface)
|
|
170
|
+
|
|
171
|
+
- **`--window-size`** *(int)* — Number of surrounding recall segments (before and after) to include as context for each target segment. Set to `0` to disable context. Default: `5`.
|
|
172
|
+
- **`--dry-run`** — *anthropic & openai only.* Estimate token usage and cost without making API calls.
|
|
173
|
+
|
|
174
|
+
#### Self-hosted / HuggingFace options
|
|
175
|
+
|
|
176
|
+
- **`-q`, `--quantization`** *(str)* — Load the model in reduced precision: `4bit` (NF4) or `8bit`. Requires `bitsandbytes`.
|
|
177
|
+
- **`-bs`, `--batch-size`** *(int)* — Number of prompts to process in parallel. Default: `4`.
|
|
178
|
+
- **`--max-new-tokens`** *(int)* — Maximum tokens the model may generate per prompt. Default: `64`.
|
|
179
|
+
- **`--verbose-errors`** — Print the raw model output when parsing fails. Useful for debugging prompt issues.
|
|
180
|
+
|
|
181
|
+
#### Reranker options
|
|
182
|
+
|
|
183
|
+
- **`--device`** *(str)* — PyTorch device for the reranker model (e.g. `cpu`, `cuda`, `mps`). Default: auto.
|
|
184
|
+
- **`--threshold`** *(float)* — Minimum similarity score for a story segment to be considered a match. Default: `0.09`.
|
|
185
|
+
- **`--top-k`** *(int)* — Number of top-scoring story candidates to evaluate per recall segment. Default: `5`.
|
|
186
|
+
|
|
187
|
+
### Default models
|
|
188
|
+
|
|
189
|
+
- **anthropic** — `claude-opus-4-6`
|
|
190
|
+
- **openai** — `gpt-4.1`
|
|
191
|
+
- **reranker** — `BAAI/bge-reranker-v2-m3`
|
|
192
|
+
- **huggingface** — `meta-llama/Llama-3.2-1B-Instruct`
|
|
193
|
+
|
|
194
|
+
### Python API
|
|
195
|
+
|
|
196
|
+
#### `Matcher` (main entry point)
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from rmatch import Matcher
|
|
200
|
+
|
|
201
|
+
matcher = Matcher(matcher_name="anthropic", model_name=None, **kwargs)
|
|
202
|
+
matches = matcher.match(story_segments, recall_segments)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
`Matcher(matcher_name, **kwargs)` is a factory — it returns the appropriate subclass based on `matcher_name`. All keyword arguments are forwarded to the subclass constructor.
|
|
206
|
+
|
|
207
|
+
**Constructor arguments:**
|
|
208
|
+
|
|
209
|
+
- **`model_name`** *(str)* — Override the default model. Applies to all matchers.
|
|
210
|
+
- **`window_size`** *(int)* — Context window radius around the target recall segment. Default: `5`. Applies to: `anthropic`, `openai`, `huggingface`.
|
|
211
|
+
- **`dry_run`** *(bool)* — Estimate cost without calling the API. Applies to: `anthropic`, `openai`.
|
|
212
|
+
- **`api_key`** *(str)* — API key. Falls back to `.env`, then environment variables. Applies to: `anthropic`, `openai`, `huggingface`.
|
|
213
|
+
- **`device`** *(str)* — PyTorch device string. Applies to: `reranker`.
|
|
214
|
+
- **`threshold`** *(float)* — Score threshold for matches. Default: `0.09`. Applies to: `reranker`.
|
|
215
|
+
- **`top_k`** *(int)* — Top-k candidates per recall segment. Default: `5`. Applies to: `reranker`.
|
|
216
|
+
- **`quantization`** *(str)* — `"4bit"` or `"8bit"`. Applies to: `huggingface`.
|
|
217
|
+
- **`batch_size`** *(int)* — Batch size for inference. Default: `4`. Applies to: `huggingface`.
|
|
218
|
+
- **`max_new_tokens`** *(int)* — Max generated tokens. Default: `64`. Applies to: `huggingface`.
|
|
219
|
+
- **`verbose_errors`** *(bool)* — Log raw output on parse failures. Applies to: `huggingface`.
|
|
220
|
+
|
|
221
|
+
**`matcher.match(story_segments, recall_segments)`**
|
|
222
|
+
|
|
223
|
+
- **`story_segments`** *(list[str])* — Ordered list of story segments (the ground-truth story elements).
|
|
224
|
+
- **`recall_segments`** *(list[str])* — Ordered list of a single participant's recall segments.
|
|
225
|
+
|
|
226
|
+
Returns `list[tuple[int, list[int]]]` — one entry per recall segment:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
[
|
|
230
|
+
(0, [2, 5]), # recall segment 0 matched story segments 2 and 5
|
|
231
|
+
(1, []), # recall segment 1 had no matches
|
|
232
|
+
(2, [0]), # recall segment 2 matched story segment 0
|
|
233
|
+
]
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
#### `run_matching` (file-level convenience)
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
from rmatch.match import run_matching
|
|
240
|
+
|
|
241
|
+
results = run_matching(
|
|
242
|
+
story_file, # Path — story .txt or .json
|
|
243
|
+
recall_file, # Path — recall file or directory
|
|
244
|
+
matcher_name, # str — "anthropic", "openai", "reranker", "huggingface"
|
|
245
|
+
track_emissions, # bool — enable CodeCarbon tracking
|
|
246
|
+
story_name=None, # str | None — override auto-detected story name
|
|
247
|
+
story_segmentation=None, # str | None — override detected segmentation method
|
|
248
|
+
recall_segmentation=None, # str | None — override detected segmentation method
|
|
249
|
+
overwrite=False, # bool — overwrite existing output file
|
|
250
|
+
**kwargs, # forwarded to the Matcher constructor (model_name, window_size, etc.)
|
|
251
|
+
)
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Loads story and recall files, runs matching for every subject, and saves a JSON results file. Returns the output dictionary.
|
|
rmatch-0.2.0/imports/import_cyoa.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from argparse import ArgumentParser
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def import_cyoa(cyoa_path: Path | str):
|
|
8
|
+
cyoa_path = Path(cyoa_path)
|
|
9
|
+
if not cyoa_path.exists():
|
|
10
|
+
raise FileNotFoundError(f"Cyoa path {cyoa_path} does not exist")
|
|
11
|
+
|
|
12
|
+
for base_story in ["alice", "monthiversary"]:
|
|
13
|
+
# 1. recall data
|
|
14
|
+
recall_dir = cyoa_path / base_story / "3_pasv"
|
|
15
|
+
recall_paths = sorted(list(recall_dir.glob("*recall.xlsx")))
|
|
16
|
+
|
|
17
|
+
for recall_path in recall_paths:
|
|
18
|
+
# filename/storyname
|
|
19
|
+
filestem_splits = recall_path.stem.split("_")
|
|
20
|
+
story_version = filestem_splits[0][-(len(filestem_splits[0]) - 2) :]
|
|
21
|
+
story_name = f"{base_story}_{story_version}"
|
|
22
|
+
sub_id = filestem_splits[1]
|
|
23
|
+
assert filestem_splits[2].startswith("rate-recall")
|
|
24
|
+
|
|
25
|
+
output_dir_recalls_segmented = (
|
|
26
|
+
Path("data") / "cyoa" / story_name / "recalls" / "segmentation"
|
|
27
|
+
)
|
|
28
|
+
output_dir_recalls_segmented.mkdir(parents=True, exist_ok=True)
|
|
29
|
+
|
|
30
|
+
recall_df = pd.read_excel(recall_path)
|
|
31
|
+
|
|
32
|
+
recall_df = recall_df.rename(
|
|
33
|
+
columns={
|
|
34
|
+
"recalled_events": "events",
|
|
35
|
+
"recall_in_temporal_order": "text",
|
|
36
|
+
}
|
|
37
|
+
)
|
|
38
|
+
recall_df["segment"] = list(range(1, len(recall_df) + 1))
|
|
39
|
+
recall_df = recall_df[["segment", "events", "text"]]
|
|
40
|
+
|
|
41
|
+
recall_df.to_csv(
|
|
42
|
+
output_dir_recalls_segmented / f"{sub_id}.csv",
|
|
43
|
+
index=False,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# 2. story transcript data
|
|
47
|
+
transcript_paths = sorted(list(recall_dir.glob("*events.xlsx")))
|
|
48
|
+
for transcript_path in transcript_paths:
|
|
49
|
+
if transcript_path.stem.startswith("~$"):
|
|
50
|
+
continue
|
|
51
|
+
# filename/storyname
|
|
52
|
+
filestem_splits = transcript_path.stem.split("_")
|
|
53
|
+
story_version = filestem_splits[0][-(len(filestem_splits[0]) - 2) :]
|
|
54
|
+
story_name = f"{base_story}_{story_version}"
|
|
55
|
+
assert filestem_splits[2].startswith("events")
|
|
56
|
+
|
|
57
|
+
# output dir
|
|
58
|
+
output_dir_transcripts = Path("data") / "cyoa" / story_name / "transcripts"
|
|
59
|
+
output_dir_transcripts.mkdir(parents=True, exist_ok=True)
|
|
60
|
+
|
|
61
|
+
# load data
|
|
62
|
+
transcript_df = pd.read_excel(transcript_path)
|
|
63
|
+
transcript_df = transcript_df.rename(columns={"story_texts": "text"})
|
|
64
|
+
|
|
65
|
+
transcript_df = transcript_df[["event", "text"]]
|
|
66
|
+
|
|
67
|
+
transcript_df.to_csv(
|
|
68
|
+
output_dir_transcripts / f"{story_name}.csv",
|
|
69
|
+
index=False,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
if __name__ == "__main__":
    # CLI entry point: run the import over a downloaded CYOA directory.
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--cyoa-path", type=str, default="downloads/cyoa")
    cli_args = arg_parser.parse_args()

    import_cyoa(cyoa_path=cli_args.cyoa_path)
|