marm-behavior 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. marm_behavior-0.1.0/LICENSE +21 -0
  2. marm_behavior-0.1.0/PKG-INFO +378 -0
  3. marm_behavior-0.1.0/README.md +332 -0
  4. marm_behavior-0.1.0/marm_behavior/__init__.py +25 -0
  5. marm_behavior-0.1.0/marm_behavior/__main__.py +435 -0
  6. marm_behavior-0.1.0/marm_behavior/_data_files.py +458 -0
  7. marm_behavior-0.1.0/marm_behavior/_tf_quiet.py +71 -0
  8. marm_behavior-0.1.0/marm_behavior/data/README.txt +60 -0
  9. marm_behavior-0.1.0/marm_behavior/data/__init__.py +1 -0
  10. marm_behavior-0.1.0/marm_behavior/data/nn_reference/README.txt +57 -0
  11. marm_behavior-0.1.0/marm_behavior/depths/__init__.py +0 -0
  12. marm_behavior-0.1.0/marm_behavior/depths/depths_1.py +291 -0
  13. marm_behavior-0.1.0/marm_behavior/dlc_inference.py +369 -0
  14. marm_behavior-0.1.0/marm_behavior/el_to_csv.py +131 -0
  15. marm_behavior-0.1.0/marm_behavior/extract/__init__.py +0 -0
  16. marm_behavior-0.1.0/marm_behavior/extract/extract_1.py +331 -0
  17. marm_behavior-0.1.0/marm_behavior/extract/extract_2.py +666 -0
  18. marm_behavior-0.1.0/marm_behavior/extract/extract_3.py +285 -0
  19. marm_behavior-0.1.0/marm_behavior/features/__init__.py +0 -0
  20. marm_behavior-0.1.0/marm_behavior/features/labels.py +1300 -0
  21. marm_behavior-0.1.0/marm_behavior/io/__init__.py +0 -0
  22. marm_behavior-0.1.0/marm_behavior/io/csv_io.py +43 -0
  23. marm_behavior-0.1.0/marm_behavior/io/mat_io.py +249 -0
  24. marm_behavior-0.1.0/marm_behavior/nn_postprocess.py +945 -0
  25. marm_behavior-0.1.0/marm_behavior/numerics/__init__.py +0 -0
  26. marm_behavior-0.1.0/marm_behavior/numerics/helpers.py +284 -0
  27. marm_behavior-0.1.0/marm_behavior/numerics/hull.py +252 -0
  28. marm_behavior-0.1.0/marm_behavior/pipeline/__init__.py +0 -0
  29. marm_behavior-0.1.0/marm_behavior/pipeline/orchestrators.py +508 -0
  30. marm_behavior-0.1.0/marm_behavior/process/__init__.py +0 -0
  31. marm_behavior-0.1.0/marm_behavior/process/postures.py +153 -0
  32. marm_behavior-0.1.0/marm_behavior/process/process_1.py +99 -0
  33. marm_behavior-0.1.0/marm_behavior/process/process_2.py +292 -0
  34. marm_behavior-0.1.0/marm_behavior/process/process_3.py +323 -0
  35. marm_behavior-0.1.0/marm_behavior/process/process_4.py +502 -0
  36. marm_behavior-0.1.0/marm_behavior/run.py +525 -0
  37. marm_behavior-0.1.0/marm_behavior.egg-info/PKG-INFO +378 -0
  38. marm_behavior-0.1.0/marm_behavior.egg-info/SOURCES.txt +42 -0
  39. marm_behavior-0.1.0/marm_behavior.egg-info/dependency_links.txt +1 -0
  40. marm_behavior-0.1.0/marm_behavior.egg-info/entry_points.txt +2 -0
  41. marm_behavior-0.1.0/marm_behavior.egg-info/requires.txt +23 -0
  42. marm_behavior-0.1.0/marm_behavior.egg-info/top_level.txt +1 -0
  43. marm_behavior-0.1.0/pyproject.toml +117 -0
  44. marm_behavior-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 William Menegas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,378 @@
1
+ Metadata-Version: 2.1
2
+ Name: marm_behavior
3
+ Version: 0.1.0
4
+ Summary: Multi-animal marmoset behavioral analysis pipeline (DeepLabCut pose tracking + LSTM encoder + openTSNE clustering)
5
+ Author-email: William Menegas <william.s.menegas@gmail.com>
6
+ Maintainer-email: William Menegas <william.s.menegas@gmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/williammenegas/marm_behavior
9
+ Project-URL: Repository, https://github.com/williammenegas/marm_behavior
10
+ Project-URL: Issues, https://github.com/williammenegas/marm_behavior/issues
11
+ Project-URL: Reference Data, https://huggingface.co/datasets/williammenegas/data
12
+ Keywords: neuroscience,behavior,marmoset,primate,deeplabcut,pose-estimation,tsne,behavioral-clustering,computational-ethology
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.8
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Topic :: Scientific/Engineering
23
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Requires-Python: >=3.8
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: numpy>=1.22
29
+ Requires-Dist: scipy>=1.9
30
+ Requires-Dist: h5py>=3.6
31
+ Requires-Dist: huggingface_hub>=0.20
32
+ Provides-Extra: video
33
+ Requires-Dist: opencv-python-headless>=4.5; extra == "video"
34
+ Provides-Extra: imageio
35
+ Requires-Dist: imageio>=2.25; extra == "imageio"
36
+ Requires-Dist: imageio-ffmpeg>=0.4.7; extra == "imageio"
37
+ Provides-Extra: dlc
38
+ Requires-Dist: deeplabcut>=2.2; extra == "dlc"
39
+ Provides-Extra: nn
40
+ Requires-Dist: tensorflow>=2.5; extra == "nn"
41
+ Requires-Dist: openTSNE>=0.7; extra == "nn"
42
+ Requires-Dist: annoy>=1.17; extra == "nn"
43
+ Requires-Dist: scikit-learn>=1.0; extra == "nn"
44
+ Provides-Extra: fast-download
45
+ Requires-Dist: hf_xet; extra == "fast-download"
46
+
47
+ # marm_behavior
48
+
49
+ A six-stage multi-animal marmoset behavioral analysis pipeline. Takes a
50
+ video of four differently-marked marmosets in a stereo RGB+depth arena,
51
+ runs DeepLabCut pose estimation, extracts per-animal body-part tracks,
52
+ computes per-frame behavioral features, and projects the features into a
53
+ learned behavioral cluster space.
54
+
55
+ Everything is packaged so a single command runs the full pipeline:
56
+
57
+ ```bash
58
+ python -m marm_behavior path/to/video.avi
59
+ ```
60
+
61
+ For a detailed reference covering every command-line flag, see
62
+ [USER_GUIDE.md](USER_GUIDE.md).
63
+
64
+ ## Install
65
+
66
+ The fastest way is to use the bundled conda environment file. It pins
67
+ the exact ML stack (TensorFlow 2.9.1, openTSNE 0.6.2, scikit-learn
68
+ 1.1.2, DeepLabCut 2.2.0.6, NumPy 1.22.4) the canonical reference
69
+ outputs were produced with — important because the nn stage's t-SNE
70
+ output is sensitive to openTSNE's version. Full install takes ~10
71
+ minutes on a clean machine.
72
+
73
+ ### 1. Create the conda environment
74
+
75
+ If you don't have conda, install
76
+ [Miniconda](https://docs.conda.io/en/latest/miniconda.html) first.
77
+ Then from the repo root:
78
+
79
+ ```bash
80
+ conda env create -f env/deep_learning.yml
81
+ conda activate deep_learning
82
+ ```
83
+
84
+ This creates an env named `deep_learning` (matching the lab's
85
+ canonical name) with everything the pipeline needs.
86
+
87
+ ### 2. (GPU users only) Install CUDA + cuDNN
88
+
89
+ For TensorFlow 2.9.1, CUDA 11.2 + cuDNN 8.1 is the supported
90
+ combination:
91
+
92
+ ```bash
93
+ conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1 -y
94
+ ```
95
+
96
+ Verify the GPU is visible to TensorFlow:
97
+
98
+ ```bash
99
+ python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
100
+ ```
101
+
102
+ You should see at least one `PhysicalDevice(name='/physical_device:GPU:0', ...)`.
103
+ If the list is empty, the `dlc` stage will still run on CPU but will be
104
+ ~50× slower.
105
+
106
+ ### 3. Install the marm_behavior package
107
+
108
+ Either clone and run in-place:
109
+
110
+ ```bash
111
+ git clone <repo-url> marm_behavior
112
+ cd marm_behavior
113
+ python -m marm_behavior path/to/video.avi
114
+ ```
115
+
116
+ ...or install it editable:
117
+
118
+ ```bash
119
+ cd marm_behavior
120
+ pip install -e .
121
+ marm-behavior path/to/video.avi
122
+ ```
123
+
124
+ On the first invocation, the bundled data (DLC model ~128 MB, NN
125
+ encoder ~2 MB, NN reference set ~190 MB) downloads from the Hugging
126
+ Face Hub and is cached under `~/.cache/huggingface/hub/`. Every
127
+ subsequent run reuses the cache.
128
+
129
+ ### 4. (Optional) Override the NN reference data
130
+
131
+ The default reference set is fetched from
132
+ [`williammenegas/data`](https://huggingface.co/datasets/williammenegas/data)
133
+ on first run. You only need to override this if you want to use your
134
+ own reference set (e.g. for a different cohort) — in that case, pass
135
+ `--nn-reference-dir /path/to/your/reference/folder`. See the
136
+ [Reference files for the NN stage](#reference-files-for-the-nn-stage)
137
+ section below.
138
+
139
+ ### Verify the install
140
+
141
+ ```bash
142
+ python -m marm_behavior --help
143
+ ```
144
+
145
+ This should print the full usage banner with all the per-colour and
146
+ per-stage flags.
147
+
148
+ ## The six stages
149
+
150
+ | Stage | Input | Output |
151
+ |---|---|---|
152
+ | **dlc** | video | `*DLC_..._el.picklesingle.csv`, `*DLC_..._el.picklemulti.csv` |
153
+ | **extract** | DLC CSVs | `tracks_<video>.mat` (per-animal body-part tracks) |
154
+ | **process** | `tracks_*.mat` | `edges_<video>.mat` (per-animal edge matrices) |
155
+ | **depths** | `edges_*.mat` + video | `depths_<video>.mat` (pixel-depth lookups) |
156
+ | **labels** | `edges_*.mat` + `depths_*.mat` | `{w,b,r,y}_description_<video>.csv` (30 behavioral features per frame) |
157
+ | **nn** | description CSVs | `hcoord_{Red,White,Blue,Yellow}_<video>.csv` (2D t-SNE coords) and `hlabel_*_<video>.csv` (cluster labels) |
158
+
159
+ ## Common invocations
160
+
161
+ **Run everything with defaults:**
162
+ ```bash
163
+ python -m marm_behavior path/to/video.avi
164
+ ```
165
+
166
+ **Skip DLC if the CSVs already exist:**
167
+ ```bash
168
+ python -m marm_behavior video.avi --stages extract process depths labels nn
169
+ ```
170
+
171
+ **Re-run just the labels stage** against existing `edges_*.mat` / `depths_*.mat`:
172
+ ```bash
173
+ python -m marm_behavior video.avi --stages labels
174
+ ```
175
+
176
+ **Point the NN stage at a custom reference folder:**
177
+ ```bash
178
+ python -m marm_behavior video.avi --nn-reference-dir /path/to/references
179
+ ```
180
+
181
+ **Declare one colour absent:**
182
+ ```bash
183
+ python -m marm_behavior video.avi --no-yellow
184
+ ```
185
+
186
+ **Use your own DLC project** instead of the bundled one:
187
+ ```bash
188
+ python -m marm_behavior video.avi --dlc-config /path/to/your/config.yaml
189
+ ```
190
+
191
+ See `python -m marm_behavior --help` for every flag.
192
+
193
+ ## Bundled data
194
+
195
+ The runtime data marm_behavior needs — the trained DeepLabCut project
196
+ (~128 MB), the LSTM encoder (~2 MB), the canonical NN reference set
197
+ (~190 MB), and a `ground_normalized.npz` body-part cloud (~570 KB) — is
198
+ **not shipped in the wheel**. It's hosted on the Hugging Face Hub at
199
+ [`williammenegas/data`](https://huggingface.co/datasets/williammenegas/data)
200
+ and downloaded lazily on first use, then cached under
201
+ `~/.cache/huggingface/hub/`. The first pipeline run on a fresh machine
202
+ incurs a one-time ~310 MB download; every run after that is instant.
203
+
204
+ To pre-warm the cache (e.g. before going offline):
205
+
206
+ ```bash
207
+ python -m marm_behavior --prefetch-data
208
+ ```
209
+
210
+ For shared cluster installs where every user should hit the same
211
+ on-disk copy, set `MARM_BEHAVIOR_DATA_DIR=/shared/path/to/data`. The
212
+ helper checks that path before falling back to the per-user HF cache.
213
+
214
+ For lab-internal mirrors of the data repo, set
215
+ `MARM_BEHAVIOR_HF_REPO=your-org/your-mirror` to override the source
216
+ repo.
217
+
218
+ The lookup logic lives in `marm_behavior/_data_files.py` if you want
219
+ to read it.
220
+
221
+ ## Reference files for the NN stage
222
+
223
+ The NN stage projects video features into a stable behavioral cluster
224
+ space. The canonical reference data is **fetched from the Hugging Face
225
+ Hub** as part of the bundled-data download above, so the stage works
226
+ out of the box. You can ignore this section unless you want to use a
227
+ different reference set.
228
+
229
+ The bundled folder contains:
230
+
231
+ ```
232
+ out_inner_mean1.csv (256,) normalization mean
233
+ out_inner_std1.csv (256,) normalization std
234
+ tsne_temp1_1.csv (N, 2) reference 2D coords
235
+ dbscan_temp1_1.csv (N,) reference cluster labels
236
+ embedding_train_coords.npy (n_train, 2) training 2D coords
237
+ embedding_train_annoy.bin (~180 MB) cached annoy k-NN index
238
+ embedding_train_meta.json cache metadata
239
+ embedding_train_optimizer_gains.npy (n_train, 2) optimizer state
240
+ ```
241
+
242
+ The 3 GB `out_inner1.csv` (raw training latents) is **not** shipped —
243
+ it isn't needed at runtime because the cache files above already encode
244
+ everything `transform()` needs.
245
+
246
+ **Using your own reference set.** Pass `--nn-reference-dir /path/to/dir`.
247
+ The folder needs the four small CSVs at minimum. If the
248
+ `embedding_train_*` cache files are missing, the stage will fit
249
+ openTSNE from `out_inner1.csv` (which must be present in that case;
250
+ takes ~6 min) and write the cache for future runs.
251
+
252
+ **Bootstrap mode.** If you don't have any reference data at all and
253
+ want to experiment, pass `--nn-bootstrap` to generate everything from
254
+ the current video's own description CSVs. Bootstrap cluster IDs are
255
+ **not comparable** across videos or to canonical references, so this
256
+ mode is opt-in and intended for initial exploration only.
257
+
258
+ **Buddy chains.** The NN stage pairs each animal's behavioral features
259
+ with one other animal's features before encoding. The default pairings
260
+ are Red→Yellow, White→Blue, Blue→White, and Yellow→Red (with a
261
+ secondary fallback if the primary's description CSV isn't present).
262
+ Override per-animal with the `--<color>-buddy` flags — each takes one
263
+ or more short color keys (`r`, `w`, `b`, `y`) in preference order:
264
+
265
+ ```bash
266
+ # Always pair Red with Blue instead of Yellow:
267
+ python -m marm_behavior video.avi --red-buddy b
268
+
269
+ # Pair Yellow with White first, falling back to Blue:
270
+ python -m marm_behavior video.avi --yellow-buddy w b
271
+
272
+ # Multiple overrides at once:
273
+ python -m marm_behavior video.avi --red-buddy b --blue-buddy y r
274
+ ```
275
+
276
+ From Python, pass `nn_buddies={'r': ['b'], 'y': ['w', 'b']}` to
277
+ `marm_behavior.run()`.
278
+
279
+ ## One-animal mode
280
+
281
+ When exactly one of the four animals is marked present (via three
282
+ `--no-<color>` flags), the pipeline automatically switches into
283
+ **one-animal mode**. Four stages change behaviour:
284
+
285
+ 1. **extract stage** — every multi-CSV tracklet is assigned to the
286
+ focal animal regardless of which colour DLC's head classifier
287
+ predicted. This is the correct behaviour because DLC's head
288
+ classifier was trained on four-animal data and routinely
289
+ mislabels a single animal across colours; the four-animal
290
+ proximity-based assignment would otherwise scatter the focal
291
+ animal's tracklets across whichever absent colours happened to
292
+ get mislabelled. For each frame, the highest-quality tracklet
293
+ (most surviving body parts after the confidence threshold) is
294
+ picked, with ties broken on the lower track id.
295
+ 2. **process stage** — non-focal animals get a constant body-length
296
+ `bh = 30` instead of the per-frame movmedian + clamp +
297
+ forward-fill used in four-animal mode. The focal animal still
298
+ gets the full computation. This avoids producing meaningless
299
+ body-length estimates from F matrices that are all-NaN because
300
+ the colour isn't actually in the video.
301
+ 3. **depths stage** — only the focal animal's per-frame depth lookup
302
+ runs. The other three colours' inner loops are skipped entirely,
303
+ roughly 4× faster than four-animal mode for a typical video.
304
+ 4. **nn stage** — skipped automatically. The NN encoder pairs each
305
+ self animal with a buddy animal's behavioural features, and in
306
+ one-animal mode there is no buddy. Pass `--force-nn` to override
307
+ if you want to run the NN stage anyway (e.g. for bootstrapping
308
+ a one-animal-specific reference space).
309
+
310
+ The mode is detected automatically — there's no separate flag to
311
+ enable it. The CLI prints a clear banner up front so you can see
312
+ what's about to happen:
313
+
314
+ ```
315
+ $ python -m marm_behavior video.avi --no-white --no-blue --no-yellow
316
+ [marm_behavior] ONE-ANIMAL MODE: only Red present
317
+ [marm_behavior] extract: all multi-CSV tracklets are assigned to the focal animal (no proximity matching to colour-classified heads)
318
+ [marm_behavior] process: non-focal animals will use constant bh = 30
319
+ [marm_behavior] depths: per-frame lookup runs only for the focal animal
320
+ [marm_behavior] nn: stage will be skipped (no buddy animal available; pass force_nn=True / --force-nn to override)
321
+ [marm_behavior] dlc: ...
322
+ ```
323
+
324
+ When zero, two, three, or four animals are present, behaviour is
325
+ unchanged from the standard four-animal pipeline.
326
+
327
+
328
+
329
+ Everything is also available as a Python function:
330
+
331
+ ```python
332
+ from marm_behavior import run
333
+
334
+ result = run("path/to/video.avi")
335
+
336
+ print(result["stages_run"]) # ['dlc', 'extract', 'process', 'depths', 'labels', 'nn']
337
+ print(result["descriptions"]) # {'w': Path(...), 'b': Path(...), ...}
338
+ print(result["nn"]) # {'Red': (hcoord_path, hlabel_path), ...}
339
+ ```
340
+
341
+ See `help(marm_behavior.run)` for every parameter.
342
+
343
+ ## Dependencies
344
+
345
+ **Runtime** (always required):
346
+ - numpy ≥ 1.22, scipy ≥ 1.9, h5py ≥ 3.6
347
+
348
+ **Per-stage** (install the ones you need):
349
+
350
+ | Stage | Needs |
351
+ |---|---|
352
+ | dlc | `deeplabcut[tf]` or `deeplabcut[pytorch]` |
353
+ | depths | `opencv-python-headless` or `imageio` + `imageio-ffmpeg` |
354
+ | nn | `tensorflow`, `openTSNE`, `scikit-learn` |
355
+
356
+ The `extract`, `process`, and `labels` stages need only the core runtime
357
+ deps.
358
+
359
+ ## Layout
360
+
361
+ ```
362
+ marm_behavior/ <- the Python package
363
+ ├── run.py <- pipeline entry point
364
+ ├── __main__.py <- CLI
365
+ ├── dlc_inference.py <- DLC shell-out
366
+ ├── el_to_csv.py <- tracklet-pickle to CSV converter
367
+ ├── nn_postprocess.py <- NN stage
368
+ ├── io/ <- .mat and .csv I/O
369
+ ├── numerics/ <- hull, rolling reductions, NaN helpers
370
+ ├── extract/ <- body-part track extraction
371
+ ├── process/ <- posture and edge computation
372
+ ├── depths/ <- per-frame depth lookup
373
+ ├── features/ <- behavioral feature labelling
374
+ ├── pipeline/ <- batch orchestrators
375
+ └── data/ <- bundled models + canonical NN reference data
376
+ pyproject.toml
377
+ README.md
378
+ ```