hyperstudy 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/.github/workflows/sync-release-notes.yml +8 -12
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/PKG-INFO +1 -1
- hyperstudy-0.2.2/docs/superpowers/specs/2026-04-10-recording-downloads-design.md +128 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/pyproject.toml +1 -1
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/__init__.py +1 -1
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/_dataframe.py +47 -0
- hyperstudy-0.2.2/src/hyperstudy/_downloads.py +50 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/client.py +154 -13
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/conftest.py +10 -0
- hyperstudy-0.2.2/tests/fixtures/recordings_response.json +71 -0
- hyperstudy-0.2.2/tests/fixtures/sparse_ratings_response.json +108 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/test_client.py +182 -0
- hyperstudy-0.2.2/tests/test_dataframe.py +182 -0
- hyperstudy-0.2.2/tests/test_downloads.py +105 -0
- hyperstudy-0.2.0/tests/test_dataframe.py +0 -78
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/.github/workflows/publish.yml +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/.github/workflows/test.yml +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/.gitignore +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/CHANGELOG.md +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/LICENSE +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/README.md +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/_display.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/_http.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/_pagination.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/_types.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/exceptions.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/src/hyperstudy/experiments.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/__init__.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/deployment_sessions_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/deployment_single_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/deployments_list_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/error_401.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/error_403.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/events_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/experiment_single_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/experiments_list_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/paginated_page1.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/paginated_page2.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/pre_experiment_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/fixtures/warnings_response.json +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/test_experiments.py +0 -0
- {hyperstudy-0.2.0 → hyperstudy-0.2.2}/tests/test_pagination.py +0 -0
|
@@ -53,15 +53,11 @@ jobs:
|
|
|
53
53
|
# Remove leading whitespace from heredoc
|
|
54
54
|
sed -i 's/^ //' "$FILE"
|
|
55
55
|
|
|
56
|
-
- name:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
body: |
|
|
65
|
-
Auto-generated release notes for Python SDK ${{ steps.release.outputs.tag }}.
|
|
66
|
-
|
|
67
|
-
Source: ${{ github.event.release.html_url }}
|
|
56
|
+
- name: Commit and push
|
|
57
|
+
run: |
|
|
58
|
+
cd docs-repo
|
|
59
|
+
git config user.name "github-actions[bot]"
|
|
60
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
61
|
+
git add docs/release-notes/
|
|
62
|
+
git commit -m "Add Python SDK ${{ steps.release.outputs.tag }} release notes" || exit 0
|
|
63
|
+
git push
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Recording Downloads via Python SDK
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The Python SDK's `get_recordings()` returns metadata only. Users need the actual audio/video files for offline analysis (ML models, manual review, archival). Currently they must manually extract `downloadUrl` from each record and fetch files themselves.
|
|
6
|
+
|
|
7
|
+
## Decision: SDK-only, no backend changes
|
|
8
|
+
|
|
9
|
+
The V3 API already returns signed GCS download URLs (7-day expiry) in the recording metadata. The SDK will fetch metadata and download files in the same call, so URL expiry is not a practical concern. This matches how the frontend downloads recordings.
|
|
10
|
+
|
|
11
|
+
## API Surface
|
|
12
|
+
|
|
13
|
+
### `download_recordings()` — Bulk download
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
df = hs.download_recordings(
|
|
17
|
+
"exp_abc123",
|
|
18
|
+
output_dir="./data/recordings",
|
|
19
|
+
scope="experiment", # "experiment" | "room" | "participant"
|
|
20
|
+
deployment_id=None, # optional filter
|
|
21
|
+
room_id=None, # optional filter
|
|
22
|
+
recording_type=None, # "audio" | "video" | None (both)
|
|
23
|
+
progress=True, # tqdm progress bar
|
|
24
|
+
skip_existing=True, # skip files already on disk with matching size
|
|
25
|
+
)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**Returns**: `pandas.DataFrame` with all recording metadata columns plus:
|
|
29
|
+
- `local_path` — absolute path to the downloaded file on disk
|
|
30
|
+
- `download_status` — `"downloaded"`, `"skipped"`, or `"failed"`
|
|
31
|
+
|
|
32
|
+
**Side effects**:
|
|
33
|
+
- Writes media files to `output_dir`
|
|
34
|
+
- Writes `recordings_metadata.csv` to `output_dir`
|
|
35
|
+
|
|
36
|
+
### `download_recording()` — Single recording
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
path = hs.download_recording(
|
|
40
|
+
recording, # dict from get_recordings(output="dict")
|
|
41
|
+
output_dir="./data/recordings",
|
|
42
|
+
)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**Returns**: `pathlib.Path` to downloaded file.
|
|
46
|
+
|
|
47
|
+
## Directory Structure
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
output_dir/
|
|
51
|
+
recordings_metadata.csv
|
|
52
|
+
user1_video_EG_abc123.mp4
|
|
53
|
+
user1_audio_EG_def456.webm
|
|
54
|
+
user2_video_EG_ghi789.mp4
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Filename pattern**: `{participantName}_{recordingType}_{recordingId}.{ext}`
|
|
58
|
+
|
|
59
|
+
- `participantName`: from recording metadata, sanitized for filesystem safety
|
|
60
|
+
- `recordingType`: `"video"` or `"audio"` from `metadata.type`
|
|
61
|
+
- `recordingId`: `recordingId` from the recording metadata, falling back to `egressId`
|
|
62
|
+
- `ext`: from `format` field, falling back to `mp4` (video) or `webm` (audio)
|
|
63
|
+
|
|
64
|
+
## Internal Design
|
|
65
|
+
|
|
66
|
+
### Download flow (`download_recordings`)
|
|
67
|
+
|
|
68
|
+
1. Call `self.get_recordings(scope_id, scope=scope, output="dict")` to get metadata
|
|
69
|
+
2. Filter by `recording_type` if specified (via `metadata.type`)
|
|
70
|
+
3. Create `output_dir` via `os.makedirs(exist_ok=True)`
|
|
71
|
+
4. For each recording:
|
|
72
|
+
- Build filename using pattern above
|
|
73
|
+
- If `skip_existing=True` and file exists with size matching `fileSize` metadata, mark as `"skipped"`
|
|
74
|
+
- Otherwise, fetch from `downloadUrl` (fallback: `url`) using streaming HTTP GET
|
|
75
|
+
- Write to disk in 64 KB chunks (`_CHUNK_SIZE` in `_downloads.py`)
|
|
76
|
+
- Mark as `"downloaded"` or `"failed"` (with warning logged)
|
|
77
|
+
5. Build DataFrame from metadata, add `local_path` and `download_status` columns
|
|
78
|
+
6. Write `recordings_metadata.csv` to `output_dir`
|
|
79
|
+
7. Return DataFrame
|
|
80
|
+
|
|
81
|
+
### Streaming downloads
|
|
82
|
+
|
|
83
|
+
Use `requests.get(url, stream=True)` with chunked iteration to avoid loading large video files into memory. The SDK's existing `HttpTransport` handles JSON responses only, so file downloads use a standalone `requests.get()` — the signed GCS URLs don't need API key auth.
|
|
84
|
+
|
|
85
|
+
### Error handling
|
|
86
|
+
|
|
87
|
+
- Per-file failure tolerance: if one recording fails (404, timeout, network error), log a warning, set `download_status="failed"`, continue with remaining files
|
|
88
|
+
- If the metadata API call itself fails, raise normally (same as `get_recordings()`)
|
|
89
|
+
- Invalid/missing `downloadUrl`: set `download_status="failed"`, log warning
|
|
90
|
+
|
|
91
|
+
### Skip-existing logic
|
|
92
|
+
|
|
93
|
+
Compare `os.path.getsize(local_path)` against `fileSize` from metadata. If `fileSize` is `None` (metadata missing), fall back to checking file existence only (any existing file is considered complete).
|
|
94
|
+
|
|
95
|
+
## File Layout
|
|
96
|
+
|
|
97
|
+
| File | Change |
|
|
98
|
+
|------|--------|
|
|
99
|
+
| `src/hyperstudy/_downloads.py` | **New.** `build_filename()`, `download_file()` streaming helper |
|
|
100
|
+
| `src/hyperstudy/client.py` | Add `download_recordings()` and `download_recording()` methods |
|
|
101
|
+
| `tests/test_downloads.py` | **New.** Unit tests for filename building, skip logic, status tracking |
|
|
102
|
+
| `tests/test_client.py` | Integration test: mock API + GCS, verify files + DataFrame |
|
|
103
|
+
| `tests/fixtures/sparse_ratings_response.json` | Already exists (from prior work) |
|
|
104
|
+
|
|
105
|
+
## Testing
|
|
106
|
+
|
|
107
|
+
### Unit tests (`tests/test_downloads.py`)
|
|
108
|
+
- `test_build_filename` — video, audio, missing fields, filesystem-unsafe characters
|
|
109
|
+
- `test_build_filename_dedup` — duplicate names get numeric suffix
|
|
110
|
+
- `test_skip_existing_matching_size` — file with correct size is skipped
|
|
111
|
+
- `test_skip_existing_wrong_size` — file with wrong size is re-downloaded
|
|
112
|
+
|
|
113
|
+
### Integration tests (`tests/test_client.py`)
|
|
114
|
+
- `test_download_recordings` — mock API + GCS fetch, verify files on disk, CSV sidecar, DataFrame with `local_path` + `download_status`
|
|
115
|
+
- `test_download_recordings_filter_type` — `recording_type="audio"` only downloads audio
|
|
116
|
+
- `test_download_recording_single` — single recording download
|
|
117
|
+
|
|
118
|
+
### Mocking strategy
|
|
119
|
+
- V3 API: `responses` library (existing pattern)
|
|
120
|
+
- GCS signed URL: also `responses` (it's just an HTTP GET to a URL)
|
|
121
|
+
- File I/O: real writes to `pytest` `tmp_path`
|
|
122
|
+
|
|
123
|
+
## No Backend Changes Required
|
|
124
|
+
|
|
125
|
+
The existing V3 API endpoints return all necessary data:
|
|
126
|
+
- `GET /api/v3/data/recordings/{scope}/{scopeId}` returns metadata with `downloadUrl`
|
|
127
|
+
- Signed GCS URLs are valid for 7 days
|
|
128
|
+
- SDK downloads immediately after fetching metadata, so expiry is not an issue
|
|
@@ -6,6 +6,51 @@ from typing import Any
|
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
|
|
9
|
+
# Fields whose value is a nested dict that should also be exposed as
# top-level columns.  Keys are the field names to look for in each record;
# values are the prefix used for the promoted sub-keys, which are written
# as ``{prefix}_{sub_key}``.
FLATTEN_FIELDS: dict[str, str] = dict(
    sparseRatingData="sparseRatingData",
    metadata="metadata",
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _flatten_nested_dicts(
|
|
18
|
+
data: list[dict[str, Any]],
|
|
19
|
+
fields: dict[str, str] | None = None,
|
|
20
|
+
) -> list[dict[str, Any]]:
|
|
21
|
+
"""Promote sub-keys of nested dict fields to top-level keys.
|
|
22
|
+
|
|
23
|
+
For each *field* present in a record whose value is a ``dict``, every
|
|
24
|
+
sub-key is copied to ``{prefix}_{sub_key}``. The original nested dict
|
|
25
|
+
is preserved for backward compatibility.
|
|
26
|
+
|
|
27
|
+
Records where the target field is ``None`` or missing are left
|
|
28
|
+
untouched — downstream DataFrame construction fills those columns
|
|
29
|
+
with ``NaN`` / ``null``.
|
|
30
|
+
"""
|
|
31
|
+
if not data:
|
|
32
|
+
return data
|
|
33
|
+
|
|
34
|
+
fields = fields if fields is not None else FLATTEN_FIELDS
|
|
35
|
+
|
|
36
|
+
# Quick check on first record — skip work when no target fields exist.
|
|
37
|
+
sample = data[0]
|
|
38
|
+
targets = [f for f in fields if f in sample and isinstance(sample[f], dict)]
|
|
39
|
+
if not targets:
|
|
40
|
+
return data
|
|
41
|
+
|
|
42
|
+
out: list[dict[str, Any]] = []
|
|
43
|
+
for record in data:
|
|
44
|
+
record = dict(record) # shallow copy to avoid mutating caller's data
|
|
45
|
+
for field in targets:
|
|
46
|
+
nested = record.get(field)
|
|
47
|
+
if isinstance(nested, dict):
|
|
48
|
+
prefix = fields[field]
|
|
49
|
+
for sub_key, sub_val in nested.items():
|
|
50
|
+
record[f"{prefix}_{sub_key}"] = sub_val
|
|
51
|
+
out.append(record)
|
|
52
|
+
return out
|
|
53
|
+
|
|
9
54
|
|
|
10
55
|
def _post_process(df: pd.DataFrame) -> pd.DataFrame:
|
|
11
56
|
"""Shared post-processing for pandas DataFrames.
|
|
@@ -32,6 +77,7 @@ def to_pandas(data: list[dict[str, Any]]) -> pd.DataFrame:
|
|
|
32
77
|
"""Convert API response data to a pandas DataFrame with post-processing."""
|
|
33
78
|
if not data:
|
|
34
79
|
return pd.DataFrame()
|
|
80
|
+
data = _flatten_nested_dicts(data)
|
|
35
81
|
df = pd.DataFrame(data)
|
|
36
82
|
return _post_process(df)
|
|
37
83
|
|
|
@@ -51,6 +97,7 @@ def to_polars(data: list[dict[str, Any]]):
|
|
|
51
97
|
if not data:
|
|
52
98
|
return pl.DataFrame()
|
|
53
99
|
|
|
100
|
+
data = _flatten_nested_dicts(data)
|
|
54
101
|
df = pl.DataFrame(data)
|
|
55
102
|
|
|
56
103
|
# Parse timestamps
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Helpers for downloading recording files from signed URLs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
# Streaming read size: 64 KB per chunk, chosen with large video files in mind.
_CHUNK_SIZE = 64 * 1024
# Matches any character that is neither a word character nor a hyphen.
_UNSAFE_RE = re.compile(r"[^\w\-]")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_download_url(recording: dict[str, Any]) -> str | None:
|
|
16
|
+
"""Return the best download URL from a recording dict, or ``None``."""
|
|
17
|
+
return recording.get("downloadUrl") or recording.get("url") or None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def build_filename(recording: dict[str, Any]) -> str:
    """Build a filesystem-safe filename from recording metadata.

    Pattern: ``{participantName}_{type}_{recordingId}.{ext}``

    * name: ``participantName``, else ``participantId``, else ``"unknown"``.
    * type: ``metadata["type"]``, else ``"recording"``.
    * id: ``recordingId``, else ``egressId``, else ``"unknown"``.
    * ext: ``format`` (a leading dot is dropped), else ``webm`` for audio
      and ``mp4`` for everything else.

    Every component is stringified and has each character outside
    ``[A-Za-z0-9_-]`` (per ``_UNSAFE_RE``) replaced by ``_``.  That keeps
    path separators and ``..`` out of the result, so the file cannot land
    outside the directory the caller joins it to.

    Args:
        recording: A recording dict as returned by the recordings API.

    Returns:
        The filename (no directory part).
    """

    def _safe(value: Any) -> str:
        # str() first: IDs may arrive as non-string JSON values.
        return _UNSAFE_RE.sub("_", str(value))

    name = recording.get("participantName") or recording.get("participantId") or "unknown"

    meta = recording.get("metadata")
    if not isinstance(meta, dict):
        meta = {}
    rec_type = meta.get("type") or "recording"

    rec_id = recording.get("recordingId") or recording.get("egressId") or "unknown"

    # Tolerate a format given as ".mp4"; fall back when nothing usable is left.
    ext = str(recording.get("format") or "").lstrip(".")
    if not ext:
        ext = "webm" if rec_type == "audio" else "mp4"

    return f"{_safe(name)}_{_safe(rec_type)}_{_safe(rec_id)}.{_safe(ext)}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def download_file(url: str, dest: Path, timeout: int = 300) -> int:
    """Stream-download *url* to *dest* and return bytes written.

    The body is written to a sibling ``<dest name>.part`` file and moved
    onto *dest* only after the whole response has been read.  A failed or
    interrupted transfer therefore never leaves a truncated file at
    *dest*, and a pre-existing *dest* is kept until the new copy is
    complete.

    Args:
        url: URL to fetch with a plain ``requests.get``.
        dest: Destination file path; its parent directory must exist.
        timeout: Passed through to ``requests.get``.

    Returns:
        Number of bytes written to *dest*.

    Raises:
        requests.HTTPError: For 4xx/5xx responses (``raise_for_status``).
        requests.RequestException: For connection or timeout failures.
        OSError: If the file cannot be written or moved into place.
    """
    dest = Path(dest)
    partial = dest.with_name(dest.name + ".part")
    written = 0
    try:
        # The context manager releases the connection even when the
        # transfer fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(partial, "wb") as fh:
                for chunk in resp.iter_content(chunk_size=_CHUNK_SIZE):
                    if chunk:  # skip empty chunks
                        fh.write(chunk)
                        written += len(chunk)
        partial.replace(dest)
    finally:
        # After a successful replace() the .part file is gone; on any
        # failure this removes the incomplete download.
        if partial.exists():
            partial.unlink()
    return written
|
|
@@ -2,9 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import warnings
|
|
6
|
+
from pathlib import Path
|
|
5
7
|
from typing import Any
|
|
6
8
|
|
|
9
|
+
from tqdm.auto import tqdm
|
|
10
|
+
|
|
7
11
|
from ._dataframe import to_pandas, to_polars
|
|
12
|
+
from ._downloads import build_filename, download_file, get_download_url
|
|
8
13
|
from ._http import HttpTransport
|
|
9
14
|
from ._pagination import fetch_all_pages
|
|
10
15
|
from ._types import Scope
|
|
@@ -55,6 +60,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
55
60
|
scope_id: str,
|
|
56
61
|
*,
|
|
57
62
|
scope: str = "experiment",
|
|
63
|
+
deployment_id: str | None = None,
|
|
58
64
|
room_id: str | None = None,
|
|
59
65
|
start_time: str | None = None,
|
|
60
66
|
end_time: str | None = None,
|
|
@@ -71,6 +77,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
71
77
|
Args:
|
|
72
78
|
scope_id: ID of the experiment, room, or participant.
|
|
73
79
|
scope: ``"experiment"``, ``"room"``, or ``"participant"``.
|
|
80
|
+
deployment_id: Filter by deployment (experiment scope only).
|
|
74
81
|
room_id: Required when ``scope="participant"``.
|
|
75
82
|
start_time: ISO 8601 start filter.
|
|
76
83
|
end_time: ISO 8601 end filter.
|
|
@@ -84,7 +91,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
84
91
|
"""
|
|
85
92
|
return self._fetch_data(
|
|
86
93
|
"events", scope_id,
|
|
87
|
-
scope=scope, room_id=room_id,
|
|
94
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
88
95
|
start_time=start_time, end_time=end_time,
|
|
89
96
|
category=category, sort=sort, order=order,
|
|
90
97
|
limit=limit, offset=offset,
|
|
@@ -96,6 +103,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
96
103
|
scope_id: str,
|
|
97
104
|
*,
|
|
98
105
|
scope: str = "experiment",
|
|
106
|
+
deployment_id: str | None = None,
|
|
99
107
|
room_id: str | None = None,
|
|
100
108
|
limit: int | None = None,
|
|
101
109
|
offset: int = 0,
|
|
@@ -105,7 +113,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
105
113
|
"""Fetch video/audio recording metadata."""
|
|
106
114
|
return self._fetch_data(
|
|
107
115
|
"recordings", scope_id,
|
|
108
|
-
scope=scope, room_id=room_id,
|
|
116
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
109
117
|
limit=limit, offset=offset,
|
|
110
118
|
output=output, progress=progress,
|
|
111
119
|
)
|
|
@@ -115,6 +123,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
115
123
|
scope_id: str,
|
|
116
124
|
*,
|
|
117
125
|
scope: str = "experiment",
|
|
126
|
+
deployment_id: str | None = None,
|
|
118
127
|
room_id: str | None = None,
|
|
119
128
|
start_time: str | None = None,
|
|
120
129
|
end_time: str | None = None,
|
|
@@ -128,7 +137,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
128
137
|
"""Fetch text chat messages."""
|
|
129
138
|
return self._fetch_data(
|
|
130
139
|
"chat", scope_id,
|
|
131
|
-
scope=scope, room_id=room_id,
|
|
140
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
132
141
|
start_time=start_time, end_time=end_time,
|
|
133
142
|
sort=sort, order=order,
|
|
134
143
|
limit=limit, offset=offset,
|
|
@@ -140,6 +149,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
140
149
|
scope_id: str,
|
|
141
150
|
*,
|
|
142
151
|
scope: str = "experiment",
|
|
152
|
+
deployment_id: str | None = None,
|
|
143
153
|
room_id: str | None = None,
|
|
144
154
|
start_time: str | None = None,
|
|
145
155
|
end_time: str | None = None,
|
|
@@ -153,7 +163,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
153
163
|
"""Fetch LiveKit video chat connection data."""
|
|
154
164
|
return self._fetch_data(
|
|
155
165
|
"videochat", scope_id,
|
|
156
|
-
scope=scope, room_id=room_id,
|
|
166
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
157
167
|
start_time=start_time, end_time=end_time,
|
|
158
168
|
sort=sort, order=order,
|
|
159
169
|
limit=limit, offset=offset,
|
|
@@ -165,6 +175,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
165
175
|
scope_id: str,
|
|
166
176
|
*,
|
|
167
177
|
scope: str = "experiment",
|
|
178
|
+
deployment_id: str | None = None,
|
|
168
179
|
room_id: str | None = None,
|
|
169
180
|
start_time: str | None = None,
|
|
170
181
|
end_time: str | None = None,
|
|
@@ -182,7 +193,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
182
193
|
extra["aggregationWindow"] = aggregation_window
|
|
183
194
|
return self._fetch_data(
|
|
184
195
|
"sync", scope_id,
|
|
185
|
-
scope=scope, room_id=room_id,
|
|
196
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
186
197
|
start_time=start_time, end_time=end_time,
|
|
187
198
|
sort=sort, order=order,
|
|
188
199
|
limit=limit, offset=offset,
|
|
@@ -196,6 +207,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
196
207
|
*,
|
|
197
208
|
kind: str = "continuous",
|
|
198
209
|
scope: str = "experiment",
|
|
210
|
+
deployment_id: str | None = None,
|
|
199
211
|
room_id: str | None = None,
|
|
200
212
|
start_time: str | None = None,
|
|
201
213
|
end_time: str | None = None,
|
|
@@ -214,7 +226,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
214
226
|
"""
|
|
215
227
|
return self._fetch_data(
|
|
216
228
|
f"ratings/{kind}", scope_id,
|
|
217
|
-
scope=scope, room_id=room_id,
|
|
229
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
218
230
|
start_time=start_time, end_time=end_time,
|
|
219
231
|
sort=sort, order=order,
|
|
220
232
|
limit=limit, offset=offset,
|
|
@@ -226,6 +238,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
226
238
|
scope_id: str,
|
|
227
239
|
*,
|
|
228
240
|
scope: str = "experiment",
|
|
241
|
+
deployment_id: str | None = None,
|
|
229
242
|
room_id: str | None = None,
|
|
230
243
|
limit: int | None = None,
|
|
231
244
|
offset: int = 0,
|
|
@@ -235,7 +248,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
235
248
|
"""Fetch component response data."""
|
|
236
249
|
return self._fetch_data(
|
|
237
250
|
"components", scope_id,
|
|
238
|
-
scope=scope, room_id=room_id,
|
|
251
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
239
252
|
limit=limit, offset=offset,
|
|
240
253
|
output=output, progress=progress,
|
|
241
254
|
)
|
|
@@ -245,6 +258,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
245
258
|
scope_id: str,
|
|
246
259
|
*,
|
|
247
260
|
scope: str = "experiment",
|
|
261
|
+
deployment_id: str | None = None,
|
|
248
262
|
room_id: str | None = None,
|
|
249
263
|
limit: int | None = None,
|
|
250
264
|
offset: int = 0,
|
|
@@ -254,7 +268,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
254
268
|
"""Fetch participant data."""
|
|
255
269
|
return self._fetch_data(
|
|
256
270
|
"participants", scope_id,
|
|
257
|
-
scope=scope, room_id=room_id,
|
|
271
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
258
272
|
limit=limit, offset=offset,
|
|
259
273
|
output=output, progress=progress,
|
|
260
274
|
)
|
|
@@ -264,6 +278,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
264
278
|
scope_id: str,
|
|
265
279
|
*,
|
|
266
280
|
scope: str = "experiment",
|
|
281
|
+
deployment_id: str | None = None,
|
|
267
282
|
limit: int | None = None,
|
|
268
283
|
offset: int = 0,
|
|
269
284
|
output: str = "pandas",
|
|
@@ -272,7 +287,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
272
287
|
"""Fetch room/session data."""
|
|
273
288
|
return self._fetch_data(
|
|
274
289
|
"rooms", scope_id,
|
|
275
|
-
scope=scope,
|
|
290
|
+
scope=scope, deployment_id=deployment_id,
|
|
276
291
|
limit=limit, offset=offset,
|
|
277
292
|
output=output, progress=progress,
|
|
278
293
|
)
|
|
@@ -286,6 +301,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
286
301
|
scope_id: str,
|
|
287
302
|
*,
|
|
288
303
|
scope: str = "experiment",
|
|
304
|
+
deployment_id: str | None = None,
|
|
289
305
|
room_id: str | None = None,
|
|
290
306
|
start_time: str | None = None,
|
|
291
307
|
end_time: str | None = None,
|
|
@@ -303,7 +319,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
303
319
|
"""
|
|
304
320
|
return self._fetch_data(
|
|
305
321
|
"events", scope_id,
|
|
306
|
-
scope=scope, room_id=room_id,
|
|
322
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
307
323
|
start_time=start_time, end_time=end_time,
|
|
308
324
|
category="questionnaire", sort=sort, order=order,
|
|
309
325
|
limit=limit, offset=offset,
|
|
@@ -315,6 +331,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
315
331
|
scope_id: str,
|
|
316
332
|
*,
|
|
317
333
|
scope: str = "experiment",
|
|
334
|
+
deployment_id: str | None = None,
|
|
318
335
|
room_id: str | None = None,
|
|
319
336
|
start_time: str | None = None,
|
|
320
337
|
end_time: str | None = None,
|
|
@@ -332,7 +349,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
332
349
|
"""
|
|
333
350
|
return self._fetch_and_filter(
|
|
334
351
|
"instructions.", scope_id,
|
|
335
|
-
scope=scope, room_id=room_id,
|
|
352
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
336
353
|
start_time=start_time, end_time=end_time,
|
|
337
354
|
sort=sort, order=order,
|
|
338
355
|
limit=limit, offset=offset,
|
|
@@ -344,6 +361,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
344
361
|
scope_id: str,
|
|
345
362
|
*,
|
|
346
363
|
scope: str = "experiment",
|
|
364
|
+
deployment_id: str | None = None,
|
|
347
365
|
room_id: str | None = None,
|
|
348
366
|
start_time: str | None = None,
|
|
349
367
|
end_time: str | None = None,
|
|
@@ -361,7 +379,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
361
379
|
"""
|
|
362
380
|
return self._fetch_and_filter(
|
|
363
381
|
"consent.", scope_id,
|
|
364
|
-
scope=scope, room_id=room_id,
|
|
382
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
365
383
|
start_time=start_time, end_time=end_time,
|
|
366
384
|
sort=sort, order=order,
|
|
367
385
|
limit=limit, offset=offset,
|
|
@@ -453,6 +471,125 @@ class HyperStudy(ExperimentMixin):
|
|
|
453
471
|
"consent": self.get_consent(participant_id, **common),
|
|
454
472
|
}
|
|
455
473
|
|
|
474
|
+
# ------------------------------------------------------------------
|
|
475
|
+
# Recording downloads
|
|
476
|
+
# ------------------------------------------------------------------
|
|
477
|
+
|
|
478
|
+
def download_recording(
    self,
    recording: dict[str, Any],
    output_dir: str = ".",
) -> Path:
    """Fetch one recording and store it under *output_dir*.

    Args:
        recording: A recording dict (from ``get_recordings(output="dict")``).
        output_dir: Target directory; created (with parents) if missing.

    Returns:
        Path of the file that was written, named by ``build_filename``.

    Raises:
        ValueError: If ``get_download_url`` finds no usable URL.
    """
    source_url = get_download_url(recording)
    if not source_url:
        raise ValueError("Recording has no downloadUrl or url field")

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    target = target_dir / build_filename(recording)
    download_file(source_url, target)
    return target
|
|
503
|
+
|
|
504
|
+
def download_recordings(
    self,
    scope_id: str,
    *,
    output_dir: str,
    scope: str = "experiment",
    deployment_id: str | None = None,
    room_id: str | None = None,
    recording_type: str | None = None,
    progress: bool = True,
    skip_existing: bool = True,
):
    """Download recording files to disk.

    Fetches recording metadata, downloads each file from its signed
    URL, writes a ``recordings_metadata.csv`` sidecar, and returns a
    DataFrame with ``local_path`` and ``download_status`` columns.

    A failure on one recording does not abort the run: it is reported
    with ``warnings.warn`` and recorded as ``"failed"``.

    Args:
        scope_id: Experiment, room, or participant ID.
        output_dir: Directory to save files; created if missing.
        scope: ``"experiment"``, ``"room"``, or ``"participant"``.
        deployment_id: Filter by deployment (experiment scope only).
        room_id: Filter by room.
        recording_type: ``"audio"``, ``"video"``, or ``None`` (both);
            compared against ``metadata["type"]``.
        progress: Show progress bar.
        skip_existing: Skip files already on disk with matching size.

    Returns:
        pandas DataFrame with recording metadata plus ``local_path``
        (absolute path, or ``None`` on failure) and ``download_status``
        (``"downloaded"``, ``"skipped"`` or ``"failed"``).  Both columns
        are present even when no recordings matched; in that case the
        frame is empty and no CSV sidecar is written.
    """
    recordings = self.get_recordings(
        scope_id,
        scope=scope,
        deployment_id=deployment_id,
        room_id=room_id,
        output="dict",
    )

    if recording_type:
        recordings = [
            r for r in recordings
            if (r.get("metadata") or {}).get("type") == recording_type
        ]

    dest_dir = Path(output_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)

    local_paths: list[str | None] = []
    statuses: list[str] = []

    for rec in tqdm(recordings, desc="Downloading recordings", disable=not progress):
        filename = build_filename(rec)
        dest = dest_dir / filename

        url = get_download_url(rec)
        if not url:
            local_paths.append(None)
            statuses.append("failed")
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(
                f"Recording {rec.get('recordingId')} has no download URL",
                stacklevel=2,
            )
            continue

        if skip_existing and dest.exists():
            # NOTE(review): assumes fileSize is an int byte count; a string
            # value would never match and force a re-download — confirm
            # against the API schema.
            expected_size = rec.get("fileSize")
            if expected_size is None or dest.stat().st_size == expected_size:
                local_paths.append(str(dest.resolve()))
                statuses.append("skipped")
                continue

        try:
            download_file(url, dest)
        except Exception as exc:
            # Deliberate best-effort: record the failure and keep going.
            local_paths.append(None)
            statuses.append("failed")
            warnings.warn(
                f"Failed to download recording {rec.get('recordingId')}: {exc}",
                stacklevel=2,
            )
        else:
            local_paths.append(str(dest.resolve()))
            statuses.append("downloaded")

    df = to_pandas(recordings)
    # Attach the status columns unconditionally so callers can rely on them
    # even when nothing matched (both lists are then empty).
    df["local_path"] = local_paths
    df["download_status"] = statuses
    if not df.empty:
        df.to_csv(dest_dir / "recordings_metadata.csv", index=False)

    return df
|
|
592
|
+
|
|
456
593
|
# ------------------------------------------------------------------
|
|
457
594
|
# Internal helpers
|
|
458
595
|
# ------------------------------------------------------------------
|
|
@@ -463,6 +600,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
463
600
|
scope_id: str,
|
|
464
601
|
*,
|
|
465
602
|
scope: str = "experiment",
|
|
603
|
+
deployment_id: str | None = None,
|
|
466
604
|
room_id: str | None = None,
|
|
467
605
|
start_time: str | None = None,
|
|
468
606
|
end_time: str | None = None,
|
|
@@ -482,7 +620,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
482
620
|
# Always fetch as dicts so we can filter before conversion
|
|
483
621
|
raw = self._fetch_data(
|
|
484
622
|
"events", scope_id,
|
|
485
|
-
scope=scope, room_id=room_id,
|
|
623
|
+
scope=scope, deployment_id=deployment_id, room_id=room_id,
|
|
486
624
|
start_time=start_time, end_time=end_time,
|
|
487
625
|
category="pre_experiment", sort=sort, order=order,
|
|
488
626
|
limit=limit, offset=offset,
|
|
@@ -500,6 +638,7 @@ class HyperStudy(ExperimentMixin):
|
|
|
500
638
|
scope_id: str,
|
|
501
639
|
*,
|
|
502
640
|
scope: str = "experiment",
|
|
641
|
+
deployment_id: str | None = None,
|
|
503
642
|
room_id: str | None = None,
|
|
504
643
|
start_time: str | None = None,
|
|
505
644
|
end_time: str | None = None,
|
|
@@ -517,6 +656,8 @@ class HyperStudy(ExperimentMixin):
|
|
|
517
656
|
path = f"data/{data_type}/{scope_val.value}/{scope_id}"
|
|
518
657
|
|
|
519
658
|
params: dict[str, Any] = {"offset": offset}
|
|
659
|
+
if deployment_id:
|
|
660
|
+
params["deploymentId"] = deployment_id
|
|
520
661
|
if room_id:
|
|
521
662
|
params["roomId"] = room_id
|
|
522
663
|
if start_time:
|