pearmut-0.2.2.tar.gz → pearmut-0.2.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-0.2.2 → pearmut-0.2.3}/PKG-INFO +59 -7
- {pearmut-0.2.2 → pearmut-0.2.3}/README.md +58 -5
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/PKG-INFO +59 -7
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/requires.txt +0 -1
- {pearmut-0.2.2 → pearmut-0.2.3}/pyproject.toml +2 -2
- {pearmut-0.2.2 → pearmut-0.2.3}/server/app.py +19 -2
- {pearmut-0.2.2 → pearmut-0.2.3}/server/assignment.py +29 -11
- pearmut-0.2.3/server/cli.py +346 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/dashboard.bundle.js +1 -1
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/dashboard.html +1 -1
- pearmut-0.2.3/server/static/listwise.html +77 -0
- pearmut-0.2.3/server/static/pointwise.html +69 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/utils.py +72 -4
- pearmut-0.2.2/server/cli.py +0 -226
- pearmut-0.2.2/server/static/listwise.html +0 -77
- pearmut-0.2.2/server/static/pointwise.html +0 -69
- {pearmut-0.2.2 → pearmut-0.2.3}/LICENSE +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/SOURCES.txt +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/assets/favicon.svg +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/assets/style.css +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/index.html +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/listwise.bundle.js +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/server/static/pointwise.bundle.js +0 -0
- {pearmut-0.2.2 → pearmut-0.2.3}/setup.cfg +0 -0
{pearmut-0.2.2 → pearmut-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.2.2
+Version: 0.2.3
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -16,7 +16,6 @@ Requires-Dist: wonderwords>=3.0.0
 Requires-Dist: psutil>=7.1.0
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pynpm>=0.3.0; extra == "dev"
 Dynamic: license-file
 
 # Pearmut 🍐
@@ -165,8 +164,10 @@ You can add validation rules to items for tutorials or attention checks. Items w
 - Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
 - Loud attention checks: Include `warning` without `allow_skip` to force users to retry
 - Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+
 For listwise template, `validation` is an array where each element corresponds to a candidate.
-The dashboard shows failed/total validation checks per user.
+The dashboard shows failed/total validation checks per user, and ✅/❌ based on whether they pass the threshold.
+Set `validation_threshold` in `info` to control pass/fail: integer for max failed count, float in [0,1) for max failed proportion.
 See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
 
 ## Single-stream Assignment
@@ -181,7 +182,7 @@ We also support a simple allocation where all annotators draw from the same pool
 "protocol_score": True, # collect scores
 "protocol_error_spans": True, # collect error spans
 "protocol_error_categories": False, # do not collect MQM categories, so ESA
-"…
+"users": 50, # number of annotators (can also be a list, see below)
 },
 "data": [...], # list of all items (shared among all annotators)
 }
@@ -196,12 +197,31 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
 "assignment": "dynamic",
 "template": "listwise",
 "protocol_k": 5,
-"…
+"users": 50,
 },
 "data": [...], # list of all items
 }
 ```
 
+## Pre-defined User IDs and Tokens
+
+By default, user IDs and completion tokens are automatically generated. The `users` field can be:
+- A number (e.g., `50`) to generate that many random user IDs
+- A list of strings (e.g., `["alice", "bob"]`) to use specific user IDs
+- A list of dictionaries to specify user IDs with custom tokens:
+```python
+{
+"info": {
+...
+"users": [
+{"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
+{"user_id": "bob", "token_pass": "bob_done"} # missing tokens are auto-generated
+],
+},
+...
+}
+```
+
 To load a campaign into the server, run the following.
 It will fail if an existing campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
 It will also output a secret management link. Then, launch the server:
@@ -234,8 +254,7 @@ and independently of that select your protocol template:
 When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
 This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
 
-<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/…
-
+<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/800a1741-5f41-47ac-9d5d-5cbf6abfc0e6" />
 
 Additionally, at the end of an annotation, a token of completion is shown which can be compared to the correct one that you can download in metadat from the dashboard.
 An intentionally incorrect token can be shown if the annotations don't pass quality control.
@@ -252,6 +271,39 @@ Tip: make sure the elements are already appropriately styled.
 
 <img width="1000" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/77c4fa96-ee62-4e46-8e78-fd16e9007956" />
 
+## CLI Commands
+
+Pearmut provides the following commands:
+
+- `pearmut add <file(s)>`: Add one or more campaign JSON files. Supports wildcards (e.g., `pearmut add examples/*.json`).
+  - `-o/--overwrite`: Overwrite existing campaigns with the same ID.
+  - `--server <url>`: Prefix server URL for protocol links (default: `http://localhost:8001`).
+- `pearmut run`: Start the Pearmut server.
+  - `--port <port>`: Port to run the server on (default: 8001).
+  - `--server <url>`: Prefix server URL for protocol links.
+- `pearmut purge [campaign]`: Remove campaign data.
+  - Without arguments: Purges all campaigns (tasks, outputs, progress).
+  - With campaign name: Purges only the specified campaign's data.
+
+
+## Hosting Assets
+
+If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
+When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
+
+```python
+{
+"campaign_id": "my_campaign",
+"info": {
+"assets": "videos", # path to directory containing assets
+...
+},
+"data": [ ... ]
+}
+```
+
+For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
+The path can be absolute or relative to your current working directory.
 
 ## Development
 

{pearmut-0.2.2 → pearmut-0.2.3}/README.md

@@ -144,8 +144,10 @@ You can add validation rules to items for tutorials or attention checks. Items w
 - Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
 - Loud attention checks: Include `warning` without `allow_skip` to force users to retry
 - Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+
 For listwise template, `validation` is an array where each element corresponds to a candidate.
-The dashboard shows failed/total validation checks per user.
+The dashboard shows failed/total validation checks per user, and ✅/❌ based on whether they pass the threshold.
+Set `validation_threshold` in `info` to control pass/fail: integer for max failed count, float in [0,1) for max failed proportion.
 See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
 
 ## Single-stream Assignment
@@ -160,7 +162,7 @@ We also support a simple allocation where all annotators draw from the same pool
 "protocol_score": True, # collect scores
 "protocol_error_spans": True, # collect error spans
 "protocol_error_categories": False, # do not collect MQM categories, so ESA
-"…
+"users": 50, # number of annotators (can also be a list, see below)
 },
 "data": [...], # list of all items (shared among all annotators)
 }
@@ -175,12 +177,31 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
 "assignment": "dynamic",
 "template": "listwise",
 "protocol_k": 5,
-"…
+"users": 50,
 },
 "data": [...], # list of all items
 }
 ```
 
+## Pre-defined User IDs and Tokens
+
+By default, user IDs and completion tokens are automatically generated. The `users` field can be:
+- A number (e.g., `50`) to generate that many random user IDs
+- A list of strings (e.g., `["alice", "bob"]`) to use specific user IDs
+- A list of dictionaries to specify user IDs with custom tokens:
+```python
+{
+"info": {
+...
+"users": [
+{"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
+{"user_id": "bob", "token_pass": "bob_done"} # missing tokens are auto-generated
+],
+},
+...
+}
+```
+
 To load a campaign into the server, run the following.
 It will fail if an existing campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
 It will also output a secret management link. Then, launch the server:
@@ -213,8 +234,7 @@ and independently of that select your protocol template:
 When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
 This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
 
-<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/…
-
+<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/800a1741-5f41-47ac-9d5d-5cbf6abfc0e6" />
 
 Additionally, at the end of an annotation, a token of completion is shown which can be compared to the correct one that you can download in metadat from the dashboard.
 An intentionally incorrect token can be shown if the annotations don't pass quality control.
@@ -231,6 +251,39 @@ Tip: make sure the elements are already appropriately styled.
 
 <img width="1000" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/77c4fa96-ee62-4e46-8e78-fd16e9007956" />
 
+## CLI Commands
+
+Pearmut provides the following commands:
+
+- `pearmut add <file(s)>`: Add one or more campaign JSON files. Supports wildcards (e.g., `pearmut add examples/*.json`).
+  - `-o/--overwrite`: Overwrite existing campaigns with the same ID.
+  - `--server <url>`: Prefix server URL for protocol links (default: `http://localhost:8001`).
+- `pearmut run`: Start the Pearmut server.
+  - `--port <port>`: Port to run the server on (default: 8001).
+  - `--server <url>`: Prefix server URL for protocol links.
+- `pearmut purge [campaign]`: Remove campaign data.
+  - Without arguments: Purges all campaigns (tasks, outputs, progress).
+  - With campaign name: Purges only the specified campaign's data.
+
+
+## Hosting Assets
+
+If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
+When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
+
+```python
+{
+"campaign_id": "my_campaign",
+"info": {
+"assets": "videos", # path to directory containing assets
+...
+},
+"data": [ ... ]
+}
+```
+
+For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
+The path can be absolute or relative to your current working directory.
 
 ## Development
 

{pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.2.2
+Version: 0.2.3
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -16,7 +16,6 @@ Requires-Dist: wonderwords>=3.0.0
 Requires-Dist: psutil>=7.1.0
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pynpm>=0.3.0; extra == "dev"
 Dynamic: license-file
 
 # Pearmut 🍐
@@ -165,8 +164,10 @@ You can add validation rules to items for tutorials or attention checks. Items w
 - Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
 - Loud attention checks: Include `warning` without `allow_skip` to force users to retry
 - Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+
 For listwise template, `validation` is an array where each element corresponds to a candidate.
-The dashboard shows failed/total validation checks per user.
+The dashboard shows failed/total validation checks per user, and ✅/❌ based on whether they pass the threshold.
+Set `validation_threshold` in `info` to control pass/fail: integer for max failed count, float in [0,1) for max failed proportion.
 See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
 
 ## Single-stream Assignment
@@ -181,7 +182,7 @@ We also support a simple allocation where all annotators draw from the same pool
 "protocol_score": True, # collect scores
 "protocol_error_spans": True, # collect error spans
 "protocol_error_categories": False, # do not collect MQM categories, so ESA
-"…
+"users": 50, # number of annotators (can also be a list, see below)
 },
 "data": [...], # list of all items (shared among all annotators)
 }
@@ -196,12 +197,31 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
 "assignment": "dynamic",
 "template": "listwise",
 "protocol_k": 5,
-"…
+"users": 50,
 },
 "data": [...], # list of all items
 }
 ```
 
+## Pre-defined User IDs and Tokens
+
+By default, user IDs and completion tokens are automatically generated. The `users` field can be:
+- A number (e.g., `50`) to generate that many random user IDs
+- A list of strings (e.g., `["alice", "bob"]`) to use specific user IDs
+- A list of dictionaries to specify user IDs with custom tokens:
+```python
+{
+"info": {
+...
+"users": [
+{"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
+{"user_id": "bob", "token_pass": "bob_done"} # missing tokens are auto-generated
+],
+},
+...
+}
+```
+
 To load a campaign into the server, run the following.
 It will fail if an existing campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
 It will also output a secret management link. Then, launch the server:
@@ -234,8 +254,7 @@ and independently of that select your protocol template:
 When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
 This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
 
-<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/…
-
+<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/800a1741-5f41-47ac-9d5d-5cbf6abfc0e6" />
 
 Additionally, at the end of an annotation, a token of completion is shown which can be compared to the correct one that you can download in metadat from the dashboard.
 An intentionally incorrect token can be shown if the annotations don't pass quality control.
@@ -252,6 +271,39 @@ Tip: make sure the elements are already appropriately styled.
 
 <img width="1000" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/77c4fa96-ee62-4e46-8e78-fd16e9007956" />
 
+## CLI Commands
+
+Pearmut provides the following commands:
+
+- `pearmut add <file(s)>`: Add one or more campaign JSON files. Supports wildcards (e.g., `pearmut add examples/*.json`).
+  - `-o/--overwrite`: Overwrite existing campaigns with the same ID.
+  - `--server <url>`: Prefix server URL for protocol links (default: `http://localhost:8001`).
+- `pearmut run`: Start the Pearmut server.
+  - `--port <port>`: Port to run the server on (default: 8001).
+  - `--server <url>`: Prefix server URL for protocol links.
+- `pearmut purge [campaign]`: Remove campaign data.
+  - Without arguments: Purges all campaigns (tasks, outputs, progress).
+  - With campaign name: Purges only the specified campaign's data.
+
+
+## Hosting Assets
+
+If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
+When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
+
+```python
+{
+"campaign_id": "my_campaign",
+"info": {
+"assets": "videos", # path to directory containing assets
+...
+},
+"data": [ ... ]
+}
+```
+
+For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
+The path can be absolute or relative to your current working directory.
 
 ## Development
 

{pearmut-0.2.2 → pearmut-0.2.3}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "pearmut"
-version = "0.2.2"
+version = "0.2.3"
 description = "A tool for evaluation of model outputs, primarily MT."
 readme = "README.md"
 license = { text = "apache-2.0" }
@@ -20,7 +20,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-dev = ["pytest"…
+dev = ["pytest"]
 
 [project.scripts]
 pearmut = "pearmut.cli:main"

{pearmut-0.2.2 → pearmut-0.2.3}/server/app.py

@@ -9,7 +9,13 @@ from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 
 from .assignment import get_i_item, get_next_item, reset_task, update_progress
-from .utils import …
+from .utils import (
+    ROOT,
+    check_validation_threshold,
+    load_progress_data,
+    save_db_payload,
+    save_progress_data,
+)
 
 os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
 
@@ -151,6 +157,9 @@ async def _dashboard_data(request: DashboardDataRequest):
     if assignment not in ["task-based", "single-stream"]:
         return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
 
+    # Get threshold info for the campaign
+    validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
+
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
@@ -159,6 +168,13 @@ async def _dashboard_data(request: DashboardDataRequest):
             for v in list(entry.get("validations", {}).values())
         ]
 
+        # Add threshold pass/fail status (only when user is complete)
+        if all(entry["progress"]):
+            entry["threshold_passed"] = check_validation_threshold(
+                tasks_data, progress_data, campaign_id, user_id
+            )
+        else:
+            entry["threshold_passed"] = None
 
         if not is_privileged:
             entry["token_correct"] = None
@@ -169,7 +185,8 @@ async def _dashboard_data(request: DashboardDataRequest):
     return JSONResponse(
         content={
             "status": "ok",
-            "data": progress_new
+            "data": progress_new,
+            "validation_threshold": validation_threshold
         },
         status_code=200
     )

{pearmut-0.2.2 → pearmut-0.2.3}/server/assignment.py

@@ -3,18 +3,23 @@ from typing import Any
 
 from fastapi.responses import JSONResponse
 
-from .utils import …
+from .utils import (
+    RESET_MARKER,
+    check_validation_threshold,
+    get_db_log_item,
+    save_db_payload,
+)
 
 
 def _completed_response(
+    tasks_data: dict,
     progress_data: dict,
     campaign_id: str,
     user_id: str,
 ) -> JSONResponse:
     """Build a completed response with progress, time, and token."""
     user_progress = progress_data[campaign_id][user_id]
-
-    is_ok = True
+    is_ok = check_validation_threshold(tasks_data, progress_data, campaign_id, user_id)
     return JSONResponse(
         content={
             "status": "completed",
@@ -161,7 +166,7 @@ def get_next_item_taskbased(
     """
     user_progress = progress_data[campaign_id][user_id]
    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
 
     # find first incomplete item
     item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
@@ -208,7 +213,7 @@ def get_next_item_singlestream(
     progress = user_progress["progress"]
 
     if all(progress):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
 
     # find a random incomplete item
     incomplete_indices = [i for i, v in enumerate(progress) if not v]
@@ -261,20 +266,33 @@ def reset_task(
 ) -> JSONResponse:
     """
     Reset the task progress for the user in the specified campaign.
+    Saves a reset marker to mask existing annotations.
     """
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment == "task-based":
-        …
-        …
-        )
+        # Save reset marker for this user to mask existing annotations
+        num_items = len(tasks_data[campaign_id]["data"][user_id])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": user_id,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
+        progress_data[campaign_id][user_id]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
+        # Save reset markers for all items (shared pool)
+        num_items = len(tasks_data[campaign_id]["data"])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": None,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
         # for single-stream reset all progress
         for uid in progress_data[campaign_id]:
-            progress_data[campaign_id][uid]["progress"] =
-            [False]*len(tasks_data[campaign_id]["data"])
-            )
+            progress_data[campaign_id][uid]["progress"] = [False] * num_items
        _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     else:
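The new `check_validation_threshold` helper is imported from `server/utils.py`, whose hunks are not expanded above; only its call sites are visible here. A minimal sketch of the rule the README describes (an integer caps the number of failed checks, a float in `[0,1)` caps the failed proportion) could look like the following; the body and the `validations` bookkeeping are assumptions, not the shipped implementation.

```python
# Sketch only: the real implementation lives in server/utils.py.
# Assumption: progress_data[campaign_id][user_id]["validations"] holds one entry per
# validated item, reducible to passed/failed, mirroring the dashboard's failed/total counts.
def check_validation_threshold(tasks_data, progress_data, campaign_id, user_id) -> bool:
    threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
    validations = list(progress_data[campaign_id][user_id].get("validations", {}).values())
    if threshold is None or not validations:
        return True  # nothing configured or nothing checked: treat as passing
    failed = sum(1 for passed in validations if not passed)
    if isinstance(threshold, float) and 0 <= threshold < 1:
        return failed / len(validations) <= threshold  # max allowed failed proportion
    return failed <= threshold  # integer: max allowed failed count
```

The dashboard shows the same result as the ✅/❌ column, and `_completed_response` uses it as `is_ok`, presumably to pick between the pass and fail completion tokens.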
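`reset_task` now appends `RESET_MARKER` payloads through `save_db_payload` instead of deleting anything, so earlier annotations can be masked when the output log is read back. How that masking happens is part of the `server/utils.py` changes not shown above; one plausible reading-side sketch, assuming the outputs are a JSONL log replayed in order and that `RESET_MARKER` is a sentinel value, is:

```python
import json

RESET_MARKER = "__RESET__"  # assumption: the real sentinel is defined in server/utils.py

def latest_annotations(output_path: str) -> dict:
    """Replay the log; a reset marker discards everything recorded before it."""
    latest = {}
    with open(output_path, encoding="utf-8") as f:
        for line in f:
            payload = json.loads(line)
            item_i = payload["item_i"]
            if payload.get("annotations") == RESET_MARKER:
                if payload.get("user_id") is None:
                    # single-stream reset: the marker masks this item for every user
                    latest = {k: v for k, v in latest.items() if k[1] != item_i}
                else:
                    # task-based reset: mask only this user's copy of the item
                    latest.pop((payload["user_id"], item_i), None)
            else:
                latest[(payload.get("user_id"), item_i)] = payload
    return latest
```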
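Taken together, the README additions in this release let a campaign pin down annotators, completion tokens, the quality-control threshold, and hosted assets in one `info` block. The snippet below is an illustrative combination of the documented fields, not an example shipped with the package; all concrete values (campaign id, template, threshold, user names) are made up.

```python
{
    "campaign_id": "demo_campaign",  # hypothetical id
    "info": {
        "assignment": "single-stream",
        "validation_threshold": 0.2,  # float in [0,1): at most 20% of checks may fail
        "users": [
            {"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
            {"user_id": "bob", "token_pass": "bob_done"},  # missing tokens auto-generated
        ],
        "assets": "videos",  # served under localhost:8001/assets/videos/
    },
    "data": [...],  # items, including any validation rules
}
```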