pearmut 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: pearmut
3
+ Version: 0.0.1
4
+ Summary: A tool for evaluation of model outputs, primarily MT.
5
+ Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/zouharvi/pearmut
8
+ Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
9
+ Keywords: evaluation,machine translation,human evaluation,annotation
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: fastapi>=0.110.0
13
+ Requires-Dist: uvicorn>=0.29.0
14
+ Requires-Dist: wonderwords>=3.0.0
15
+ Requires-Dist: pynpm>=0.3.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest; extra == "dev"
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: pearmut
3
+ Version: 0.0.1
4
+ Summary: A tool for evaluation of model outputs, primarily MT.
5
+ Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/zouharvi/pearmut
8
+ Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
9
+ Keywords: evaluation,machine translation,human evaluation,annotation
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: fastapi>=0.110.0
13
+ Requires-Dist: uvicorn>=0.29.0
14
+ Requires-Dist: wonderwords>=3.0.0
15
+ Requires-Dist: pynpm>=0.3.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest; extra == "dev"
@@ -0,0 +1,12 @@
1
+ pyproject.toml
2
+ pearmut.egg-info/PKG-INFO
3
+ pearmut.egg-info/SOURCES.txt
4
+ pearmut.egg-info/dependency_links.txt
5
+ pearmut.egg-info/entry_points.txt
6
+ pearmut.egg-info/requires.txt
7
+ pearmut.egg-info/top_level.txt
8
+ server/cli.py
9
+ server/model.py
10
+ server/protocols.py
11
+ server/run.py
12
+ server/utils.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pearmut = pearmut.cli:main
@@ -0,0 +1,7 @@
1
+ fastapi>=0.110.0
2
+ uvicorn>=0.29.0
3
+ wonderwords>=3.0.0
4
+ pynpm>=0.3.0
5
+
6
+ [dev]
7
+ pytest
@@ -0,0 +1 @@
1
+ pearmut
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "pearmut"
3
+ version = "0.0.1"
4
+ description = "A tool for evaluation of model outputs, primarily MT."
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ requires-python = ">=3.12"
8
+ authors = [{ name = "Vilém Zouhar", email = "vilem.zouhar@gmail.com" }]
9
+ keywords = [
10
+ "evaluation",
11
+ "machine translation",
12
+ "human evaluation",
13
+ "annotation",
14
+ ]
15
+ dependencies = [
16
+ "fastapi >= 0.110.0",
17
+ "uvicorn >= 0.29.0",
18
+ "wonderwords >= 3.0.0",
19
+ "pynpm >= 0.3.0",
20
+ ]
21
+
22
+ [project.optional-dependencies]
23
+ dev = ["pytest"]
24
+
25
+ [project.scripts]
26
+ pearmut = "pearmut.cli:main"
27
+
28
+ [project.urls]
29
+ Repository = "https://github.com/zouharvi/pearmut"
30
+ Issues = "https://github.com/zouharvi/pearmut/issues"
31
+
32
+ [tool.setuptools]
33
+ package-dir = { "pearmut" = "server" }
34
+ packages = ["pearmut"]
35
+
36
+ [build-system]
37
+ requires = ["setuptools>=61.0", "wheel"]
38
+ build-backend = "setuptools.build_meta"
39
+
40
+ # python3 -m build
41
+ # python3 -m twine upload dist/* -u __token__
42
+
43
+ # pip install -e src/ --config-settings editable_mode=strict
@@ -0,0 +1,150 @@
1
+ import argparse
2
+ import hashlib
3
+ import json
4
+ import os
5
+ import urllib.parse
6
+
7
+ from .utils import ROOT, load_progress_data
8
+
9
+ os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
10
+ load_progress_data(warn=None)
11
+
12
+
13
def _run():
    """Serve the annotation application with uvicorn on 127.0.0.1:8001."""
    # Both imports stay local to this subcommand; importing `.run` executes
    # that module's top-level setup, so it is deferred until the server is
    # actually requested.
    from uvicorn import run as serve

    from .run import app as application

    serve(application, host="127.0.0.1", port=8001)
26
+
27
+
28
def _add_campaign(args_unknown):
    """
    Register a campaign described by a JSON data file.

    Reads the campaign file, generates one human-readable user ID per task
    (task-based campaigns) or per requested user (dynamic campaigns), writes
    the campaign to ``{ROOT}/data/tasks/<campaign_id>.json``, records fresh
    per-user progress in ``{ROOT}/data/progress.json`` and prints the
    dashboard URL followed by one annotation URL per user.

    Args:
        args_unknown: Command-line arguments left over after the subcommand
            name; parsed here as ``data_file [-o/--overwrite]``.

    Raises:
        SystemExit: With status 1 when the campaign already exists and
            ``--overwrite`` was not given.
        ValueError: When ``info.type`` is neither "task-based" nor "dynamic".
    """
    import random
    import secrets
    import sys

    import wonderwords

    parser = argparse.ArgumentParser()
    parser.add_argument('data_file', type=str,
                        help='Path to the campaign data file')
    parser.add_argument("-o", "--overwrite", action="store_true",
                        help="Overwrite existing campaign if it exists")
    args = parser.parse_args(args_unknown)

    with open(args.data_file, 'r') as f:
        campaign_data = json.load(f)

    with open(f"{ROOT}/data/progress.json", "r") as f:
        progress_data = json.load(f)

    campaign_id = campaign_data['campaign_id']

    if campaign_id in progress_data and not args.overwrite:
        print(
            f"Campaign {campaign_id} already exists.",
            "Use -o to overwrite."
        )
        # sys.exit instead of the site-provided `exit` builtin, which is not
        # guaranteed to exist (e.g. under `python -S`)
        sys.exit(1)

    # use random words for identifying users; seeding with the campaign ID
    # makes the generated user IDs reproducible for the same campaign
    rng = random.Random(campaign_id)
    rword = wonderwords.RandomWord(rng=rng)

    campaign_type = campaign_data["info"]["type"]
    if campaign_type == "task-based":
        tasks = campaign_data["data"]
        amount = len(tasks)
    elif campaign_type == "dynamic":
        # dynamic campaigns have no per-user task list to distribute
        tasks = None
        amount = campaign_data["num_users"]
    else:
        raise ValueError(f"Unknown campaign type: {campaign_type}")

    user_ids = []
    seen_ids = set()  # O(1) duplicate check; user_ids keeps generation order
    while len(user_ids) < amount:
        # adjective is drawn before noun so the RNG sequence stays stable
        adjective = rword.random_words(
            amount=1, include_parts_of_speech=['adjective'])[0]
        noun = rword.random_words(
            amount=1, include_parts_of_speech=['noun'])[0]
        new_id = f"{adjective}-{noun}"
        if new_id not in seen_ids:
            seen_ids.add(new_id)
            user_ids.append(new_id)
    user_ids = [
        f"{user_id}-{rng.randint(0, 999):03d}"
        for user_id in user_ids
    ]

    server_url = campaign_data["info"].get(
        "url",
        "127.0.0.1:8001",  # by default local server
    ).removesuffix("/")

    # Only task-based campaigns carry a task list to re-key by user ID.
    # Doing this unconditionally raised NameError for dynamic campaigns.
    if tasks is not None:
        campaign_data["data"] = dict(zip(user_ids, tasks))

    # generate a token for dashboard access if not present; tokens guard
    # access, so they come from the OS CSPRNG (10 hex characters)
    if "token" not in campaign_data:
        campaign_data["token"] = secrets.token_hex(5)

    quoted_campaign_id = urllib.parse.quote_plus(campaign_id)
    template = campaign_data["info"]["template"]
    user_progress = {
        user_id: {
            "progress": (
                [False]*len(campaign_data["data"][user_id])
                if campaign_type == "task-based" else []
            ),
            "time_start": None,
            "time_end": None,
            "time": 0,
            "url": (
                f"{server_url}/{template}.html"
                f"?campaign_id={quoted_campaign_id}"
                f"&user_id={user_id}"
            ),
            "token_correct": secrets.token_hex(5),
            "token_incorrect": secrets.token_hex(5),
        }
        for user_id in user_ids
    }

    with open(f"{ROOT}/data/tasks/{campaign_id}.json", "w") as f:
        json.dump(campaign_data, f, indent=2, ensure_ascii=False)

    progress_data[campaign_id] = user_progress

    with open(f"{ROOT}/data/progress.json", "w") as f:
        json.dump(progress_data, f, indent=2, ensure_ascii=False)

    print(
        f"{server_url}/dashboard.html"
        f"?campaign_id={quoted_campaign_id}"
        f"&token={campaign_data['token']}"
    )
    print("-"*10)
    for user_val in user_progress.values():
        # point to the protocol URL
        print(user_val["url"])
126
+
127
+
128
def main():
    """Parse the subcommand (run / add / purge) and dispatch to it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('command', type=str, choices=['run', 'add', 'purge'])
    known, remaining = parser.parse_known_args()
    command = known.command

    if command == 'run':
        _run()
        return
    if command == 'add':
        # everything after the subcommand is parsed by _add_campaign itself
        _add_campaign(remaining)
        return
    if command != 'purge':
        return

    import shutil

    answer = input(
        "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n]"
    )
    if answer.lower() != 'y':
        print("Cancelled.")
        return

    # remove task definitions and collected outputs, then the progress file
    for data_dir in (f"{ROOT}/data/tasks", f"{ROOT}/data/outputs"):
        shutil.rmtree(data_dir, ignore_errors=True)
    progress_file = f"{ROOT}/data/progress.json"
    if os.path.exists(progress_file):
        os.remove(progress_file)
    print("All campaign data purged.")
@@ -0,0 +1,61 @@
1
+ # ruff: noqa
2
+
3
+ raise Exception("Deprecated")
4
+ """
5
+ See scripts/models.py for a list of possible competition models.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import random
11
+
12
+ sys1, sys2 = random.sample(systems, 2)
13
+ segment_registry[(sys1, sys2)] += 1
14
+ # TODO: handle overflow better
15
+ if segment_registry[(sys1, sys2)] >= len(data):
16
+ segment_registry[(sys1, sys2)] = 0
17
+
18
+ line = data[segment_registry[(sys1, sys2)]]
19
+
20
+ texts = [highlight_differences(a, b) for a, b in zip(
21
+ line["tgt_text"][sys1],
22
+ line["tgt_text"][sys2],
23
+ )]
24
+
25
+ return JSONResponse(content={
26
+ "doc_id": line["doc_id"],
27
+ # TODO: this is not good sentence splitting
28
+ "src": [line.replace(". ", ".<br><br>") for line in line["src_text"]],
29
+ "sys_a": sys1,
30
+ "out_a": [line_a.replace(". ", ".<br><br>") for line_a, line_b in texts],
31
+ "sys_b": sys2,
32
+ "out_b": [line_b.replace(". ", ".<br><br>") for line_a, line_b in texts],
33
+ })
34
+
35
class CompetitionModel():
    """
    Performance-rating model over pairwise system comparisons.

    ``self.scores`` maps each system name to a list of
    ``(opponent_score, result_offset)`` pairs, one per recorded comparison.
    The state is persisted to ``data/model_elo.json``.
    """

    def __init__(self, systems):
        """Load persisted scores if present, otherwise start every system empty."""
        if os.path.exists("data/model_elo.json"):
            with open("data/model_elo.json", "r") as f:
                self.scores = json.load(f)
        else:
            print("Initializing new ELO model")
            self.scores = {name: [] for name in systems}

    def system_score(self, sys):
        """
        Return the current score of system ``sys``.

        A system without recorded games scores the baseline 1000. Otherwise
        the score is the mean of ``opponent + result`` over its games. The
        baseline is not part of that mean: previously it was added to the sum
        before dividing, so a single draw against a 1000-rated opponent
        yielded 2000.
        """
        games = self.scores[sys]
        if not games:
            return 1000
        return sum(opponent + result for opponent, result in games) / len(games)

    def future_information(self, sys1, sys2):
        """Not implemented; returns None."""
        pass

    def record_result(self, sys1, sys2, result):
        """
        Record one comparison between ``sys1`` and ``sys2`` and persist it.

        ``result`` is mapped to an offset of ``1600*result - 800`` for sys1
        and the mirrored offset for sys2.
        """
        # Read both ratings before appending anything, so that neither entry
        # is computed from a rating that already includes this very game.
        score1 = self.system_score(sys1)
        score2 = self.system_score(sys2)
        self.scores[sys1].append((score2, 1600*result - 800))
        self.scores[sys2].append((score1, 1600*(1-result) - 800))

        self.save()

    def save(self):
        """Write the current scores to ``data/model_elo.json``."""
        with open("data/model_elo.json", "w") as f:
            json.dump(self.scores, f)
@@ -0,0 +1,108 @@
1
+ from typing import Any
2
+ from fastapi.responses import JSONResponse
3
+
4
+
5
def get_next_item(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Route a next-item request to the handler for the campaign's protocol type.

    Returns the handler's response, or a 400 response when the campaign's
    ``info.type`` is neither "task-based" nor "dynamic".
    """
    campaign_type = tasks_data[campaign_id]["info"]["type"]
    if campaign_type == "task-based":
        handler = get_next_item_taskbased
    elif campaign_type == "dynamic":
        handler = get_next_item_dynamic
    else:
        return JSONResponse(content={"error": "Unknown campaign type"}, status_code=400)
    return handler(campaign_id, user_id, tasks_data, progress_data)
17
+
18
+
19
def get_next_item_taskbased(
    campaign_id: str,
    user_id: str,
    data_all: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Return the first unfinished item of a user's task, or the completion token.

    When every entry of the user's progress list is truthy, the response has
    status "completed" and carries a token; otherwise it has status "ok" and
    carries the payload of the first unfinished item together with campaign
    info (status message, item index and all ``protocol*`` settings).
    """
    user_progress = progress_data[campaign_id][user_id]
    done_flags = user_progress["progress"]
    campaign = data_all[campaign_id]
    user_items = campaign["data"][user_id]

    progress_summary = {
        "completed": sum(done_flags),
        "time": user_progress["time"],
        "total": len(user_items),
    }

    if all(done_flags):
        # all items completed
        # TODO: add check for data quality
        passed_check = True
        token_key = "token_correct" if passed_check else "token_incorrect"
        return JSONResponse(
            content={
                "status": "completed",
                "progress": progress_summary,
                "token": user_progress[token_key],
            },
            status_code=200
        )

    # find first incomplete item (one exists because all() was false)
    item_i = next(i for i, done in enumerate(done_flags) if not done)

    info = {
        "status_message": campaign["info"].get("status_message", ""),
        "item_i": item_i,
    }
    # forward every protocol-specific setting of the campaign to the client
    for key, value in campaign["info"].items():
        if key.startswith("protocol"):
            info[key] = value

    return JSONResponse(
        content={
            "status": "ok",
            "progress": progress_summary,
            "info": info,
            "payload": user_items[item_i],
        },
        status_code=200
    )
63
+
64
+
65
def get_next_item_dynamic(campaign_data: dict, user_id: str, progress_data: dict, data_all: dict):
    """
    Placeholder for the dynamic protocol; always raises.

    NOTE(review): get_next_item calls this positionally with
    (campaign_id, user_id, tasks_data, progress_data), which does not line up
    with the parameter names declared here. Reconcile the order when the
    protocol is implemented.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Dynamic protocol is not implemented yet.")
68
+
69
+
70
def reset_task(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Reset a user's progress and timing information in place.

    Task-based campaigns get one ``False`` flag per item of the user's task;
    any other campaign type gets an empty progress list. Always returns an
    "ok" response.
    """
    is_task_based = tasks_data[campaign_id]["info"]["type"] == "task-based"
    user_state = progress_data[campaign_id][user_id]

    if is_task_based:
        fresh_progress = [False]*len(tasks_data[campaign_id]["data"][user_id])
    else:
        fresh_progress = []

    user_state.update({
        "progress": fresh_progress,
        "time": 0.0,
        "time_start": None,
        "time_end": None,
    })
    return JSONResponse(content={"status": "ok"}, status_code=200)
88
+
89
+
90
+
91
def log_response(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
    item_i: int,
    payload: Any,
) -> JSONResponse:
    """
    Mark item ``item_i`` of a user's task as completed.

    Only task-based campaigns are supported; other known types return a 400
    "not implemented" response, unknown types a 400 "Unknown campaign type".
    ``payload`` is accepted for interface symmetry and not inspected here.

    Returns:
        A 200 "ok" response on success, or a 400 error response (including
        when ``item_i`` is outside the user's progress list).
    """
    campaign_type = tasks_data[campaign_id]["info"]["type"]

    if campaign_type == "task-based":
        progress = progress_data[campaign_id][user_id]["progress"]
        # item_i comes from the client: reject out-of-range values instead of
        # raising IndexError, and reject negatives, which would otherwise
        # silently mark an item counted from the end of the list
        if not 0 <= item_i < len(progress):
            return JSONResponse(content={"status": "error", "message": "Item index out of range"}, status_code=400)
        # even if it's already set it should be fine
        progress[item_i] = True
        return JSONResponse(content={"status": "ok"}, status_code=200)
    elif campaign_type == "dynamic":
        return JSONResponse(content={"status": "error", "message": "Dynamic protocol logging not implemented yet."}, status_code=400)
    elif campaign_type == "task-single":
        return JSONResponse(content={"status": "error", "message": "Task-single protocol logging not implemented yet."}, status_code=400)
    else:
        return JSONResponse(content={"status": "error", "message": "Unknown campaign type"}, status_code=400)
@@ -0,0 +1,217 @@
1
+ import json
2
+ import os
3
+ import urllib
4
+ from typing import Any
5
+
6
+ from fastapi import FastAPI, Query
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import JSONResponse
9
+ from fastapi.staticfiles import StaticFiles
10
+ from pydantic import BaseModel
11
+ from pynpm import NPMPackage
12
+
13
+ from .protocols import get_next_item, reset_task, log_response
14
+ from .utils import ROOT, load_progress_data, save_progress_data
15
+
16
# `import urllib` alone does not guarantee that the `parse` submodule is
# loaded; import it explicitly for quote_plus below
import urllib.parse

# annotation outputs live under ROOT, the same location the /log-response
# handler appends to
os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)

# build frontend
pkg = NPMPackage('src/web/package.json')
pkg.install()
pkg.run_script('build')

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# campaign_id -> campaign definition loaded from data/tasks/<campaign_id>.json
tasks_data = {}
# campaign_id -> user_id -> progress record
progress_data = load_progress_data(
    warn="No progress.json found. Running, but no campaign will be available.")

# load all tasks into tasks_data
for campaign_id in progress_data.keys():
    with open(f"{ROOT}/data/tasks/{campaign_id}.json", "r") as f:
        tasks_data[campaign_id] = json.load(f)

# print access dashboard URL for all campaigns; skipped when there are none,
# so the server still starts as the warning above promises
if tasks_data:
    # "url" is optional in campaign files; fall back to the local server,
    # mirroring the default used when campaigns are added
    _dashboard_base = next(iter(tasks_data.values()))["info"].get(
        "url", "127.0.0.1:8001").removesuffix("/")
    print(
        _dashboard_base + "/dashboard.html?" + "&".join([
            f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data['token']}"
            for campaign_id, campaign_data in tasks_data.items()
        ])
    )
48
+
49
+
50
class LogResponseRequest(BaseModel):
    # JSON body of POST /log-response.
    campaign_id: str  # must be a known campaign, else the handler answers 400
    user_id: str  # must belong to that campaign, else the handler answers 400
    item_i: int  # index into the user's progress list to mark as done
    # Appended verbatim as one JSON line to the campaign's output file; an
    # "actions" entry, when present, is read for per-action "time" values.
    payload: Any
55
+
56
+
57
@app.post("/log-response")
async def _log_response(request: LogResponseRequest):
    """
    Store one annotation payload and update the user's progress.

    The payload is appended as a JSON line to the campaign's output file.
    When it contains a non-empty "actions" list, the action timestamps are
    used to update the user's start/end time and accumulated working time.
    Returns the response produced by ``log_response`` (so protocol errors
    reach the client), or a 400 response for unknown campaign/user IDs.
    """
    campaign_id = request.campaign_id
    user_id = request.user_id

    if campaign_id not in progress_data:
        return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
    if user_id not in progress_data[campaign_id]:
        return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)

    with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
        log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")

    # if actions were submitted, we can log time data; the payload is
    # free-form, so only a dict with a non-empty "actions" list is used
    # (an empty list would make min()/max() raise)
    payload = request.payload
    actions = payload.get("actions") if isinstance(payload, dict) else None
    if actions:
        times = [x["time"] for x in actions]
        user_progress = progress_data[campaign_id][user_id]
        if user_progress["time_start"] is None:
            user_progress["time_start"] = min(times)
        user_progress["time_end"] = max(times)
        # gaps between consecutive actions count at most 60 each, so idle
        # periods do not inflate the working time
        user_progress["time"] += sum(
            min(b - a, 60)
            for a, b in zip(times, times[1:])
        )

    response = log_response(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
    save_progress_data(progress_data)

    return response
89
+
90
+
91
class NextItemRequest(BaseModel):
    # JSON body of POST /get-next-item.
    campaign_id: str  # must be a known campaign, else the handler answers 400
    user_id: str  # must belong to that campaign, else the handler answers 400
94
+
95
+
96
@app.post("/get-next-item")
async def _get_next_item(request: NextItemRequest):
    """Validate campaign and user IDs, then delegate to ``get_next_item``."""
    cid, uid = request.campaign_id, request.user_id

    problem = None
    if cid not in progress_data:
        problem = "Unknown campaign ID"
    elif uid not in progress_data[cid]:
        problem = "Unknown user ID"
    if problem is not None:
        return JSONResponse(content={"error": problem}, status_code=400)

    return get_next_item(cid, uid, tasks_data, progress_data)
112
+
113
+
114
class DashboardDataRequest(BaseModel):
    # JSON body of POST /dashboard-data.
    campaign_id: str
    # Optional campaign token; without a matching token the handler blanks
    # the per-user completion tokens in its response.
    token: str | None = None
117
+
118
+
119
@app.post("/dashboard-data")
async def _dashboard_data(request: DashboardDataRequest):
    """
    Return per-user progress of one campaign for the dashboard.

    Each user's progress record is extended with "total", the number of items
    in that user's task. Unless the request carries the campaign token, the
    users' "token_correct" / "token_incorrect" values are replaced by None.
    Unknown campaigns yield a 400 response.
    """
    campaign_id = request.campaign_id

    # validate first: looking up the token of an unknown campaign would raise
    # KeyError and surface as a server error instead of this 400
    if campaign_id not in progress_data:
        return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)

    is_privileged = (request.token == tasks_data[campaign_id]["token"])

    progress_new = {}
    for user_id, user_val in progress_data[campaign_id].items():
        entry = {
            **user_val,
            "total": len(tasks_data[campaign_id]["data"][user_id]),
        }
        if not is_privileged:
            # override if not privileged
            entry["token_correct"] = None
            entry["token_incorrect"] = None
        progress_new[user_id] = entry

    return JSONResponse(
        content={
            "status": "ok",
            "data": progress_new
        },
        status_code=200
    )
149
+
150
+
151
class ResetTaskRequest(BaseModel):
    # JSON body of POST /reset-task.
    campaign_id: str
    user_id: str  # user whose progress is reset
    token: str  # must equal the campaign token, else the handler answers 400
155
+
156
+
157
@app.post("/reset-task")
async def _reset_task(request: ResetTaskRequest):
    """Reset one user's progress after checking campaign, token and user."""
    # ruff: noqa: F841
    cid = request.campaign_id
    uid = request.user_id

    # checks run in this order: campaign, then token, then user
    failure = None
    if cid not in progress_data:
        failure = "Unknown campaign ID"
    elif request.token != tasks_data[cid]["token"]:
        failure = "Invalid token"
    elif uid not in progress_data[cid]:
        failure = "Unknown user ID"
    if failure is not None:
        return JSONResponse(content={"error": failure}, status_code=400)

    outcome = reset_task(cid, uid, tasks_data, progress_data)
    save_progress_data(progress_data)
    return outcome
174
+
175
+
176
@app.get("/download-annotations")
async def _download_annotations(
    campaign_id: list[str] = Query(),
    # NOTE: currently not checking tokens for progress download as it is non-destructive
    # token: list[str] = Query()
):
    """
    Return the logged annotation payloads of the requested campaigns.

    The response maps each campaign ID to the list of JSON records in its
    output file (an empty list when nothing was logged yet). The first
    unknown campaign ID aborts the request with a 400 response.
    """
    collected = {}
    for cid in campaign_id:
        jsonl_path = f"{ROOT}/data/outputs/{cid}.jsonl"
        if cid not in progress_data:
            return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)

        records = []
        if os.path.exists(jsonl_path):
            with open(jsonl_path, "r") as f:
                records = [json.loads(row) for row in f.readlines()]
        collected[cid] = records

    return JSONResponse(content=collected, status_code=200)
195
+
196
+
197
@app.get("/download-progress")
async def _download_progress(
    campaign_id: list[str] = Query(),
    token: list[str] = Query()
):
    """
    Return the progress records of the requested campaigns.

    ``campaign_id`` and ``token`` are parallel lists: the i-th token must be
    the token of the i-th campaign. A length mismatch, an unknown campaign
    or a wrong token aborts the request with a 400 response.
    """
    if len(campaign_id) != len(token):
        return JSONResponse(content={"error": "Mismatched campaign_id and token count"}, status_code=400)

    output = {}
    # pair each campaign with its own token; the former loop bound both the
    # index and the value to the same name and then indexed the token list
    # with a string
    for cid, cid_token in zip(campaign_id, token):
        if cid not in progress_data:
            return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)
        if cid_token != tasks_data[cid]["token"]:
            return JSONResponse(content={"error": f"Invalid token for campaign ID {cid}"}, status_code=400)

        output[cid] = progress_data[cid]

    return JSONResponse(content=output, status_code=200)
216
+
217
+ app.mount("/", StaticFiles(directory="src/static", html=True), name="static")
@@ -0,0 +1,48 @@
1
+ import json
2
+ import os
3
+
4
# Base directory that data paths such as data/progress.json are built from.
ROOT = "."
5
+
6
def highlight_differences(a, b):
    """
    Mark the parts in which two strings differ with HTML span tags.

    Differences are found on the character level. A differing stretch of at
    most two characters on both sides is left unmarked.

    Args:
        a: The first string.
        b: The second string.

    Returns:
        A tuple ``(a_marked, b_marked)`` of the two strings with their larger
        differences wrapped in ``<span class="difference">``.
    """
    from difflib import SequenceMatcher

    def mark(fragment):
        return f'<span class="difference">{fragment}</span>'

    pieces_a = []
    pieces_b = []
    # TODO: maybe on the level of words?
    for op, a_lo, a_hi, b_lo, b_hi in SequenceMatcher(None, a, b).get_opcodes():
        chunk_a = a[a_lo:a_hi]
        chunk_b = b[b_lo:b_hi]
        is_small = a_hi - a_lo <= 2 and b_hi - b_lo <= 2

        if op == 'equal' or is_small:
            # identical or negligibly different: copy through untouched
            pieces_a.append(chunk_a)
            pieces_b.append(chunk_b)
            continue

        # a deletion only touches a, an insertion only touches b
        if op in ('replace', 'delete'):
            pieces_a.append(mark(chunk_a))
        if op in ('replace', 'insert'):
            pieces_b.append(mark(chunk_b))

    return "".join(pieces_a), "".join(pieces_b)
35
+
36
+
37
def load_progress_data(warn: str | None = None):
    """
    Read the progress file, creating an empty one first when it is missing.

    Args:
        warn: Message printed when the file had to be created; nothing is
            printed when it is None.

    Returns:
        The parsed content of ``{ROOT}/data/progress.json``.
    """
    progress_path = f"{ROOT}/data/progress.json"
    is_missing = not os.path.exists(progress_path)

    if is_missing and warn is not None:
        print(warn)
    if is_missing:
        # start from an empty campaign mapping
        with open(progress_path, "w") as f:
            json.dump({}, f)

    with open(progress_path, "r") as f:
        return json.loads(f.read())
45
+
46
def save_progress_data(data):
    """
    Write ``data`` to ``{ROOT}/data/progress.json`` as indented JSON.

    Raises:
        TypeError: When ``data`` holds a value json cannot serialize; the
            existing file is left untouched in that case.
    """
    # Serialize before opening: opening with "w" truncates the file, so a
    # serialization error after that point would wipe all stored progress.
    serialized = json.dumps(data, indent=2)
    with open(f"{ROOT}/data/progress.json", "w") as f:
        f.write(serialized)
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+