embedkombinat-annotator 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- annotator/__init__.py +6 -0
- annotator/auth.py +236 -0
- annotator/cli.py +134 -0
- annotator/client.py +174 -0
- annotator/config.py +38 -0
- annotator/engine/__init__.py +34 -0
- annotator/engine/base.py +54 -0
- annotator/engine/llama_cpp.py +28 -0
- annotator/engine/mlx.py +87 -0
- annotator/engine/vllm.py +112 -0
- annotator/errors.py +23 -0
- annotator/labeler.py +89 -0
- annotator/resolver.py +229 -0
- annotator/runner.py +258 -0
- embedkombinat_annotator-0.3.0.dist-info/METADATA +203 -0
- embedkombinat_annotator-0.3.0.dist-info/RECORD +19 -0
- embedkombinat_annotator-0.3.0.dist-info/WHEEL +4 -0
- embedkombinat_annotator-0.3.0.dist-info/entry_points.txt +2 -0
- embedkombinat_annotator-0.3.0.dist-info/licenses/LICENSE +190 -0
annotator/__init__.py
ADDED
annotator/auth.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""GitHub OAuth Device Flow and token management.
|
|
2
|
+
|
|
3
|
+
Why device flow: the previous web-flow design ran a localhost callback server
|
|
4
|
+
inside the CLI process, then handed GitHub a `redirect_uri=http://localhost:PORT/callback`
|
|
5
|
+
URI. That works on a single machine where browser and server are colocated
|
|
6
|
+
(your laptop), but breaks the moment the CLI runs on a remote host the user
|
|
7
|
+
SSH'd into (Runpod, Lambda, EC2, etc.) — the user's browser hits localhost
|
|
8
|
+
on *their machine*, not the remote one. The device flow has no callback at
|
|
9
|
+
all: the CLI prints a short user code, the user enters it in any browser on
|
|
10
|
+
any device, the CLI polls GitHub directly. Same auth, no machine-boundary
|
|
11
|
+
assumption.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import contextlib
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import stat
|
|
20
|
+
import time
|
|
21
|
+
import webbrowser
|
|
22
|
+
from datetime import UTC, datetime, timedelta
|
|
23
|
+
from typing import TYPE_CHECKING, Any
|
|
24
|
+
|
|
25
|
+
import httpx
|
|
26
|
+
from pydantic import BaseModel
|
|
27
|
+
|
|
28
|
+
from annotator import TEAL
|
|
29
|
+
from annotator.errors import AuthError
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
from rich.console import Console
|
|
35
|
+
|
|
36
|
+
from annotator.config import Settings
|
|
37
|
+
|
|
38
|
+
# File name of the persisted token inside the annotator home directory.
AUTH_FILE = "auth.json"
# GitHub endpoint that issues the device code / user code pair.
DEVICE_CODE_URL = "https://github.com/login/device/code"
# GitHub endpoint polled to trade the device code for an access token.
TOKEN_URL = "https://github.com/login/oauth/access_token"
# OAuth scope sent with the device-code request.
DEFAULT_SCOPE = "read:user"
# A token counts as expired this long before its recorded expiry time.
EXPIRY_BUFFER = timedelta(minutes=5)
# Lower bound, in seconds, on the delay between token-endpoint polls.
MIN_POLL_INTERVAL = 5.0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ContributorInfo(BaseModel):
    """Contributor identity stored alongside the auth token."""

    id: str
    github_username: str
    # Optional; None when no avatar URL was supplied.
    github_avatar_url: str | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AuthToken(BaseModel):
    """Kombinat credentials persisted to disk after a successful login."""

    kombinat_url: str
    access_token: str
    expires_at: datetime
    contributor: ContributorInfo

    def is_expired(self) -> bool:
        """Return True once the token is within EXPIRY_BUFFER of its expiry.

        A timezone-naive ``expires_at`` (for example from a hand-edited auth
        file) is interpreted as UTC, so comparing it with the aware "now"
        cannot raise ``TypeError``.
        """
        expires_at = self.expires_at
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=UTC)
        return datetime.now(tz=UTC) >= (expires_at - EXPIRY_BUFFER)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def save_token(token: AuthToken, home: Path) -> None:
    """Save the auth token to ``home / AUTH_FILE``, readable only by the owner.

    The file is created with owner-only permissions from the start instead of
    being written with default permissions and restricted afterwards, so the
    token is never momentarily readable by other users.

    Args:
        token: The token to persist; serialized with ``model_dump_json``.
        home: Directory that holds the auth file; created if missing.
    """
    home.mkdir(parents=True, exist_ok=True)
    path = home / AUTH_FILE
    owner_rw = stat.S_IRUSR | stat.S_IWUSR
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
    with os.fdopen(fd, "w") as handle:
        handle.write(token.model_dump_json(indent=2))
    # The mode given to os.open only applies when the file is newly created,
    # so tighten a pre-existing file explicitly as well.
    os.chmod(path, owner_rw)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def load_token(home: Path) -> AuthToken | None:
    """Load the auth token from ``home / AUTH_FILE``.

    Returns:
        The stored token, or None when the file is missing, cannot be parsed
        or validated, or the token has expired.
    """
    path = home / AUTH_FILE
    try:
        raw = path.read_text()
    except FileNotFoundError:
        # Reading directly (rather than checking exists() first) also covers
        # the file being removed between the check and the read.
        return None
    try:
        token = AuthToken.model_validate_json(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass, so this single clause
        # matches everything the previous (JSONDecodeError, ValueError) did.
        return None
    return None if token.is_expired() else token
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def delete_token(home: Path) -> None:
    """Remove the auth token from disk; a missing file is not an error."""
    # missing_ok avoids the exists()/unlink() race when the file disappears
    # between the two calls.
    (home / AUTH_FILE).unlink(missing_ok=True)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def fetch_client_id(kombinat_url: str) -> str:
    """Fetch the public GitHub OAuth client_id from kombinat.

    Args:
        kombinat_url: Base URL of the kombinat server.

    Returns:
        The non-empty ``client_id`` string from ``/v1/auth/config``.

    Raises:
        AuthError: If kombinat is unreachable, answers with a non-200 status,
            or returns a body without a non-empty string ``client_id``.
    """
    try:
        with httpx.Client() as client:
            resp = client.get(f"{kombinat_url}/v1/auth/config", timeout=10.0)
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach kombinat at {kombinat_url}: {exc}") from exc
    if resp.status_code != 200:
        raise AuthError(f"kombinat auth config fetch failed: {resp.status_code} {resp.text}")
    try:
        payload = resp.json()
    except ValueError as exc:
        # A 200 with a non-JSON body (e.g. a proxy error page) must surface as
        # AuthError rather than a raw decoding exception.
        raise AuthError("kombinat auth config response was not valid JSON") from exc
    client_id = payload.get("client_id") if isinstance(payload, dict) else None
    if not isinstance(client_id, str) or not client_id:
        raise AuthError("kombinat returned empty client_id — server is misconfigured")
    return client_id
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def request_device_code(client_id: str) -> dict[str, Any]:
    """Ask GitHub for a device code + user code.

    Args:
        client_id: OAuth client id of the GitHub app.

    Returns:
        The full response payload; guaranteed to contain ``device_code``,
        ``user_code``, ``verification_uri``, ``expires_in`` and ``interval``.

    Raises:
        AuthError: If GitHub is unreachable, answers with a non-200 status, or
            returns a body that is not JSON or lacks the required fields.
    """
    try:
        with httpx.Client() as client:
            resp = client.post(
                DEVICE_CODE_URL,
                data={"client_id": client_id, "scope": DEFAULT_SCOPE},
                headers={"Accept": "application/json"},
                timeout=10.0,
            )
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach GitHub: {exc}") from exc
    if resp.status_code != 200:
        raise AuthError(f"device code request failed: {resp.status_code} {resp.text}")
    try:
        data: dict[str, Any] = resp.json()
    except ValueError as exc:
        # Keep the "only AuthError escapes" contract for undecodable bodies.
        raise AuthError("device code response was not valid JSON") from exc
    required = {"device_code", "user_code", "verification_uri", "expires_in", "interval"}
    if not isinstance(data, dict) or not required.issubset(data):
        raise AuthError(
            "device code response is missing required fields. "
            "This usually means the OAuth app does not have Device Flow enabled — "
            "check 'Enable Device Flow' in the OAuth app settings on GitHub."
        )
    return data
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def poll_for_access_token(
    client_id: str,
    device_code: str,
    interval: float,
    expires_in: float,
) -> str:
    """Poll GitHub's token endpoint until the user authorizes.

    Args:
        client_id: OAuth client id of the GitHub app.
        device_code: Device code returned by the device-code request.
        interval: Poll interval in seconds suggested by GitHub; values below
            MIN_POLL_INTERVAL are raised to it.
        expires_in: Seconds until the device code expires; polling stops then.

    Returns:
        The GitHub access token.

    Raises:
        AuthError: On network failure, an undecodable response, denial,
            expiry, any unexpected error payload, or when the deadline passes.
    """
    deadline = time.monotonic() + expires_in
    poll_interval = max(interval, MIN_POLL_INTERVAL)
    form = {
        "client_id": client_id,
        "device_code": device_code,
        "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
    }

    # One client for the whole flow instead of a fresh one per poll.
    with httpx.Client() as client:
        while time.monotonic() < deadline:
            time.sleep(poll_interval)
            try:
                resp = client.post(
                    TOKEN_URL,
                    data=form,
                    headers={"Accept": "application/json"},
                    timeout=10.0,
                )
            except httpx.HTTPError as exc:
                raise AuthError(f"polling GitHub failed: {exc}") from exc

            try:
                data = resp.json()
            except ValueError as exc:
                raise AuthError(
                    f"polling GitHub returned a non-JSON response ({resp.status_code})"
                ) from exc
            if not isinstance(data, dict):
                raise AuthError(f"unexpected error during device flow: {data}")

            if "access_token" in data:
                return str(data["access_token"])

            error = data.get("error")
            if error == "authorization_pending":
                continue
            if error == "slow_down":
                # Polling too fast: back off by a further 5 seconds.
                poll_interval += 5.0
                continue
            if error == "expired_token":
                raise AuthError("device code expired before authorization completed")
            if error == "access_denied":
                raise AuthError("authorization was denied")
            raise AuthError(f"unexpected error during device flow: {data}")

    raise AuthError(f"device flow timed out after {expires_in:.0f}s")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def exchange_github_token(github_access_token: str, kombinat_url: str) -> AuthToken:
    """Exchange a GitHub access token for a kombinat JWT.

    Args:
        github_access_token: Token obtained from the GitHub device flow.
        kombinat_url: Base URL of the kombinat server.

    Returns:
        An AuthToken whose ``expires_at`` is "now" plus the ``expires_in``
        seconds reported by kombinat.

    Raises:
        AuthError: If kombinat is unreachable, rejects the token, answers with
            a non-200 status, or returns a malformed body.
    """
    try:
        with httpx.Client() as client:
            resp = client.post(
                f"{kombinat_url}/v1/auth/github-device",
                json={"access_token": github_access_token},
                timeout=30.0,
            )
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach kombinat at {kombinat_url}: {exc}") from exc
    if resp.status_code == 401:
        raise AuthError("kombinat rejected the GitHub access token")
    if resp.status_code != 200:
        raise AuthError(f"kombinat auth failed: {resp.status_code} {resp.text}")
    try:
        data = resp.json()
        expires_at = datetime.now(tz=UTC) + timedelta(seconds=data["expires_in"])
        contributor_data = data["contributor"]
        return AuthToken(
            kombinat_url=kombinat_url,
            access_token=data["access_token"],
            expires_at=expires_at,
            contributor=ContributorInfo(
                id=contributor_data["id"],
                github_username=contributor_data["github_username"],
                github_avatar_url=contributor_data.get("github_avatar_url"),
            ),
        )
    except (KeyError, TypeError, AttributeError, ValueError) as exc:
        # Non-JSON body, missing keys, wrong shapes or failed model validation:
        # report them as AuthError like every other failure of this function.
        raise AuthError(f"kombinat returned a malformed auth response: {exc!r}") from exc
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def login(settings: Settings, console: Console) -> AuthToken:
    """Authenticate via the GitHub Device Flow and persist the kombinat token.

    Prints the verification URL and user code, tries to open a browser, waits
    for the user to authorize, exchanges the GitHub token for a kombinat token,
    saves it under ``settings.annotator_home`` and returns it.
    """
    console.print(" No credentials found. Starting login...\n")

    oauth_client_id = fetch_client_id(settings.kombinat_url)
    grant = request_device_code(oauth_client_id)

    code_for_user = grant["user_code"]
    verify_url = grant["verification_uri"]
    code_for_device = grant["device_code"]
    poll_every = float(grant["interval"])
    valid_for = float(grant["expires_in"])

    console.print(f" -> Open in any browser: [bold]{verify_url}[/bold]")
    console.print(f" -> Enter code: [bold {TEAL}]{code_for_user}[/bold {TEAL}]\n")

    # Opening a browser is a convenience only; failures are deliberately
    # ignored so headless hosts simply fall back to the printed URL.
    with contextlib.suppress(Exception):
        webbrowser.open(verify_url)

    console.print(" -> Waiting for authorization...\n")

    gh_access_token = poll_for_access_token(
        oauth_client_id, code_for_device, poll_every, valid_for
    )
    kombinat_token = exchange_github_token(gh_access_token, settings.kombinat_url)
    save_token(kombinat_token, settings.annotator_home)

    console.print(
        f" [{TEAL}]✓[/{TEAL}] Authenticated as {kombinat_token.contributor.github_username}"
    )
    return kombinat_token
|
annotator/cli.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""CLI interface for annotator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.text import Text
|
|
11
|
+
|
|
12
|
+
from annotator import AMBER, TEAL, __version__
|
|
13
|
+
from annotator.config import ExitCode, Settings
|
|
14
|
+
|
|
15
|
+
# Root Typer application; the subcommands below register on it.
app = typer.Typer(
    name="annotator",
    no_args_is_help=False,
    # Run the callback even when no subcommand is given, so a bare
    # `annotator` invocation starts the default command.
    invoke_without_command=True,
    add_completion=False,
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _print_banner(console: Console) -> None:
    """Render the EmbedKombinat logo followed by a version panel."""
    accent = f"bold {TEAL}"
    rows = (
        " EEEEE K K\n",
        " E K K\n",
        " EEEE KKK\n",
        " E K K\n",
        " EEEEE K K\n",
    )
    logo = Text()
    for row in rows:
        logo.append(row, style=accent)

    title = f"embed kombinat \u00b7 annotator v{__version__}"
    panel = Panel(title, style=accent, width=len(title) + 6)

    console.print()
    console.print(logo, end="")
    console.print(panel)
    console.print()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    batch_size: Annotated[
        int | None, typer.Option("--batch-size", help="Pairs per batch claimed from kombinat")
    ] = None,
    model: Annotated[
        str | None, typer.Option("--model", help="Override auto-selected model (HuggingFace ID)")
    ] = None,
    quantization: Annotated[
        str | None, typer.Option("--quantization", help="Override quantization (awq, fp16, etc.)")
    ] = None,
    backend: Annotated[
        str | None, typer.Option("--backend", help="Override backend (vllm, mlx, llama_cpp)")
    ] = None,
    gpu_memory_utilization: Annotated[
        float | None,
        typer.Option("--gpu-memory-utilization", help="GPU memory fraction (vLLM only)"),
    ] = None,
    dry_run: Annotated[
        bool, typer.Option("--dry-run", help="Process one pair without submitting")
    ] = False,
) -> None:
    """Start the labeling loop. Default command.

    Options left unset fall back to the corresponding ``Settings`` value, so
    ``ANNOTATOR_BATCH_SIZE`` and ``ANNOTATOR_GPU_MEMORY_UTILIZATION`` take
    effect unless overridden on the command line. Exits with the code returned
    by the runner.
    """
    # The callback also fires before subcommands; do nothing in that case.
    if ctx.invoked_subcommand is not None:
        return

    console = Console()
    _print_banner(console)

    # Imported lazily, only when the labeling loop is actually started.
    from annotator.runner import AnnotatorRunner

    settings = Settings()
    runner = AnnotatorRunner(settings, console)
    exit_code = runner.run(
        batch_size=batch_size if batch_size is not None else settings.batch_size,
        model_override=model,
        quantization_override=quantization,
        backend_override=backend,
        # Previously a hard-coded 0.9 default shadowed the configured value.
        gpu_memory_utilization=(
            gpu_memory_utilization
            if gpu_memory_utilization is not None
            else settings.gpu_memory_utilization
        ),
        dry_run=dry_run,
    )
    raise typer.Exit(code=exit_code)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@app.command()
def login() -> None:
    """Authenticate with GitHub.

    Exits with ``ExitCode.AUTH_FAILURE`` if the login flow raises.
    """
    from rich.markup import escape

    from annotator import auth

    console = Console()
    settings = Settings()
    try:
        # auth.login already prints the "Authenticated as ..." confirmation,
        # so it is not repeated here.
        auth.login(settings, console)
    except Exception as e:
        # Escape the message: error text may contain "[...]" sequences that
        # Rich would otherwise try to interpret as markup.
        console.print(f" [{AMBER}]\u2717[/{AMBER}] Login failed: {escape(str(e))}")
        raise typer.Exit(code=ExitCode.AUTH_FAILURE) from e
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@app.command()
def status() -> None:
    """Show contributor profile and stats.

    Exits with ``ExitCode.AUTH_FAILURE`` when no valid token is stored or the
    server rejects it, and with ``ExitCode.KOMBINAT_UNREACHABLE`` for any other
    failure while fetching the profile.
    """
    from rich.markup import escape

    from annotator import auth
    from annotator.client import KombinatClient
    from annotator.errors import AuthError

    console = Console()
    settings = Settings()
    token = auth.load_token(settings.annotator_home)
    if token is None:
        console.print(f" [{AMBER}]Not logged in.[/{AMBER}] Run 'annotator login'.")
        raise typer.Exit(code=ExitCode.AUTH_FAILURE)

    client = KombinatClient(token.kombinat_url, token.access_token)
    try:
        profile = client.get_profile()
        console.print(f" [{TEAL}]\u2713[/{TEAL}] Logged in as {profile.github_username}")
        console.print(f" Total annotations: {profile.total_annotations}")
        console.print(f" Reputation score: {profile.reputation_score:.2f}")
    except AuthError as e:
        # A rejected token is an auth problem, not a connectivity problem.
        console.print(f" [{AMBER}]\u2717[/{AMBER}] Failed to fetch status: {escape(str(e))}")
        raise typer.Exit(code=ExitCode.AUTH_FAILURE) from e
    except Exception as e:
        # Escape so "[...]" in server error text is not parsed as Rich markup.
        console.print(f" [{AMBER}]\u2717[/{AMBER}] Failed to fetch status: {escape(str(e))}")
        raise typer.Exit(code=ExitCode.KOMBINAT_UNREACHABLE) from e
    finally:
        client.close()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@app.command()
def logout() -> None:
    """Delete the stored credentials and confirm on the console."""
    from annotator import auth

    out = Console()
    auth.delete_token(Settings().annotator_home)
    out.print(f" [{TEAL}]\u2713[/{TEAL}] Logged out.")
|
annotator/client.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""HTTP client for kombinat API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
from datetime import datetime # noqa: TC003 - needed at runtime by Pydantic
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from annotator.errors import AuthError, KombinatError
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)

# Retries after the first attempt; a request is tried at most MAX_RETRIES + 1 times.
MAX_RETRIES = 3
# Base delay in seconds for the exponential retry backoff.
MIN_RETRY_DELAY = 1.0
# Upper bound in seconds for a single retry delay.
MAX_RETRY_DELAY = 30.0
# First wait in seconds after an empty batch claim.
NO_PAIRS_INITIAL_WAIT = 30.0
# Upper bound in seconds for the empty-claim wait.
NO_PAIRS_MAX_WAIT = 600.0
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class PairData(BaseModel):
    """One (query, document) pair as delivered inside a claimed batch."""

    pair_id: str
    query_text: str
    doc_text: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BatchResponse(BaseModel):
    """Parsed body of a successful batch claim."""

    batch_id: str
    # NOTE(review): presumably the deadline after which the claim lapses — confirm with the server contract.
    expires_at: datetime
    pairs: list[PairData]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AnnotationPayload(BaseModel):
    """A single annotation entry inside an AnnotationSubmission."""

    pair_id: str
    label: int
    input_tokens: int
    output_tokens: int
    raw_response_hash: str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AnnotationSubmission(BaseModel):
    """Request body for submitting the annotations of one batch."""

    batch_id: str
    model_id: str
    quantization: str
    annotations: list[AnnotationPayload]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AnnotationResult(BaseModel):
    """Parsed response to an annotation submission."""

    accepted: int
    rejected: int
    # Optional; None when the response carries no honeypot accuracy.
    honeypot_accuracy: float | None = None
    pairs_verified: int = 0
    # NOTE(review): mutable literal default; presumably safe because pydantic
    # copies field defaults per instance — confirm.
    contributor_tokens: dict[str, int] = {}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ContributorProfile(BaseModel):
    """Contributor profile and stats as returned by the profile endpoint."""

    id: str
    github_username: str
    github_avatar_url: str | None = None
    # Counters and score default to zero when absent from the response.
    total_annotations: int = 0
    reputation_score: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    # Timestamps are optional and default to None.
    created_at: datetime | None = None
    last_seen_at: datetime | None = None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class NoPairsBackoff:
    """Tracks consecutive 204s and computes wait duration."""

    # Largest exponent ever applied. The doubled wait reaches the
    # NO_PAIRS_MAX_WAIT cap long before this, so clamping here does not change
    # any returned value.
    _MAX_EXPONENT = 32

    def __init__(self) -> None:
        self._consecutive_empty = 0

    def wait_duration(self) -> float:
        """Return the next wait in seconds: doubling per empty claim, capped.

        The exponent is clamped before exponentiating. Unclamped, ``2**n`` is
        an ever-growing integer and, once n reaches 1024, multiplying it by a
        float raises ``OverflowError`` — reachable after roughly a week of
        uninterrupted empty claims at the 600-second cap.
        """
        exponent = min(self._consecutive_empty, self._MAX_EXPONENT)
        duration = NO_PAIRS_INITIAL_WAIT * (2**exponent)
        result: float = min(duration, NO_PAIRS_MAX_WAIT)
        return result

    def record_empty(self) -> None:
        """Register one more consecutive empty claim."""
        self._consecutive_empty += 1

    def reset(self) -> None:
        """Clear the streak, returning the wait to its initial value."""
        self._consecutive_empty = 0

    @property
    def consecutive_empty(self) -> int:
        """Number of consecutive empty claims recorded since the last reset."""
        return self._consecutive_empty
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Timeout for annotation submission: 120 s by default, 10 s for connecting.
SUBMIT_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class KombinatClient:
    """Authenticated HTTP client for the kombinat API.

    Every request carries the bearer token and goes through
    ``_request_with_retry``. The client can be used as a context manager,
    which closes the underlying HTTP client on exit.
    """

    def __init__(self, base_url: str, access_token: str) -> None:
        self.http = httpx.Client(
            base_url=base_url,
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=httpx.Timeout(30.0, connect=10.0),
        )

    def __enter__(self) -> KombinatClient:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def claim_batch(self, size: int = 100) -> BatchResponse | None:
        """Claim a batch of pairs. Returns None if no pairs available (204)."""
        resp = self._request_with_retry("POST", "/v1/batches/claim", json={"size": size})
        if resp.status_code == 204:
            return None
        return BatchResponse.model_validate(resp.json())

    def submit_annotations(self, submission: AnnotationSubmission) -> AnnotationResult:
        """Submit a batch of annotations. Uses a longer timeout since processing
        100 annotations with honeypot checks can be slow over Railway's proxy."""
        resp = self._request_with_retry(
            "POST",
            "/v1/annotations",
            json=submission.model_dump(),
            timeout=SUBMIT_TIMEOUT,
        )
        return AnnotationResult.model_validate(resp.json())

    def release_batch(self, batch_id: str) -> None:
        """Release an unfinished batch back to the pool."""
        self._request_with_retry("DELETE", f"/v1/batches/{batch_id}")

    def get_profile(self) -> ContributorProfile:
        """Get the contributor's profile and stats."""
        resp = self._request_with_retry("GET", "/v1/contributors/me")
        return ContributorProfile.model_validate(resp.json())

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self.http.close()

    def _request_with_retry(self, method: str, url: str, **kwargs: object) -> httpx.Response:
        """Make an HTTP request with exponential backoff retry on 5xx/network errors.

        Returns:
            The response for any status below 400 (including 204).

        Raises:
            AuthError: On a 401 response (never retried).
            KombinatError: On any other 4xx response (never retried), or when a
                5xx response or transport error persists after MAX_RETRIES retries.
        """
        for attempt in range(MAX_RETRIES + 1):
            try:
                resp = self.http.request(method, url, **kwargs)  # type: ignore[arg-type]
            except httpx.TransportError as e:
                # TransportError is the common base of connect, timeout, read,
                # write and protocol failures, so all of them are retried and
                # wrapped instead of only connect errors and timeouts.
                if attempt < MAX_RETRIES:
                    self._backoff_sleep(attempt)
                    continue
                raise KombinatError(
                    f"kombinat unreachable after {MAX_RETRIES} retries: {e}"
                ) from e

            status = resp.status_code
            if status == 401:
                raise AuthError("Authentication failed (401). Run 'annotator login'.")
            if status == 204:
                return resp
            if 400 <= status < 500:
                raise KombinatError(f"kombinat error {status}: {resp.text}")
            if status >= 500:
                if attempt < MAX_RETRIES:
                    self._backoff_sleep(attempt)
                    continue
                raise KombinatError(f"kombinat server error {status}: {resp.text}")
            return resp

        # Not reachable: the final iteration always returns or raises. Kept so
        # the function visibly never falls off the end.
        raise KombinatError("Request failed after all retries")

    def _backoff_sleep(self, attempt: int) -> None:
        """Sleep for an exponentially growing, jittered delay capped at MAX_RETRY_DELAY."""
        delay = min(MIN_RETRY_DELAY * (2**attempt) + random.uniform(0, 1), MAX_RETRY_DELAY)
        logger.debug("Retrying in %.1fs (attempt %d/%d)", delay, attempt + 1, MAX_RETRIES)
        time.sleep(delay)
|
annotator/config.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Annotator configuration via environment variables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import IntEnum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExitCode(IntEnum):
    """CLI exit codes."""

    SUCCESS = 0
    # Login failed, or no valid stored token was found.
    AUTH_FAILURE = 1
    NO_COMPATIBLE_HARDWARE = 2
    MODEL_LOADING_FAILED = 3
    # A request to kombinat failed.
    KOMBINAT_UNREACHABLE = 4
    UNRECOVERABLE = 5
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Settings(BaseSettings):
    """Runtime settings, overridable via ANNOTATOR_-prefixed environment variables."""

    model_config = SettingsConfigDict(
        env_prefix="ANNOTATOR_",
        # Evaluated once when this module is imported, so it refers to the
        # working directory the process had at import time.
        env_file=Path.cwd() / ".env",
        extra="ignore",
    )

    # Override with ANNOTATOR_KOMBINAT_URL for local dev against a non-production hub.
    kombinat_url: str = "https://kombinat-production.up.railway.app"

    batch_size: int = 100
    chunk_size: int = 50
    gpu_memory_utilization: float = 0.9
    max_model_len: int = 4096
    max_output_tokens: int = 256

    # Directory holding local state such as the stored auth token; the default
    # is resolved from Path.home() at import time.
    annotator_home: Path = Path.home() / ".annotator"
|
annotator/engine/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Inference engine backends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from annotator.engine.base import BaseEngine
|
|
9
|
+
from annotator.resolver import ResolvedRuntime
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_engine(
    runtime: ResolvedRuntime,
    gpu_memory_utilization: float = 0.9,
    max_model_len: int = 4096,
    max_output_tokens: int = 256,
) -> BaseEngine:
    """Build the engine matching ``runtime.backend``.

    Backend modules are imported only inside the branch that needs them, so an
    unused backend's dependencies are never loaded.

    Raises:
        ValueError: If the backend is not one of "vllm", "mlx", "llama_cpp".
    """
    backend = runtime.backend

    if backend == "vllm":
        from annotator.engine.vllm import VLLMEngine

        return VLLMEngine(
            runtime.model_spec, gpu_memory_utilization, max_model_len, max_output_tokens
        )

    if backend == "mlx":
        from annotator.engine.mlx import MLXEngine

        return MLXEngine(runtime.model_spec, max_output_tokens=max_output_tokens)

    if backend == "llama_cpp":
        from annotator.engine.llama_cpp import LlamaCppEngine

        return LlamaCppEngine(runtime.model_spec)

    raise ValueError(f"Unknown backend: {backend}")
|
annotator/engine/base.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Base engine interface and data structures."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LabelingInput(BaseModel):
    """A single (query, document) pair to label."""

    # Identifier of the pair; LabelingOutput carries a field of the same name.
    pair_id: str
    query_text: str
    doc_text: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LabelingOutput(BaseModel):
    """LLM response + engine metadata. Ready for submission to kombinat."""

    pair_id: str
    label: int
    reasoning: str
    input_tokens: int
    output_tokens: int
    # NOTE(review): presumably a hash of the raw model response — confirm
    # which hash and encoding the engines use.
    raw_response_hash: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EngineInfo(BaseModel):
    """Model metadata — submitted to kombinat with every annotation."""

    model_id: str
    quantization: str
    backend: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BaseEngine(ABC):
    """Abstract interface that every inference backend implements."""

    @abstractmethod
    def load(self) -> None:
        """Download model (if not cached) and load into memory."""
        ...

    @abstractmethod
    def label_batch(self, pairs: list[LabelingInput]) -> list[LabelingOutput]:
        """Run inference on a batch. Returns results for successfully labeled pairs only.

        Pairs that fail parsing/validation after retry are silently dropped.
        """
        ...

    @abstractmethod
    def info(self) -> EngineInfo:
        """Return model metadata for submission to kombinat."""
        ...
|