embedkombinat-annotator 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
annotator/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Distributed annotation worker for embedkombinat."""
2
+
3
+ __version__ = "0.3.0"
4
+
5
+ TEAL = "#00E5B0"
6
+ AMBER = "#c05d3b"
annotator/auth.py ADDED
@@ -0,0 +1,236 @@
1
+ """GitHub OAuth Device Flow and token management.
2
+
3
+ Why device flow: the previous web-flow design ran a localhost callback server
4
+ inside the CLI process, then handed GitHub a `redirect_uri=http://localhost:PORT/callback`
5
+ URI. That works on a single machine where browser and server are colocated
6
+ (your laptop), but breaks the moment the CLI runs on a remote host the user
7
+ SSH'd into (Runpod, Lambda, EC2, etc.) — the user's browser hits localhost
8
+ on *their machine*, not the remote one. The device flow has no callback at
9
+ all: the CLI prints a short user code, the user enters it in any browser on
10
+ any device, the CLI polls GitHub directly. Same auth, no machine-boundary
11
+ assumption.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import contextlib
17
+ import json
18
+ import os
19
+ import stat
20
+ import time
21
+ import webbrowser
22
+ from datetime import UTC, datetime, timedelta
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ import httpx
26
+ from pydantic import BaseModel
27
+
28
+ from annotator import TEAL
29
+ from annotator.errors import AuthError
30
+
31
+ if TYPE_CHECKING:
32
+ from pathlib import Path
33
+
34
+ from rich.console import Console
35
+
36
+ from annotator.config import Settings
37
+
38
+ AUTH_FILE = "auth.json"
39
+ DEVICE_CODE_URL = "https://github.com/login/device/code"
40
+ TOKEN_URL = "https://github.com/login/oauth/access_token"
41
+ DEFAULT_SCOPE = "read:user"
42
+ EXPIRY_BUFFER = timedelta(minutes=5)
43
+ MIN_POLL_INTERVAL = 5.0
44
+
45
+
46
class ContributorInfo(BaseModel):
    """Identity of the contributor that a stored kombinat token belongs to."""

    # Contributor identifier, taken from the "contributor" object of the
    # kombinat token-exchange response.
    id: str
    # GitHub login; this is the name shown in "Authenticated as ...".
    github_username: str
    # Avatar URL; the exchange response may omit it, hence optional.
    github_avatar_url: str | None = None
50
+
51
+
52
class AuthToken(BaseModel):
    """A kombinat access token plus the metadata persisted alongside it."""

    # Hub the token was issued by.
    kombinat_url: str
    access_token: str
    expires_at: datetime
    contributor: ContributorInfo

    def is_expired(self) -> bool:
        """Return True once the token is within EXPIRY_BUFFER of its expiry.

        The buffer makes a token count as expired slightly early, so a token
        is never handed out right before it stops being accepted.
        """
        usable_until = self.expires_at - EXPIRY_BUFFER
        return usable_until <= datetime.now(tz=UTC)
60
+
61
+
62
def save_token(token: AuthToken, home: Path) -> None:
    """Save auth token to disk with restricted permissions.

    The file is created with owner-only read/write permissions from the very
    first byte. Writing first and chmod'ing afterwards would leave a short
    window in which the token is readable under the process umask.

    Args:
        token: Token to persist; serialized with ``model_dump_json``.
        home: Directory that holds the auth file; created if missing.
    """
    home.mkdir(parents=True, exist_ok=True)
    path = home / AUTH_FILE
    owner_only = stat.S_IRUSR | stat.S_IWUSR

    # The mode passed to os.open only applies when the file is newly created.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_only)
    with os.fdopen(fd, "w") as fh:
        # A pre-existing file keeps its old mode, so tighten it before any
        # secret content is written into it.
        os.chmod(path, owner_only)
        fh.write(token.model_dump_json(indent=2))
68
+
69
+
70
def load_token(home: Path) -> AuthToken | None:
    """Load auth token from disk. Returns None if missing or expired.

    A file that cannot be parsed or validated as an ``AuthToken`` is treated
    the same as a missing one.

    Args:
        home: Directory that holds the auth file.

    Returns:
        The stored token, or None when there is no usable token.
    """
    path = home / AUTH_FILE
    # Read directly instead of checking exists() first: the file can vanish
    # between the check and the read (e.g. a concurrent logout).
    try:
        raw = path.read_text()
    except FileNotFoundError:
        return None

    try:
        token = AuthToken.model_validate_json(raw)
    except (json.JSONDecodeError, ValueError):
        return None

    return None if token.is_expired() else token
82
+
83
+
84
def delete_token(home: Path) -> None:
    """Remove auth token from disk. A missing file is not an error.

    Args:
        home: Directory that holds the auth file.
    """
    # missing_ok avoids the exists()/unlink() race when the file disappears
    # between the two calls.
    (home / AUTH_FILE).unlink(missing_ok=True)
89
+
90
+
91
def fetch_client_id(kombinat_url: str) -> str:
    """Fetch the public GitHub OAuth client_id from kombinat.

    Args:
        kombinat_url: Base URL of the kombinat hub.

    Returns:
        The non-empty OAuth client id.

    Raises:
        AuthError: If kombinat is unreachable, answers with a non-200 status,
            returns a body that is not a JSON object, or returns no client id.
    """
    try:
        with httpx.Client() as client:
            resp = client.get(f"{kombinat_url}/v1/auth/config", timeout=10.0)
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach kombinat at {kombinat_url}: {exc}") from exc
    if resp.status_code != 200:
        raise AuthError(f"kombinat auth config fetch failed: {resp.status_code} {resp.text}")

    # A 200 with a non-JSON body (e.g. a proxy error page) must still surface
    # as AuthError rather than a raw decoding error.
    try:
        payload = resp.json()
    except ValueError as exc:
        raise AuthError("kombinat auth config response was not valid JSON") from exc

    client_id = payload.get("client_id") if isinstance(payload, dict) else None
    if not isinstance(client_id, str) or not client_id:
        raise AuthError("kombinat returned empty client_id — server is misconfigured")
    return client_id
104
+
105
+
106
def request_device_code(client_id: str) -> dict[str, Any]:
    """Ask GitHub for a device code + user code. Returns the full response payload.

    Args:
        client_id: OAuth client id of the GitHub app.

    Returns:
        The decoded response; guaranteed to contain ``device_code``,
        ``user_code``, ``verification_uri``, ``expires_in`` and ``interval``.

    Raises:
        AuthError: If GitHub is unreachable, answers with a non-200 status,
            or the body is not a JSON object with all required fields.
    """
    try:
        with httpx.Client() as client:
            resp = client.post(
                DEVICE_CODE_URL,
                data={"client_id": client_id, "scope": DEFAULT_SCOPE},
                headers={"Accept": "application/json"},
                timeout=10.0,
            )
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach GitHub: {exc}") from exc
    if resp.status_code != 200:
        raise AuthError(f"device code request failed: {resp.status_code} {resp.text}")

    # Keep decoding failures inside the AuthError contract.
    try:
        data = resp.json()
    except ValueError as exc:
        raise AuthError("device code response was not valid JSON") from exc

    required = {"device_code", "user_code", "verification_uri", "expires_in", "interval"}
    if not isinstance(data, dict) or not required.issubset(data):
        raise AuthError(
            "device code response is missing required fields. "
            "This usually means the OAuth app does not have Device Flow enabled — "
            "check 'Enable Device Flow' in the OAuth app settings on GitHub."
        )
    return data
129
+
130
+
131
def poll_for_access_token(
    client_id: str,
    device_code: str,
    interval: float,
    expires_in: float,
) -> str:
    """Poll GitHub's token endpoint until the user authorizes. Returns the access token.

    Args:
        client_id: OAuth client id of the GitHub app.
        device_code: Device code obtained from the device-code request.
        interval: Poll interval suggested by GitHub, in seconds; never polled
            faster than MIN_POLL_INTERVAL.
        expires_in: Lifetime of the device code, in seconds.

    Raises:
        AuthError: On network failure, a non-JSON or unexpected response,
            denial, expiry of the device code, or when ``expires_in`` elapses.
    """
    deadline = time.monotonic() + expires_in
    poll_interval = max(interval, MIN_POLL_INTERVAL)

    # The request body and headers never change between polls.
    form = {
        "client_id": client_id,
        "device_code": device_code,
        "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
    }
    headers = {"Accept": "application/json"}

    while time.monotonic() < deadline:
        # Sleep first: the user needs time to enter the code anyway.
        time.sleep(poll_interval)
        try:
            with httpx.Client() as client:
                resp = client.post(TOKEN_URL, data=form, headers=headers, timeout=10.0)
        except httpx.HTTPError as exc:
            raise AuthError(f"polling GitHub failed: {exc}") from exc

        # A non-JSON body (e.g. a gateway error page) must not escape as a
        # raw decoding error.
        try:
            data = resp.json()
        except ValueError as exc:
            raise AuthError(
                f"GitHub token endpoint returned a non-JSON response ({resp.status_code})"
            ) from exc
        if not isinstance(data, dict):
            raise AuthError(f"unexpected error during device flow: {data}")

        if "access_token" in data:
            return str(data["access_token"])

        error = data.get("error")
        if error == "authorization_pending":
            continue
        if error == "slow_down":
            # We were asked to back off; lengthen the interval for all later polls.
            poll_interval += 5.0
            continue
        if error == "expired_token":
            raise AuthError("device code expired before authorization completed")
        if error == "access_denied":
            raise AuthError("authorization was denied")
        raise AuthError(f"unexpected error during device flow: {data}")

    raise AuthError(f"device flow timed out after {expires_in:.0f}s")
175
+
176
+
177
def exchange_github_token(github_access_token: str, kombinat_url: str) -> AuthToken:
    """Exchange a GitHub access token for a kombinat JWT.

    Args:
        github_access_token: Access token obtained from the GitHub device flow.
        kombinat_url: Base URL of the kombinat hub.

    Returns:
        An ``AuthToken`` whose ``expires_at`` is computed from the response's
        ``expires_in`` relative to the current UTC time.

    Raises:
        AuthError: If kombinat is unreachable, rejects the token, answers with
            a non-200 status, or returns a malformed payload.
    """
    try:
        with httpx.Client() as client:
            resp = client.post(
                f"{kombinat_url}/v1/auth/github-device",
                json={"access_token": github_access_token},
                timeout=30.0,
            )
    except httpx.HTTPError as exc:
        raise AuthError(f"could not reach kombinat at {kombinat_url}: {exc}") from exc
    if resp.status_code == 401:
        raise AuthError("kombinat rejected the GitHub access token")
    if resp.status_code != 200:
        raise AuthError(f"kombinat auth failed: {resp.status_code} {resp.text}")

    try:
        data = resp.json()
        expires_at = datetime.now(tz=UTC) + timedelta(seconds=data["expires_in"])
        contributor_data = data["contributor"]
        return AuthToken(
            kombinat_url=kombinat_url,
            access_token=data["access_token"],
            expires_at=expires_at,
            contributor=ContributorInfo(
                id=contributor_data["id"],
                github_username=contributor_data["github_username"],
                github_avatar_url=contributor_data.get("github_avatar_url"),
            ),
        )
    except (KeyError, TypeError, ValueError) as exc:
        # Missing keys, wrong types and validation failures all mean the same
        # thing to the caller. The underlying message is deliberately left out
        # of the text because it may echo parts of the payload, including the
        # token; it remains available via the exception chain.
        raise AuthError("kombinat returned a malformed auth response") from exc
205
+
206
+
207
def login(settings: Settings, console: Console) -> AuthToken:
    """Authenticate via the GitHub Device Flow and persist a kombinat token.

    Steps: fetch the OAuth client id from kombinat, request a device code,
    show the user the verification URL and code, poll GitHub until the user
    authorizes, exchange the GitHub token for a kombinat token, and save it
    under ``settings.annotator_home``.

    Args:
        settings: Supplies ``kombinat_url`` and ``annotator_home``.
        console: Console that receives the progress output.

    Returns:
        The freshly issued and saved token.
    """
    console.print(" No credentials found. Starting login...\n")

    hub_url = settings.kombinat_url
    client_id = fetch_client_id(hub_url)
    grant = request_device_code(client_id)

    user_code = grant["user_code"]
    verification_uri = grant["verification_uri"]
    device_code = grant["device_code"]
    interval = float(grant["interval"])
    expires_in = float(grant["expires_in"])

    console.print(f" -> Open in any browser: [bold]{verification_uri}[/bold]")
    console.print(f" -> Enter code: [bold {TEAL}]{user_code}[/bold {TEAL}]\n")

    # Opening a browser is a convenience only. On headless hosts it cannot
    # work, and the device flow does not need it, so failures are ignored.
    try:
        webbrowser.open(verification_uri)
    except Exception:
        pass

    console.print(" -> Waiting for authorization...\n")

    github_token = poll_for_access_token(client_id, device_code, interval, expires_in)
    issued = exchange_github_token(github_token, hub_url)
    save_token(issued, settings.annotator_home)

    console.print(f" [{TEAL}]✓[/{TEAL}] Authenticated as {issued.contributor.github_username}")
    return issued
annotator/cli.py ADDED
@@ -0,0 +1,134 @@
1
+ """CLI interface for annotator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Annotated
6
+
7
+ import typer
8
+ from rich.console import Console
9
+ from rich.panel import Panel
10
+ from rich.text import Text
11
+
12
+ from annotator import AMBER, TEAL, __version__
13
+ from annotator.config import ExitCode, Settings
14
+
15
+ app = typer.Typer(
16
+ name="annotator",
17
+ no_args_is_help=False,
18
+ invoke_without_command=True,
19
+ add_completion=False,
20
+ )
21
+
22
+
23
def _print_banner(console: Console) -> None:
    """Print the EmbedKombinat branded header."""
    accent = f"bold {TEAL}"

    # ASCII-art rows of the logo, top to bottom.
    rows = (
        " EEEEE K K",
        " E K K",
        " EEEE KKK",
        " E K K",
        " EEEEE K K",
    )
    logo = Text()
    for row in rows:
        logo.append(row + "\n", style=accent)

    title = f"embed kombinat \u00b7 annotator v{__version__}"
    # Panel width: the title plus 6 columns for border and padding.
    panel = Panel(title, style=accent, width=len(title) + 6)

    console.print()
    console.print(logo, end="")
    console.print(panel)
    console.print()
37
+
38
+
39
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    batch_size: Annotated[
        int | None, typer.Option("--batch-size", help="Pairs per batch claimed from kombinat")
    ] = None,
    model: Annotated[
        str | None, typer.Option("--model", help="Override auto-selected model (HuggingFace ID)")
    ] = None,
    quantization: Annotated[
        str | None, typer.Option("--quantization", help="Override quantization (awq, fp16, etc.)")
    ] = None,
    backend: Annotated[
        str | None, typer.Option("--backend", help="Override backend (vllm, mlx, llama_cpp)")
    ] = None,
    gpu_memory_utilization: Annotated[
        float | None,
        typer.Option("--gpu-memory-utilization", help="GPU memory fraction (vLLM only)"),
    ] = None,
    dry_run: Annotated[
        bool, typer.Option("--dry-run", help="Process one pair without submitting")
    ] = False,
) -> None:
    """Start the labeling loop. Default command."""
    # This callback also fires before every subcommand; only run the loop
    # when no subcommand was requested.
    if ctx.invoked_subcommand is not None:
        return

    console = Console()
    _print_banner(console)

    # Imported lazily so `login` / `status` / `logout` do not pay for it.
    from annotator.runner import AnnotatorRunner

    settings = Settings()
    runner = AnnotatorRunner(settings, console)
    exit_code = runner.run(
        # Explicit CLI flags win; otherwise fall back to the settings values
        # (which themselves default to 100 and 0.9). Previously the GPU
        # fraction was hard-defaulted here, so the setting was never honoured.
        batch_size=batch_size if batch_size is not None else settings.batch_size,
        model_override=model,
        quantization_override=quantization,
        backend_override=backend,
        gpu_memory_utilization=(
            gpu_memory_utilization
            if gpu_memory_utilization is not None
            else settings.gpu_memory_utilization
        ),
        dry_run=dry_run,
    )
    raise typer.Exit(code=exit_code)
81
+
82
+
83
@app.command()
def login() -> None:
    """Authenticate with GitHub."""
    from annotator import auth

    console = Console()
    settings = Settings()
    try:
        # auth.login() prints the "Authenticated as ..." confirmation itself;
        # printing it here as well showed the line twice.
        auth.login(settings, console)
    except Exception as e:
        # Top-level CLI boundary: report any failure and map it to an exit code.
        console.print(f" [{AMBER}]\u2717[/{AMBER}] Login failed: {e}")
        raise typer.Exit(code=ExitCode.AUTH_FAILURE) from e
98
+
99
+
100
@app.command()
def status() -> None:
    """Show contributor profile and stats."""
    from annotator import auth
    from annotator.client import KombinatClient

    console = Console()
    settings = Settings()

    # load_token returns None for a missing or expired token.
    stored = auth.load_token(settings.annotator_home)
    if stored is None:
        console.print(f" [{AMBER}]Not logged in.[/{AMBER}] Run 'annotator login'.")
        raise typer.Exit(code=ExitCode.AUTH_FAILURE)

    api = KombinatClient(stored.kombinat_url, stored.access_token)
    try:
        me = api.get_profile()
        console.print(f" [{TEAL}]\u2713[/{TEAL}] Logged in as {me.github_username}")
        console.print(f" Total annotations: {me.total_annotations}")
        console.print(f" Reputation score: {me.reputation_score:.2f}")
    except Exception as e:
        # Every failure while fetching or printing is reported with the
        # KOMBINAT_UNREACHABLE exit code.
        console.print(f" [{AMBER}]\u2717[/{AMBER}] Failed to fetch status: {e}")
        raise typer.Exit(code=ExitCode.KOMBINAT_UNREACHABLE) from e
    finally:
        # Release the HTTP connection pool on success and failure alike.
        api.close()
124
+
125
+
126
@app.command()
def logout() -> None:
    """Remove stored credentials."""
    from annotator import auth

    console = Console()
    # Delete the auth file under the configured annotator home directory.
    home = Settings().annotator_home
    auth.delete_token(home)
    console.print(f" [{TEAL}]\u2713[/{TEAL}] Logged out.")
annotator/client.py ADDED
@@ -0,0 +1,174 @@
1
+ """HTTP client for kombinat API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import random
7
+ import time
8
+ from datetime import datetime # noqa: TC003 - needed at runtime by Pydantic
9
+
10
+ import httpx
11
+ from pydantic import BaseModel
12
+
13
+ from annotator.errors import AuthError, KombinatError
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ MAX_RETRIES = 3
18
+ MIN_RETRY_DELAY = 1.0
19
+ MAX_RETRY_DELAY = 30.0
20
+ NO_PAIRS_INITIAL_WAIT = 30.0
21
+ NO_PAIRS_MAX_WAIT = 600.0
22
+
23
+
24
class PairData(BaseModel):
    """One (query, document) pair delivered inside a claimed batch."""

    pair_id: str
    query_text: str
    doc_text: str
28
+
29
+
30
class BatchResponse(BaseModel):
    """Response body of a successful batch claim."""

    batch_id: str
    # Expiry timestamp reported by kombinat for this claim.
    expires_at: datetime
    pairs: list[PairData]
34
+
35
+
36
class AnnotationPayload(BaseModel):
    """A single annotation as sent to kombinat inside a submission."""

    pair_id: str
    label: int
    input_tokens: int
    output_tokens: int
    raw_response_hash: str
42
+
43
+
44
class AnnotationSubmission(BaseModel):
    """Request body for submitting the annotations of one batch."""

    batch_id: str
    # Model and quantization that produced the annotations.
    model_id: str
    quantization: str
    annotations: list[AnnotationPayload]
49
+
50
+
51
class AnnotationResult(BaseModel):
    """Response body returned after submitting annotations."""

    accepted: int
    rejected: int
    # The remaining fields are optional in the response and fall back to
    # the defaults below.
    honeypot_accuracy: float | None = None
    pairs_verified: int = 0
    contributor_tokens: dict[str, int] = {}
57
+
58
+
59
class ContributorProfile(BaseModel):
    """Contributor profile and stats as returned by ``/v1/contributors/me``."""

    id: str
    github_username: str
    github_avatar_url: str | None = None

    # Counters default to zero when the response omits them.
    total_annotations: int = 0
    reputation_score: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0

    created_at: datetime | None = None
    last_seen_at: datetime | None = None
69
+
70
+
71
class NoPairsBackoff:
    """Tracks consecutive 204s and computes wait duration.

    The wait starts at NO_PAIRS_INITIAL_WAIT and doubles with every
    consecutive empty response, capped at NO_PAIRS_MAX_WAIT.
    """

    # Upper bound for the doubling exponent. Without it the counter grows
    # forever on an idle worker, and multiplying a float by 2**n raises
    # OverflowError once n reaches roughly 1024. The clamp to
    # NO_PAIRS_MAX_WAIT takes over long before this exponent is reached.
    _MAX_EXPONENT = 32

    def __init__(self) -> None:
        self._consecutive_empty = 0

    def wait_duration(self) -> float:
        """Get the next wait duration in seconds."""
        exponent = min(self._consecutive_empty, self._MAX_EXPONENT)
        duration = NO_PAIRS_INITIAL_WAIT * (2**exponent)
        result: float = min(duration, NO_PAIRS_MAX_WAIT)
        return result

    def record_empty(self) -> None:
        """Register one more consecutive empty response."""
        self._consecutive_empty += 1

    def reset(self) -> None:
        """Forget all recorded empty responses (the next wait starts over)."""
        self._consecutive_empty = 0

    @property
    def consecutive_empty(self) -> int:
        """Number of empty responses recorded since the last reset."""
        return self._consecutive_empty
92
+
93
+
94
+ SUBMIT_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
95
+
96
+
97
class KombinatClient:
    """Synchronous client for the kombinat API.

    Owns an ``httpx.Client`` carrying the bearer token. Call ``close()`` when
    done, or use the instance as a context manager.
    """

    def __init__(self, base_url: str, access_token: str) -> None:
        self.http = httpx.Client(
            base_url=base_url,
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=httpx.Timeout(30.0, connect=10.0),
        )

    def __enter__(self) -> KombinatClient:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def claim_batch(self, size: int = 100) -> BatchResponse | None:
        """Claim a batch of pairs. Returns None if no pairs available (204)."""
        resp = self._request_with_retry("POST", "/v1/batches/claim", json={"size": size})
        if resp.status_code == 204:
            return None
        return BatchResponse.model_validate(resp.json())

    def submit_annotations(self, submission: AnnotationSubmission) -> AnnotationResult:
        """Submit a batch of annotations. Uses a longer timeout since processing
        100 annotations with honeypot checks can be slow over Railway's proxy."""
        resp = self._request_with_retry(
            "POST",
            "/v1/annotations",
            json=submission.model_dump(),
            timeout=SUBMIT_TIMEOUT,
        )
        return AnnotationResult.model_validate(resp.json())

    def release_batch(self, batch_id: str) -> None:
        """Release an unfinished batch back to the pool."""
        self._request_with_retry("DELETE", f"/v1/batches/{batch_id}")

    def get_profile(self) -> ContributorProfile:
        """Get the contributor's profile and stats."""
        resp = self._request_with_retry("GET", "/v1/contributors/me")
        return ContributorProfile.model_validate(resp.json())

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self.http.close()

    def _request_with_retry(self, method: str, url: str, **kwargs: object) -> httpx.Response:
        """Make an HTTP request with exponential backoff retry on 5xx/network errors.

        Up to MAX_RETRIES retries follow the first attempt. 4xx responses are
        never retried.

        Raises:
            AuthError: On a 401 response.
            KombinatError: On any other 4xx response, or once the retries are
                exhausted for 5xx responses or transport failures.
        """
        for attempt in range(MAX_RETRIES + 1):
            out_of_retries = attempt >= MAX_RETRIES
            try:
                resp = self.http.request(method, url, **kwargs)  # type: ignore[arg-type]
            except httpx.TransportError as e:
                # TransportError covers connect failures and timeouts as well
                # as read/write/protocol errors (e.g. a connection reset),
                # which previously escaped unretried and unwrapped.
                # NOTE(review): retrying a POST after a read failure can
                # repeat a request the server already processed — confirm the
                # endpoints tolerate that.
                if out_of_retries:
                    raise KombinatError(
                        f"kombinat unreachable after {MAX_RETRIES} retries: {e}"
                    ) from e
                self._backoff_sleep(attempt)
                continue

            status = resp.status_code
            if status == 401:
                raise AuthError("Authentication failed (401). Run 'annotator login'.")
            if status == 204:
                return resp
            if 400 <= status < 500:
                raise KombinatError(f"kombinat error {status}: {resp.text}")
            if status >= 500:
                if out_of_retries:
                    raise KombinatError(f"kombinat server error {status}: {resp.text}")
                self._backoff_sleep(attempt)
                continue
            return resp

        # Not reachable: the final iteration always returns or raises. Kept so
        # the function visibly never falls through without a result.
        raise KombinatError("Request failed after all retries")

    def _backoff_sleep(self, attempt: int) -> None:
        """Sleep before retry number ``attempt + 1``.

        The delay doubles per attempt starting at MIN_RETRY_DELAY, gets up to
        one second of random jitter, and is capped at MAX_RETRY_DELAY.
        """
        delay = min(MIN_RETRY_DELAY * (2**attempt) + random.uniform(0, 1), MAX_RETRY_DELAY)
        logger.debug("Retrying in %.1fs (attempt %d/%d)", delay, attempt + 1, MAX_RETRIES)
        time.sleep(delay)
annotator/config.py ADDED
@@ -0,0 +1,38 @@
1
+ """Annotator configuration via environment variables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import IntEnum
6
+ from pathlib import Path
7
+
8
+ from pydantic_settings import BaseSettings, SettingsConfigDict
9
+
10
+
11
class ExitCode(IntEnum):
    """Process exit codes returned by the annotator CLI."""

    SUCCESS = 0
    # Login failed, or a command needed credentials and found none.
    AUTH_FAILURE = 1
    NO_COMPATIBLE_HARDWARE = 2
    MODEL_LOADING_FAILED = 3
    # A request to kombinat did not succeed.
    KOMBINAT_UNREACHABLE = 4
    UNRECOVERABLE = 5
20
+
21
+
22
class Settings(BaseSettings):
    """Annotator settings, read from ``ANNOTATOR_``-prefixed environment variables.

    A ``.env`` file in the working directory is configured as an additional
    source; entries that match no field are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="ANNOTATOR_",
        # Resolved once, when this class body is executed.
        env_file=Path.cwd() / ".env",
        extra="ignore",
    )

    # Override with ANNOTATOR_KOMBINAT_URL for local dev against a non-production hub.
    kombinat_url: str = "https://kombinat-production.up.railway.app"

    batch_size: int = 100
    chunk_size: int = 50
    gpu_memory_utilization: float = 0.9
    max_model_len: int = 4096
    max_output_tokens: int = 256

    # Directory for local state such as the stored auth token.
    annotator_home: Path = Path.home() / ".annotator"
@@ -0,0 +1,34 @@
1
+ """Inference engine backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from annotator.engine.base import BaseEngine
9
+ from annotator.resolver import ResolvedRuntime
10
+
11
+
12
+ def create_engine(
13
+ runtime: ResolvedRuntime,
14
+ gpu_memory_utilization: float = 0.9,
15
+ max_model_len: int = 4096,
16
+ max_output_tokens: int = 256,
17
+ ) -> BaseEngine:
18
+ """Create the appropriate engine for the resolved runtime."""
19
+ if runtime.backend == "vllm":
20
+ from annotator.engine.vllm import VLLMEngine
21
+
22
+ return VLLMEngine(
23
+ runtime.model_spec, gpu_memory_utilization, max_model_len, max_output_tokens
24
+ )
25
+ elif runtime.backend == "mlx":
26
+ from annotator.engine.mlx import MLXEngine
27
+
28
+ return MLXEngine(runtime.model_spec, max_output_tokens=max_output_tokens)
29
+ elif runtime.backend == "llama_cpp":
30
+ from annotator.engine.llama_cpp import LlamaCppEngine
31
+
32
+ return LlamaCppEngine(runtime.model_spec)
33
+ else:
34
+ raise ValueError(f"Unknown backend: {runtime.backend}")
@@ -0,0 +1,54 @@
1
+ """Base engine interface and data structures."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
class LabelingInput(BaseModel):
    """One (query, document) pair handed to an engine for labeling."""

    pair_id: str
    query_text: str
    doc_text: str
16
+
17
+
18
class LabelingOutput(BaseModel):
    """Result of labeling one pair: the LLM's answer plus engine metadata.

    Ready for submission to kombinat.
    """

    pair_id: str
    label: int
    reasoning: str
    input_tokens: int
    output_tokens: int
    raw_response_hash: str
27
+
28
+
29
class EngineInfo(BaseModel):
    """Metadata about the loaded model; submitted to kombinat with every annotation."""

    model_id: str
    quantization: str
    backend: str
35
+
36
+
37
class BaseEngine(ABC):
    """Interface every inference backend has to implement."""

    @abstractmethod
    def load(self) -> None:
        """Download the model (unless it is already cached) and load it into memory."""

    @abstractmethod
    def label_batch(self, pairs: list[LabelingInput]) -> list[LabelingOutput]:
        """Run inference on a batch of pairs.

        Only successfully labeled pairs are returned. A pair whose response
        still fails parsing/validation after a retry is silently dropped, so
        the result may be shorter than ``pairs``.
        """

    @abstractmethod
    def info(self) -> EngineInfo:
        """Return the model metadata that is submitted to kombinat."""