gaston 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaston/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """Python client library for the Gaston API.
2
+
3
+ Transcription, translation and full-text search of sentences within
4
+ transcribed recordings.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .client import GastonClient
10
+ from .constants import (
11
+ SUPPORTED_LANGUAGES,
12
+ TRANSLATION_LANGUAGES,
13
+ TRANSLATION_OPTIONS,
14
+ )
15
+ from .exceptions import (
16
+ AuthenticationError,
17
+ BadRequestError,
18
+ GastonAPIError,
19
+ GastonError,
20
+ NotFoundError,
21
+ RateLimitError,
22
+ )
23
+ from .models import (
24
+ Directory,
25
+ Media,
26
+ MediaList,
27
+ SearchResults,
28
+ Sentence,
29
+ TranscribeResult,
30
+ TranslateResult,
31
+ Usage,
32
+ User,
33
+ )
34
+
35
+ __version__ = "0.2.0"
36
+
37
+ __all__ = [
38
+ "GastonClient",
39
+ # exceptions
40
+ "GastonError",
41
+ "GastonAPIError",
42
+ "AuthenticationError",
43
+ "BadRequestError",
44
+ "NotFoundError",
45
+ "RateLimitError",
46
+ # models
47
+ "User",
48
+ "Usage",
49
+ "Media",
50
+ "MediaList",
51
+ "Sentence",
52
+ "Directory",
53
+ "TranscribeResult",
54
+ "TranslateResult",
55
+ "SearchResults",
56
+ # constants
57
+ "SUPPORTED_LANGUAGES",
58
+ "TRANSLATION_LANGUAGES",
59
+ "TRANSLATION_OPTIONS",
60
+ ]
gaston/client.py ADDED
@@ -0,0 +1,395 @@
1
+ """Synchronous client for the Gaston API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import IO, Any, BinaryIO, Iterable, Mapping, Tuple, Union
7
+
8
+ import requests
9
+
10
+ from .constants import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
11
+ from .exceptions import (
12
+ AuthenticationError,
13
+ BadRequestError,
14
+ GastonAPIError,
15
+ GastonError,
16
+ NotFoundError,
17
+ RateLimitError,
18
+ )
19
+ from .models import (
20
+ Directory,
21
+ Media,
22
+ MediaList,
23
+ SearchResults,
24
+ TranscribeResult,
25
+ TranslateResult,
26
+ User,
27
+ )
28
+
29
+ BASE_URL = "https://api.gaston.live"
30
+ # Undocumented escape hatch for development/testing only.
31
+ _BASE_URL_OVERRIDE_ENV = "GASTON_API_URL_OVERRIDE"
32
+
33
# Timeouts are passed straight to requests: a single value covers both connect
# and read; a (connect, read) tuple sets them separately, and either element
# may be None to disable just that limit (e.g. ``(10, None)``); a bare None
# waits forever.
TimeoutType = Union[
    float,
    Tuple[Union[float, None], Union[float, None]],
    None,
]
36
+
37
+ # Quick metadata calls.
38
+ DEFAULT_TIMEOUT: TimeoutType = 30.0
39
+ # The file-upload endpoint (``transcribe``) can legitimately take minutes for
40
+ # large files, so it gets a much more generous read timeout by default.
41
+ DEFAULT_UPLOAD_TIMEOUT: TimeoutType = (10.0, 600.0)
42
+
43
+
44
class _Unset:
    """Marker type meaning "the caller did not pass this argument".

    ``None`` cannot serve as the marker because it is itself a valid timeout.
    """
46
+
47
+
48
+ _UNSET = _Unset()
49
+
50
# Maps HTTP status codes to the exception class raised for them. Statuses not
# listed here fall back to GastonAPIError at the lookup site.
_STATUS_EXCEPTIONS = {
    400: BadRequestError,
    403: AuthenticationError,
    404: NotFoundError,
    429: RateLimitError,
}
56
+
57
+
58
+ class GastonClient:
59
+ """A client for the Gaston transcription/translation/search API.
60
+
61
+ Args:
62
+ token: API token (the ``gapi-...`` token issued by the platform).
63
+ Falls back to the ``GASTON_API_TOKEN`` environment variable.
64
+ timeout: Timeout for ordinary requests. A single float covers both
65
+ connect and read; a ``(connect, read)`` tuple sets them separately;
66
+ ``None`` waits indefinitely. Defaults to 30s.
67
+ upload_timeout: Timeout for the file-upload endpoint (``transcribe``),
68
+ which can take minutes for large files. Defaults to
69
+ ``(10s connect, 600s read)``.
70
+ session: An optional pre-configured :class:`requests.Session`.
71
+
72
+ Example::
73
+
74
+ from gaston import GastonClient
75
+
76
+ client = GastonClient(token="gapi-...")
77
+ me = client.me()
78
+ print(me.email, me.usage.files_left)
79
+
80
+ The client can also be used as a context manager to ensure the underlying
81
+ HTTP session is closed::
82
+
83
+ with GastonClient(token="gapi-...") as client:
84
+ client.transcribe("interview.mp4", lang="en")
85
+ """
86
+
87
def __init__(
    self,
    token: str | None = None,
    timeout: TimeoutType = DEFAULT_TIMEOUT,
    upload_timeout: TimeoutType = DEFAULT_UPLOAD_TIMEOUT,
    session: requests.Session | None = None,
) -> None:
    """Resolve the token and base URL and prepare the HTTP session.

    Raises:
        GastonError: If no token is passed and ``GASTON_API_TOKEN`` is
            unset or empty.
    """
    # An explicit argument wins; the environment is only a fallback.
    if not token:
        token = os.getenv("GASTON_API_TOKEN")
    if not token:
        raise GastonError(
            "An API token is required (pass token=... or set GASTON_API_TOKEN)."
        )

    root = os.getenv(_BASE_URL_OVERRIDE_ENV) or BASE_URL

    self.token = token
    self.base_url = root.rstrip("/")
    self.timeout = timeout
    self.upload_timeout = upload_timeout
    self._session = session if session else requests.Session()
    # Every request authenticates through the ``token`` header.
    self._session.headers.update({"token": token})
105
+
106
+ # -- context manager -------------------------------------------------
107
+
108
+ def __enter__(self) -> "GastonClient":
109
+ return self
110
+
111
+ def __exit__(self, *_exc: object) -> None:
112
+ self.close()
113
+
114
+ def close(self) -> None:
115
+ """Close the underlying HTTP session."""
116
+ self._session.close()
117
+
118
+ # -- low level -------------------------------------------------------
119
+
120
def _request(
    self,
    method: str,
    path: str,
    *,
    params: Mapping[str, Any] | None = None,
    files: Mapping[str, Any] | None = None,
    timeout: TimeoutType | _Unset = _UNSET,
) -> Any:
    """Send one HTTP request and return the decoded response.

    Args:
        method: HTTP verb.
        path: Path appended verbatim to ``self.base_url``.
        params: Query parameters; entries whose value is ``None`` are
            dropped before sending.
        files: Multipart file mapping handed to ``requests``.
        timeout: Per-call timeout; when not given, ``self.timeout`` is used.

    Raises:
        GastonError: If ``requests`` raises a ``RequestException``.
    """
    url = f"{self.base_url}{path}"

    # Drop params that are None so we don't send "dir_id=None" literally.
    if params is None:
        query = None
    else:
        query = {name: value for name, value in params.items() if value is not None}

    request_timeout = timeout
    if isinstance(timeout, _Unset):
        request_timeout = self.timeout

    try:
        resp = self._session.request(
            method,
            url,
            params=query,
            files=files,
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        raise GastonError(f"Request to {url} failed: {exc}") from exc

    return self._handle_response(resp)
147
+
148
@staticmethod
def _handle_response(resp: requests.Response) -> Any:
    """Decode a response body, raising the mapped exception on failure.

    Returns:
        The decoded JSON body, or the raw text when a successful response
        is not JSON.

    Raises:
        GastonAPIError: Or the subclass registered for the HTTP status in
            ``_STATUS_EXCEPTIONS``. Raised when the status is not OK, or
            when a JSON object body carries a non-null ``error`` key.
    """
    status = resp.status_code

    try:
        body = resp.json()
    except ValueError:
        if resp.ok:
            return resp.text
        # Error bodies are not always JSON; still honour the status mapping
        # so that e.g. a plain-text 404 surfaces as NotFoundError rather
        # than the generic base class.
        exc_cls = _STATUS_EXCEPTIONS.get(status, GastonAPIError)
        raise exc_cls(
            f"Non-JSON response from server: {resp.text[:200]}",
            status_code=status,
        )

    # The API signals failures both via HTTP status codes and via an
    # ``error`` key in an otherwise 200 response. Handle both.
    # NOTE(review): the Media model also maps a top-level ``error`` field;
    # confirm a media item with a populated ``error`` cannot trip this check.
    is_mapping = isinstance(body, dict)
    error_message = body.get("error") if is_mapping else None

    if resp.ok and error_message is None:
        return body

    details = None
    if is_mapping:
        details = (
            body.get("details")
            or body.get("supported_languages")
            or body.get("supportedLanguages")
        )
    exc_cls = _STATUS_EXCEPTIONS.get(status, GastonAPIError)
    raise exc_cls(
        message=error_message or f"Request failed with status {status}",
        status_code=status,
        details=details,
        payload=body,
    )
181
+
182
+ # -- user ------------------------------------------------------------
183
+
184
def me(self) -> User:
    """Fetch ``/user/me`` and wrap the payload in a :class:`User`."""
    payload = self._request("GET", "/user/me")
    return User.from_dict(payload)
187
+
188
+ # -- media -----------------------------------------------------------
189
+
190
def list_media(self, page: int = 1, dir_id: int | None = None) -> MediaList:
    """Return one page of the media library.

    Args:
        page: 1-based page number.
        dir_id: Directory to list; ``None`` omits the filter (root listing).
    """
    query = {"page": page, "dir_id": dir_id}
    payload = self._request("GET", "/media/list", params=query)
    return MediaList.from_dict(payload)
200
+
201
def get_media(self, media_id: str, lang: str | None = None) -> Media:
    """Retrieve one media item together with its sentences.

    Args:
        media_id: The public media id (``uid``).
        lang: Language the sentences should be returned in; when ``None``
            the parameter is not sent.
    """
    query = {"media_id": media_id, "lang": lang}
    payload = self._request("GET", "/media", params=query)
    return Media.from_dict(payload)
211
+
212
def move_media(self, media_id: str, dir_id: int | None = None) -> dict[str, Any]:
    """Relocate a media item to ``dir_id`` (``None`` targets the root).

    Returns the decoded response body unchanged.
    """
    query = {"media_id": media_id, "dir_id": dir_id}
    return self._request("PATCH", "/media", params=query)
215
+
216
def transcribe(
    self,
    file: str | os.PathLike[str] | BinaryIO | IO[bytes],
    lang: str | None = None,
    dir_id: int | None = None,
    title: str | None = None,
    timeout: TimeoutType | _Unset = _UNSET,
) -> TranscribeResult:
    """Upload a media file to ``/media/transcribe``.

    Args:
        file: A filesystem path, or a binary file object that is already
            open. Paths are opened and closed here; file objects passed in
            are left open for the caller.
        lang: Source language hint (see :data:`SUPPORTED_LANGUAGES`);
            omitted from the request when ``None``.
        dir_id: Directory to place the media into.
        title: Display title.
        timeout: Per-call timeout; falls back to ``self.upload_timeout``.

    Returns:
        A :class:`TranscribeResult` built from the response.

    Raises:
        BadRequestError: If ``lang`` is given but not in
            :data:`SUPPORTED_LANGUAGES` (checked before any I/O).
    """
    if not (lang is None or lang in SUPPORTED_LANGUAGES):
        raise BadRequestError(
            f"Language '{lang}' is not supported.", details=list(SUPPORTED_LANGUAGES)
        )

    query = {"lang": lang, "dir_id": dir_id, "title": title}

    # Only streams opened here are closed here.
    opened_here = isinstance(file, (str, os.PathLike))
    stream: IO[bytes] = open(file, "rb") if opened_here else file

    request_timeout = timeout
    if isinstance(timeout, _Unset):
        request_timeout = self.upload_timeout

    try:
        payload = self._request(
            "POST",
            "/media/transcribe",
            params=query,
            files={"file": stream},
            timeout=request_timeout,
        )
    finally:
        if opened_here:
            stream.close()

    return TranscribeResult.from_dict(payload)
260
+
261
def transcribe_url(
    self,
    url: str,
    lang: str | None = None,
    dir_id: int | None = None,
) -> TranscribeResult:
    """Submit a remote media URL to ``/media/transcribe-url``.

    Args:
        url: The media URL to download and transcribe.
        lang: Source language hint (see :data:`SUPPORTED_LANGUAGES`).
        dir_id: Directory to place the media into.

    Raises:
        BadRequestError: If ``lang`` is given but not in
            :data:`SUPPORTED_LANGUAGES`.
    """
    if not (lang is None or lang in SUPPORTED_LANGUAGES):
        raise BadRequestError(
            f"Language '{lang}' is not supported.", details=list(SUPPORTED_LANGUAGES)
        )

    query = {"url": url, "lang": lang, "dir_id": dir_id}
    payload = self._request("POST", "/media/transcribe-url", params=query)
    return TranscribeResult.from_dict(payload)
282
+
283
def translate(self, media_id: str, target_lang: str) -> TranslateResult:
    """Request a translation of a media item into ``target_lang``.

    Args:
        media_id: The public media id.
        target_lang: Target language short code (see
            :data:`TRANSLATION_LANGUAGES`). It is lower-cased and stripped
            of surrounding whitespace before validation and sending.

    Raises:
        BadRequestError: If the normalised code is not in
            :data:`TRANSLATION_LANGUAGES`.
    """
    target_lang = target_lang.lower().strip()

    supported = target_lang in TRANSLATION_LANGUAGES
    if not supported:
        raise BadRequestError(
            f"Language '{target_lang}' is not a supported translation target.",
            details=list(TRANSLATION_LANGUAGES),
        )

    query = {"media_id": media_id, "target_lang": target_lang}
    payload = self._request("PATCH", "/media/translate", params=query)
    return TranslateResult.from_dict(payload)
301
+
302
def diarize(
    self,
    media_id: str,
    lang: str,
    speakers: int | None = None,
) -> TranscribeResult:
    """Request speaker diarization via ``/media/diarize``.

    Args:
        media_id: The public media id.
        lang: Language of the transcript to diarize (must be fully
            translated first).
        speakers: Expected number of speakers; omitted when ``None``.
    """
    query = {"media_id": media_id, "lang": lang, "speakers": speakers}
    payload = self._request("PATCH", "/media/diarize", params=query)
    return TranscribeResult.from_dict(payload)
322
+
323
+ # -- directories -----------------------------------------------------
324
+
325
def directory_tree(self) -> dict[str, Any]:
    """Fetch ``/directory/tree`` and return the decoded body as-is."""
    tree = self._request("GET", "/directory/tree")
    return tree
328
+
329
def create_directory(self, title: str, dir_id: int | None = None) -> Directory:
    """Create a directory named ``title``.

    ``dir_id`` optionally names the parent; it is omitted from the request
    when ``None``.
    """
    query = {"title": title, "dir_id": dir_id}
    payload = self._request("POST", "/directory", params=query)
    return Directory.from_dict(payload)
334
+
335
def delete_directory(self, dir_id: int) -> bool:
    """Delete a directory and report success.

    For a JSON object response the truthiness of its ``result`` key is
    returned; any other response is reduced to its own truthiness.
    """
    payload = self._request("DELETE", "/directory", params={"dir_id": dir_id})
    if isinstance(payload, dict):
        return bool(payload.get("result"))
    return bool(payload)
339
+
340
def update_directory(
    self,
    dir_id: int,
    title: str,
    parent_id: int | None = None,
) -> Directory:
    """Rename a directory and/or move it under ``parent_id``.

    ``parent_id`` is omitted from the request when ``None``.
    """
    query = {"dir_id": dir_id, "title": title, "parent_id": parent_id}
    payload = self._request("PATCH", "/directory", params=query)
    return Directory.from_dict(payload)
354
+
355
+ # -- search ----------------------------------------------------------
356
+
357
def search(
    self,
    query: str,
    from_: int = 0,
    max_: int = 50,
    dir_ids: Iterable[str | int] | None = None,
    lang: str | None = None,
) -> SearchResults:
    """Search for sentences across all transcribed media.

    The query supports a subset of the Lucene ``query_string`` syntax:

    * Boolean operators ``AND`` / ``OR`` / ``NOT`` (e.g. ``cats AND dogs``).
    * Grouping with parentheses (e.g. ``(cats OR dogs) AND vet``).
    * Quoted phrases for exact matches (e.g. ``"climate change"``).
    * Trailing wildcards (e.g. ``transcri*``). Leading wildcards
      (``*tion``) are not allowed and are stripped.

    Field selectors, fuzzy/proximity (``~``), boosts (``^``) and ranges are
    stripped server-side.

    Args:
        query: Full text query (must be at least 3 characters).
        from_: Offset of the first result (for pagination).
        max_: Maximum number of results to return.
        dir_ids: Restrict the search to one or more directory ids. A single
            bare ``str`` or ``int`` is treated as one id.
        lang: Restrict the search to a single language.

    Raises:
        BadRequestError: If ``query`` is shorter than 3 characters.
    """
    if len(query) < 3:
        raise BadRequestError("Query must be at least 3 characters.")
    params: dict[str, Any] = {
        "query": query,
        "_from": from_,
        "_max": max_,
        "lang": lang,
    }
    if dir_ids is not None:
        # A bare str is itself iterable: without this guard "42" would be
        # sent as the two ids "4" and "2", and a bare int would raise
        # TypeError. Treat either as a single id.
        if isinstance(dir_ids, (str, int)):
            dir_ids = [dir_ids]
        params["dir_ids"] = [str(d) for d in dir_ids]
    return SearchResults.from_dict(self._request("GET", "/sentence/search", params=params))
gaston/constants.py ADDED
@@ -0,0 +1,43 @@
1
+ """Constants mirrored from the Gaston API.
2
+
3
+ These are kept in sync with the server so the client can validate input
4
+ locally before issuing a request.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ #: Languages accepted by the transcription endpoints (Whisper language codes).
10
+ SUPPORTED_LANGUAGES: tuple[str, ...] = (
11
+ "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo", "br", "bs", "ca", "cs", "cy", "da", "de",
12
+ "el", "en", "es", "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha", "haw", "he", "hi", "hr", "ht",
13
+ "hu", "hy", "id", "is", "it", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln", "lo", "lt",
14
+ "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl", "nn", "no", "oc", "pa", "pl",
15
+ "ps", "pt", "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw", "ta",
16
+ "te", "tg", "th", "tk", "tl", "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh", "yue",
17
+ )
18
+
19
+ #: Languages the translation endpoint can translate into (short code -> FLORES code).
20
+ TRANSLATION_OPTIONS: dict[str, str] = {
21
+ "en": "eng_Latn", "de": "deu_Latn", "es": "spa_Latn", "pl": "pol_Latn", "hu": "hun_Latn",
22
+ "cs": "ces_Latn", "sk": "slk_Latn", "uk": "ukr_Cyrl", "bg": "bul_Cyrl", "hr": "hrv_Latn",
23
+ "da": "dan_Latn", "nl": "nld_Latn", "et": "est_Latn", "fi": "fin_Latn", "fr": "fra_Latn",
24
+ "el": "ell_Grek", "it": "ita_Latn", "lv": "lav_Latn", "lt": "lit_Latn", "mt": "mlt_Latn",
25
+ "pt": "por_Latn", "ro": "ron_Latn", "sl": "slv_Latn", "sv": "swe_Latn", "zh": "zho_Hans",
26
+ "ar": "arb_Arab", "hi": "hin_Deva", "ja": "jpn_Jpan", "id": "ind_Latn", "is": "isl_Latn",
27
+ "he": "heb_Hebr", "kk": "kaz_Cyrl", "ko": "kor_Hang", "lb": "ltz_Latn", "mk": "mkd_Cyrl",
28
+ "tr": "tur_Latn", "vi": "vie_Latn", "bn": "ben_Beng", "be": "bel_Cyrl", "ka": "kat_Geor",
29
+ "fa": "pes_Arab", "ur": "urd_Arab", "te": "tel_Telu", "ru": "rus_Cyrl",
30
+ }
31
+
32
+ #: Languages available as translation targets.
33
+ TRANSLATION_LANGUAGES: tuple[str, ...] = tuple(TRANSLATION_OPTIONS.keys())
34
+
35
+ #: Possible values of ``Media.state``.
36
+ MEDIA_STATE_PENDING = "pending"
37
+ MEDIA_STATE_UPLOADED = "uploaded"
38
+ MEDIA_STATE_TRANSCRIBED = "transcribed"
39
+
40
+ #: Possible values of ``Media.origin``.
41
+ MEDIA_ORIGIN_UPLOADED = "up"
42
+ MEDIA_ORIGIN_YOUTUBE = "yt"
43
+ MEDIA_ORIGIN_WEB = "web"
gaston/exceptions.py ADDED
@@ -0,0 +1,50 @@
1
+ """Exceptions raised by the Gaston API client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
class GastonError(Exception):
    """Root of the client's exception hierarchy."""
10
+
11
+
12
class GastonAPIError(GastonError):
    """Error carrying the details of a failed API response.

    Attributes:
        message: Human readable error message.
        status_code: HTTP status code of the response, or ``None``.
        details: Optional extra payload accompanying the error.
        payload: The full decoded response body, or ``None``.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        details: Any | None = None,
        payload: Any | None = None,
    ) -> None:
        # ``str(exc)`` is the message, prefixed with "[<status>] " when a
        # status code is known.
        if status_code is not None:
            rendered = f"[{status_code}] {message}"
        else:
            rendered = f"{message}"
        super().__init__(rendered)
        self.message = message
        self.status_code = status_code
        self.details = details
        self.payload = payload
35
+
36
+
37
class AuthenticationError(GastonAPIError):
    """HTTP 403: the token is invalid or the user is disabled."""
39
+
40
+
41
class NotFoundError(GastonAPIError):
    """HTTP 404: the requested resource does not exist."""
43
+
44
+
45
class RateLimitError(GastonAPIError):
    """HTTP 429: the monthly file/API limit is exhausted."""
47
+
48
+
49
class BadRequestError(GastonAPIError):
    """HTTP 400: the request was invalid."""
gaston/models.py ADDED
@@ -0,0 +1,209 @@
1
+ """Typed models for Gaston API responses.
2
+
3
+ Every model keeps the original decoded payload in ``raw`` so that fields not
4
+ explicitly mapped here are still accessible.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+
13
@dataclass
class Usage:
    """Account usage information.

    Attributes:
        files_left: Value of the ``filesLeft`` key; ``0`` when the key is
            absent or null.
        raw: The decoded payload this instance was built from.
    """

    files_left: int
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "Usage":
        """Build a :class:`Usage` from a decoded payload.

        A ``None`` payload (e.g. a JSON ``null`` where the usage object was
        expected) is treated as an empty one instead of raising.
        """
        payload = data if data is not None else {}
        # ``or 0`` also covers an explicit null so the field stays an int.
        return cls(files_left=payload.get("filesLeft") or 0, raw=payload)
23
+
24
+
25
@dataclass
class User:
    """The authenticated user (``GET /user/me``).

    Attributes:
        id: Value of the ``id`` key.
        email: Value of the ``email`` key.
        enabled: Value of the ``enabled`` key (``False`` when absent).
        usage: Parsed ``usage`` object.
        raw: The decoded payload this instance was built from.
    """

    id: str
    email: str
    enabled: bool
    usage: Usage
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "User":
        """Build a :class:`User` from a decoded payload."""
        # ``or {}`` rather than a ``.get`` default: the default only applies
        # to a missing key, so an explicit ``"usage": null`` would otherwise
        # hand ``None`` to ``Usage.from_dict``.
        usage_payload = data.get("usage") or {}
        return cls(
            id=data.get("id"),
            email=data.get("email"),
            enabled=data.get("enabled", False),
            usage=Usage.from_dict(usage_payload),
            raw=data,
        )
44
+
45
+
46
@dataclass
class Sentence:
    """One sentence of a transcript or translation."""

    id: int | None
    speaker: Any | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Sentence":
        """Build a :class:`Sentence`, keeping the payload in ``raw``."""
        identifier = data.get("id")
        who = data.get("speaker")
        return cls(id=identifier, speaker=who, raw=data)

    @property
    def text(self) -> str | None:
        """The ``text`` entry of ``raw``, else its ``sentence`` entry."""
        primary = self.raw.get("text")
        if primary:
            return primary
        return self.raw.get("sentence")
61
+
62
+
63
@dataclass
class Media:
    """Detail record for one media item (``GET /media``).

    Scalar fields are copied from the payload under the same key, except
    ``origin_url``, which is read from ``originUrl``. Missing keys become
    ``None``; the container fields become empty containers.
    """

    id: str
    title: str | None
    state: str | None
    origin: str | None
    origin_url: str | None
    file: str | None
    error: str | None
    thumbnail: str | None
    duration: int | None
    published_at: Any | None
    added_at: Any | None
    transcription_progress: int | None
    download_progress: int | None
    language: str | None
    available_languages: dict[str, Any]
    sentences: list[Sentence]
    diarized_sentences: dict[str, Any]
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Media":
        """Build a :class:`Media` from a decoded payload."""
        lookup = data.get

        # Null or missing containers are normalised to empty ones.
        languages = lookup("available_languages") or {}
        diarized = lookup("diarized_sentences") or {}
        parsed_sentences = []
        for item in lookup("sentences") or []:
            parsed_sentences.append(Sentence.from_dict(item))

        return cls(
            id=lookup("id"),
            title=lookup("title"),
            state=lookup("state"),
            origin=lookup("origin"),
            origin_url=lookup("originUrl"),
            file=lookup("file"),
            error=lookup("error"),
            thumbnail=lookup("thumbnail"),
            duration=lookup("duration"),
            published_at=lookup("published_at"),
            added_at=lookup("added_at"),
            transcription_progress=lookup("transcription_progress"),
            download_progress=lookup("download_progress"),
            language=lookup("language"),
            available_languages=languages,
            sentences=parsed_sentences,
            diarized_sentences=diarized,
            raw=data,
        )
108
+
109
+
110
@dataclass
class MediaList:
    """One page of media entries (``GET /media/list``).

    Iterating the object, or taking its ``len``, operates on ``media``.
    """

    media: list[dict[str, Any]]
    total: int
    pages: int
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MediaList":
        """Build a :class:`MediaList` from a decoded payload."""
        entries = data.get("media") or []
        total_count = data.get("total", 0)
        page_count = data.get("pages", 0)
        return cls(media=entries, total=total_count, pages=page_count, raw=data)

    def __iter__(self):
        return iter(self.media)

    def __len__(self) -> int:
        return len(self.media)
133
+
134
+
135
@dataclass
class TranscribeResult:
    """Outcome of a transcription request: the ``id`` and ``state`` keys."""

    id: str
    state: str | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranscribeResult":
        """Build a :class:`TranscribeResult`, keeping the payload in ``raw``."""
        media_id = data.get("id")
        current_state = data.get("state")
        return cls(id=media_id, state=current_state, raw=data)
146
+
147
+
148
@dataclass
class TranslateResult:
    """Outcome of a translation request."""

    id: str
    available_languages: dict[str, Any]
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranslateResult":
        """Build a :class:`TranslateResult`.

        A missing or null ``available_languages`` becomes an empty dict.
        """
        languages = data.get("available_languages")
        if not languages:
            languages = {}
        return cls(id=data.get("id"), available_languages=languages, raw=data)
163
+
164
+
165
@dataclass
class Directory:
    """A library directory, identified by ``id`` and named by ``title``."""

    id: int | None
    title: str | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Directory":
        """Build a :class:`Directory`, keeping the payload in ``raw``."""
        directory_id = data.get("id")
        name = data.get("title")
        return cls(id=directory_id, title=name, raw=data)
176
+
177
+
178
@dataclass
class SearchResults:
    """Hits returned by a sentence search (``GET /sentence/search``).

    Every element of :attr:`results` is a dict with two keys: ``_sentence``
    (the matched sentence and its ``media`` metadata) and ``_highlight`` (the
    matched fragments, with ``<hlt>...</hlt>`` markers around the hit terms).
    Iterating the object, or taking its ``len``, operates on ``results``.
    """

    results: list[dict[str, Any]]
    total: int | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SearchResults":
        """Build a :class:`SearchResults` from a decoded payload.

        A payload that is not a dict yields no results and ``total=None``.
        """
        if not isinstance(data, dict):
            return cls(results=[], total=None, raw=data)

        hits = data.get("results") or []

        # ``total`` is an Elasticsearch total object: {"value": N, ...};
        # a plain integer is accepted as well.
        reported = data.get("total")
        if isinstance(reported, dict):
            count = reported.get("value")
        elif isinstance(reported, int):
            count = reported
        else:
            count = None

        return cls(results=hits, total=count, raw=data)

    def __iter__(self):
        return iter(self.results)

    def __len__(self) -> int:
        return len(self.results)
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: gaston
3
+ Version: 0.2.0
4
+ Summary: Python client for the Gaston API (transcription, translation and sentence search).
5
+ Author-email: "Streams s.r.o." <contact@streams.guru>
6
+ License: MIT
7
+ Project-URL: Homepage, https://gaston.live
8
+ Project-URL: Documentation, https://www.gaston.live/en/api
9
+ Keywords: gaston,transcription,translation,speech-to-text,api-client
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: requests>=2.28
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: responses>=0.23; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # Gaston API Client
31
+
32
+ A small, typed Python client for the **Gaston API**: transcription, translation
33
+ and full-text search of sentences within transcribed recordings.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install gaston
39
+ ```
40
+
41
+ Requires Python 3.10+.
42
+
43
+ For local development from a checkout instead:
44
+
45
+ ```bash
46
+ pip install -e .
47
+ ```
48
+
49
+ ## Quick start
50
+
51
+ ```python
52
+ from gaston import GastonClient
53
+
54
+ client = GastonClient(token="gapi-...")
55
+
56
+ # Who am I + remaining quota
57
+ me = client.me()
58
+ print(me.email, "files left:", me.usage.files_left)
59
+
60
+ # Transcribe a local file
61
+ result = client.transcribe("interview.mp4", lang="en", title="My interview")
62
+ print(result.id, result.state)
63
+
64
+ # Transcribe from a URL (YouTube or web)
65
+ client.transcribe_url("https://youtu.be/dQw4w9WgXcQ", lang="en")
66
+
67
+ # Translate an existing transcription
68
+ client.translate(result.id, target_lang="de")
69
+
70
+ # Speaker diarization (requires a completed translation in that language)
71
+ client.diarize(result.id, lang="de", speakers=2)
72
+
73
+ # Fetch a media item with its sentences
74
+ media = client.get_media(result.id, lang="en")
75
+ for sentence in media.sentences:
76
+ print(sentence.id, sentence.text, sentence.speaker)
77
+
78
+ # Full text search across the whole library
79
+ results = client.search("climate change", max_=20)
80
+ print("total matches:", results.total)
81
+ for hit in results:
82
+ print(hit["_sentence"]["body"], "->", hit["_highlight"]["body"])
83
+ ```
84
+
85
+ See [Search](#search) for query syntax and filtering options.
86
+
87
+ ### Configuration
88
+
89
+ Generate an API token in the Gaston app under
90
+ [Settings -> API](https://www.gaston.live/user/settings/api/en). Full endpoint
91
+ documentation is available at <https://www.gaston.live/en/api>.
92
+
93
+ The token can be supplied directly or via an environment variable:
94
+
95
+ | Argument | Environment variable | Default |
96
+ |----------|----------------------|------------|
97
+ | `token` | `GASTON_API_TOKEN` | (required) |
98
+
99
+ ```python
100
+ # Uses GASTON_API_TOKEN from the environment
101
+ with GastonClient() as client:
102
+ ...
103
+ ```
104
+
105
+ ### Timeouts
106
+
107
+ Ordinary requests use a 30s timeout. The file upload in `transcribe` can take
108
+ minutes for large files, so it uses a separate, more generous `upload_timeout`
109
+ (default `(10s connect, 600s read)`).
110
+
111
+ A timeout may be a single float, a `(connect, read)` tuple, or `None` to wait
112
+ indefinitely.
113
+
114
+ ```python
115
+ # Customise the defaults for all calls
116
+ client = GastonClient(
117
+ token="gapi-...",
118
+ timeout=30,
119
+ upload_timeout=(10, 1800), # allow up to 30 min to upload large files
120
+ )
121
+
122
+ # Or override per call (e.g. no read timeout for a very large file)
123
+ client.transcribe("huge-recording.mp4", timeout=(10, None))
124
+ ```
125
+
126
+ ## Directories
127
+
128
+ ```python
129
+ folder = client.create_directory("Podcasts")
130
+ client.update_directory(folder.id, title="Podcast archive")
131
+ client.move_media(media_id="me...", dir_id=folder.id)
132
+ tree = client.directory_tree()
133
+ client.delete_directory(folder.id)
134
+ ```
135
+
136
+ ## Search
137
+
138
+ `client.search(query, from_=0, max_=50, dir_ids=None, lang=None)` runs a
139
+ full-text search over every sentence in your transcribed media.
140
+
141
+ ### Query syntax
142
+
143
+ The query supports a subset of the Lucene `query_string` syntax:
144
+
145
+ | Feature | Example | Notes |
146
+ |-------------------|--------------------------|------------------------------------------|
147
+ | Boolean `AND` | `cats AND dogs` | both terms must appear |
148
+ | Boolean `OR` | `cats OR dogs` | either term |
149
+ | Boolean `NOT` | `cats NOT dogs` | exclude a term |
150
+ | Grouping | `(cats OR dogs) AND vet` | combine operators with parentheses |
151
+ | Exact phrase | `"climate change"` | quoted terms match as a phrase |
152
+ | Trailing wildcard | `transcri*` | matches `transcribe`, `transcription`... |
153
+
154
+ Leading wildcards (`*tion`), field selectors, fuzzy (`~`), boosts (`^`) and
155
+ ranges are not supported and are stripped server-side. Queries must be at least
156
+ 3 characters.
157
+
158
+ ```python
159
+ results = client.search('(invoice OR receipt) AND "due date" NOT draft')
160
+ ```
161
+
162
+ ### Filtering and pagination
163
+
164
+ ```python
165
+ # Search within a single directory
166
+ client.search("budget", dir_ids=[42])
167
+
168
+ # Search across several directories
169
+ client.search("budget", dir_ids=[42, 43, 7])
170
+
171
+ # Restrict to one language, and page through results
172
+ page2 = client.search("budget", from_=50, max_=50, lang="en")
173
+ ```
174
+
175
+ ### Reading results
176
+
177
+ `search()` returns a `SearchResults` object. Iterate it for hits, or read
178
+ `.total` for the overall match count. Each hit is a dict with:
179
+
180
+ - `_sentence` - the matched sentence plus its `media` metadata (id, title,
181
+ duration, directory, thumbnail, file, originUrl).
182
+ - `_highlight` - matched fragments with the hit terms wrapped in
183
+ `<hlt>...</hlt>` tags.
184
+
185
+ ```python
186
+ results = client.search("climate change", max_=20)
187
+ print("total matches:", results.total)
188
+ for hit in results:
189
+ sentence = hit["_sentence"]
190
+ print(sentence["media"]["title"], "|", hit["_highlight"]["body"])
191
+ ```
192
+
193
+ ## Error handling
194
+
195
+ All failures raise `GastonError` or one of its subclasses:
196
+
197
+ ```python
198
+ from gaston import GastonClient, AuthenticationError, RateLimitError, NotFoundError
199
+
200
+ try:
201
+ client.transcribe("clip.mp4")
202
+ except RateLimitError:
203
+ print("File limit reached")
204
+ except AuthenticationError:
205
+ print("Bad token / disabled account")
206
+ except NotFoundError as e:
207
+ print("Not found:", e.message)
208
+ ```
209
+
210
+ | Exception | Trigger |
211
+ |-----------------------|------------------------------------------|
212
+ | `AuthenticationError` | HTTP 403, invalid token / disabled user |
213
+ | `BadRequestError` | HTTP 400, invalid parameters |
214
+ | `NotFoundError` | HTTP 404, resource not found |
215
+ | `RateLimitError` | HTTP 429, usage limit exceeded |
216
+ | `GastonAPIError` | any other API error |
217
+
218
+ Every `GastonAPIError` (and subclass) carries `.status_code`, `.message`,
219
+ `.details` and the raw `.payload`; a plain `GastonError` (missing token, network failure) does not.
220
+
221
+ ## Supported languages
222
+
223
+ ```python
224
+ from gaston import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
225
+ ```
226
+
227
+ `SUPPORTED_LANGUAGES` lists transcription source languages; `TRANSLATION_LANGUAGES`
228
+ lists the available translation targets.
@@ -0,0 +1,10 @@
1
+ gaston/__init__.py,sha256=sgIkG-WIsaPydkKTXYpIOVUHyYSLOKnZ9Lo1BiCg4wc,1104
2
+ gaston/client.py,sha256=lglH9_98ZV39LJxRaA-IAw7XUpbSIxub800xb3cRmAE,14170
3
+ gaston/constants.py,sha256=6Mxtc3nMkLJJvqlFEwapGF8WRCO4wL2Yw8x1qQPKIHw,2287
4
+ gaston/exceptions.py,sha256=E7j26kBIheRTRzwmtyyeKzPsl6T4e4QzBk4tYisK0os,1436
5
+ gaston/models.py,sha256=qycz0Gil9XCN_ra7s5gY08dxiahVNlxB1Y8oAMyRvsQ,5994
6
+ gaston-0.2.0.dist-info/licenses/LICENSE,sha256=4kqoIqcVwtUQeMI4Yy-mS3s_u3UwaFsHagw6QeecH9Q,1070
7
+ gaston-0.2.0.dist-info/METADATA,sha256=hM5eyrI2HkLKdnjfbMXt-METNBDizsmn1C93KXhsCZg,7379
8
+ gaston-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ gaston-0.2.0.dist-info/top_level.txt,sha256=gccKrT4ad62T38FlJeHInqZlh5pueads-oHN79TMreA,7
10
+ gaston-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Streams s.r.o.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ gaston