gaston 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gaston/__init__.py +60 -0
- gaston/client.py +395 -0
- gaston/constants.py +43 -0
- gaston/exceptions.py +50 -0
- gaston/models.py +209 -0
- gaston-0.2.0.dist-info/METADATA +228 -0
- gaston-0.2.0.dist-info/RECORD +10 -0
- gaston-0.2.0.dist-info/WHEEL +5 -0
- gaston-0.2.0.dist-info/licenses/LICENSE +21 -0
- gaston-0.2.0.dist-info/top_level.txt +1 -0
gaston/__init__.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Python client library for the Gaston API.
|
|
2
|
+
|
|
3
|
+
Transcription, translation and full-text search of sentences within
|
|
4
|
+
transcribed recordings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .client import GastonClient
|
|
10
|
+
from .constants import (
|
|
11
|
+
SUPPORTED_LANGUAGES,
|
|
12
|
+
TRANSLATION_LANGUAGES,
|
|
13
|
+
TRANSLATION_OPTIONS,
|
|
14
|
+
)
|
|
15
|
+
from .exceptions import (
|
|
16
|
+
AuthenticationError,
|
|
17
|
+
BadRequestError,
|
|
18
|
+
GastonAPIError,
|
|
19
|
+
GastonError,
|
|
20
|
+
NotFoundError,
|
|
21
|
+
RateLimitError,
|
|
22
|
+
)
|
|
23
|
+
from .models import (
|
|
24
|
+
Directory,
|
|
25
|
+
Media,
|
|
26
|
+
MediaList,
|
|
27
|
+
SearchResults,
|
|
28
|
+
Sentence,
|
|
29
|
+
TranscribeResult,
|
|
30
|
+
TranslateResult,
|
|
31
|
+
Usage,
|
|
32
|
+
User,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
__version__ = "0.2.0"
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"GastonClient",
|
|
39
|
+
# exceptions
|
|
40
|
+
"GastonError",
|
|
41
|
+
"GastonAPIError",
|
|
42
|
+
"AuthenticationError",
|
|
43
|
+
"BadRequestError",
|
|
44
|
+
"NotFoundError",
|
|
45
|
+
"RateLimitError",
|
|
46
|
+
# models
|
|
47
|
+
"User",
|
|
48
|
+
"Usage",
|
|
49
|
+
"Media",
|
|
50
|
+
"MediaList",
|
|
51
|
+
"Sentence",
|
|
52
|
+
"Directory",
|
|
53
|
+
"TranscribeResult",
|
|
54
|
+
"TranslateResult",
|
|
55
|
+
"SearchResults",
|
|
56
|
+
# constants
|
|
57
|
+
"SUPPORTED_LANGUAGES",
|
|
58
|
+
"TRANSLATION_LANGUAGES",
|
|
59
|
+
"TRANSLATION_OPTIONS",
|
|
60
|
+
]
|
gaston/client.py
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
"""Synchronous client for the Gaston API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import IO, Any, BinaryIO, Iterable, Mapping, Tuple, Union
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from .constants import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
|
|
11
|
+
from .exceptions import (
|
|
12
|
+
AuthenticationError,
|
|
13
|
+
BadRequestError,
|
|
14
|
+
GastonAPIError,
|
|
15
|
+
GastonError,
|
|
16
|
+
NotFoundError,
|
|
17
|
+
RateLimitError,
|
|
18
|
+
)
|
|
19
|
+
from .models import (
|
|
20
|
+
Directory,
|
|
21
|
+
Media,
|
|
22
|
+
MediaList,
|
|
23
|
+
SearchResults,
|
|
24
|
+
TranscribeResult,
|
|
25
|
+
TranslateResult,
|
|
26
|
+
User,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Production endpoint; every request path is appended to this.
BASE_URL = "https://api.gaston.live"
# Name of an undocumented environment variable that, when set, replaces
# BASE_URL. Intended for development/testing only.
_BASE_URL_OVERRIDE_ENV = "GASTON_API_URL_OVERRIDE"

# Timeout values are forwarded to requests untouched:
#   * a float applies to both the connect and the read phase,
#   * a (connect, read) tuple configures the two phases independently,
#   * None disables the timeout entirely (wait forever).
TimeoutType = Union[float, Tuple[float, float], None]

# Default for ordinary (quick metadata) calls.
DEFAULT_TIMEOUT: TimeoutType = 30.0
# Default for the file-upload endpoint (``transcribe``): uploading a large
# file can legitimately take minutes, so the read phase is far more generous.
DEFAULT_UPLOAD_TIMEOUT: TimeoutType = (10.0, 600.0)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class _Unset:
    """Marker type meaning "the caller did not pass this argument".

    ``None`` cannot play that role for timeouts because ``None`` is itself a
    meaningful timeout value (wait forever).
    """


# The single shared marker instance used as a default argument value.
_UNSET = _Unset()
|
|
49
|
+
|
|
50
|
+
# Maps an HTTP status code to the exception class raised for it. Any status
# that is not listed falls back to the generic GastonAPIError.
_STATUS_EXCEPTIONS = {
    400: BadRequestError,
    # 401 is the standard HTTP status for missing/invalid credentials (e.g.
    # from a gateway in front of the API); treat it like the API's own 403.
    401: AuthenticationError,
    403: AuthenticationError,
    404: NotFoundError,
    429: RateLimitError,
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class GastonClient:
    """A client for the Gaston transcription/translation/search API.

    Args:
        token: API token (the ``gapi-...`` token issued by the platform).
            Falls back to the ``GASTON_API_TOKEN`` environment variable.
        timeout: Timeout for ordinary requests. A single float covers both
            connect and read; a ``(connect, read)`` tuple sets them separately;
            ``None`` waits indefinitely. Defaults to 30s.
        upload_timeout: Timeout for the file-upload endpoint (``transcribe``),
            which can take minutes for large files. Defaults to
            ``(10s connect, 600s read)``.
        session: An optional pre-configured :class:`requests.Session`. The
            client adds its ``token`` header to this session and closes it
            in :meth:`close`.

    Raises:
        GastonError: If no token is passed and ``GASTON_API_TOKEN`` is unset.

    Example::

        from gaston import GastonClient

        client = GastonClient(token="gapi-...")
        me = client.me()
        print(me.email, me.usage.files_left)

    The client can also be used as a context manager to ensure the underlying
    HTTP session is closed::

        with GastonClient(token="gapi-...") as client:
            client.transcribe("interview.mp4", lang="en")
    """

    def __init__(
        self,
        token: str | None = None,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
        upload_timeout: TimeoutType = DEFAULT_UPLOAD_TIMEOUT,
        session: requests.Session | None = None,
    ) -> None:
        token = token or os.getenv("GASTON_API_TOKEN")
        if not token:
            raise GastonError(
                "An API token is required (pass token=... or set GASTON_API_TOKEN)."
            )
        self.token = token
        # The override environment variable wins over the built-in URL; a
        # trailing slash is removed because request paths start with "/".
        self.base_url = (os.getenv(_BASE_URL_OVERRIDE_ENV) or BASE_URL).rstrip("/")
        self.timeout = timeout
        self.upload_timeout = upload_timeout
        self._session = session or requests.Session()
        # The API authenticates through a custom ``token`` header.
        self._session.headers.update({"token": token})

    # -- context manager -------------------------------------------------

    def __enter__(self) -> "GastonClient":
        return self

    def __exit__(self, *_exc: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()

    # -- low level -------------------------------------------------------

    def _request(
        self,
        method: str,
        path: str,
        *,
        params: Mapping[str, Any] | None = None,
        files: Mapping[str, Any] | None = None,
        timeout: TimeoutType | _Unset = _UNSET,
    ) -> Any:
        """Send one request and return the decoded response body.

        Args:
            method: HTTP method name.
            path: Path appended to ``self.base_url`` (starts with ``/``).
            params: Query parameters; entries whose value is ``None`` are
                omitted from the request.
            files: Multipart file mapping handed to requests.
            timeout: Per-call timeout; when not given, ``self.timeout`` is
                used. ``None`` is a valid value meaning "no timeout".

        Raises:
            GastonError: If the request itself fails (any
                ``requests.RequestException``).
            GastonAPIError: (or a subclass) if the response signals an error.
        """
        url = f"{self.base_url}{path}"
        # Drop params that are None so we don't send "dir_id=None" literally.
        clean_params = None
        if params is not None:
            clean_params = {k: v for k, v in params.items() if v is not None}

        try:
            resp = self._session.request(
                method,
                url,
                params=clean_params,
                files=files,
                timeout=self.timeout if isinstance(timeout, _Unset) else timeout,
            )
        except requests.RequestException as exc:
            raise GastonError(f"Request to {url} failed: {exc}") from exc

        return self._handle_response(resp)

    @staticmethod
    def _handle_response(resp: requests.Response) -> Any:
        """Decode ``resp`` and raise the matching exception on failure.

        Returns the decoded JSON body, or the raw response text when a
        successful response is not valid JSON.

        Raises:
            GastonAPIError: (or the subclass registered for the status code)
                when the status is not OK or the JSON body has a non-null
                ``error`` key.
        """
        try:
            body = resp.json()
        except ValueError:
            if resp.ok:
                return resp.text
            raise GastonAPIError(
                f"Non-JSON response from server: {resp.text[:200]}",
                status_code=resp.status_code,
            )

        # The API signals failures both via HTTP status codes and via an
        # ``error`` key in an otherwise 200 response. Handle both.
        # NOTE(review): Media payloads also expose an ``error`` field; if the
        # server returns it non-null with HTTP 200, get_media() raises here.
        # Confirm that this is the intended behaviour.
        error_message = body.get("error") if isinstance(body, dict) else None

        if not resp.ok or error_message is not None:
            exc_cls = _STATUS_EXCEPTIONS.get(resp.status_code, GastonAPIError)
            details = None
            if isinstance(body, dict):
                # The first non-empty of these keys becomes ``details``.
                details = (
                    body.get("details")
                    or body.get("supported_languages")
                    or body.get("supportedLanguages")
                )
            raise exc_cls(
                message=error_message or f"Request failed with status {resp.status_code}",
                status_code=resp.status_code,
                details=details,
                payload=body,
            )

        return body

    @staticmethod
    def _check_source_language(lang: str | None) -> None:
        """Raise ``BadRequestError`` if ``lang`` is given but not supported."""
        if lang is not None and lang not in SUPPORTED_LANGUAGES:
            raise BadRequestError(
                f"Language '{lang}' is not supported.", details=list(SUPPORTED_LANGUAGES)
            )

    # -- user ------------------------------------------------------------

    def me(self) -> User:
        """Return the authenticated user and remaining usage."""
        return User.from_dict(self._request("GET", "/user/me"))

    # -- media -----------------------------------------------------------

    def list_media(self, page: int = 1, dir_id: int | None = None) -> MediaList:
        """List media in the library, paginated.

        Args:
            page: 1-based page number.
            dir_id: Restrict to a directory (``None`` for the root listing).
        """
        return MediaList.from_dict(
            self._request("GET", "/media/list", params={"page": page, "dir_id": dir_id})
        )

    def get_media(self, media_id: str, lang: str | None = None) -> Media:
        """Fetch a single media item including its sentences.

        Args:
            media_id: The public media id (``uid``).
            lang: Return sentences in this language (defaults to the original).
        """
        return Media.from_dict(
            self._request("GET", "/media", params={"media_id": media_id, "lang": lang})
        )

    def move_media(self, media_id: str, dir_id: int | None = None) -> dict[str, Any]:
        """Move a media item into a directory (``dir_id=None`` for root).

        A ``None`` ``dir_id`` is omitted from the request rather than sent.
        """
        return self._request("PATCH", "/media", params={"media_id": media_id, "dir_id": dir_id})

    def transcribe(
        self,
        file: str | os.PathLike[str] | BinaryIO | IO[bytes],
        lang: str | None = None,
        dir_id: int | None = None,
        title: str | None = None,
        timeout: TimeoutType | _Unset = _UNSET,
    ) -> TranscribeResult:
        """Upload a media file and queue it for transcription.

        Args:
            file: Path to a file, or an already-open binary file object.
            lang: Source language hint (see :data:`SUPPORTED_LANGUAGES`). If
                omitted the language is auto-detected.
            dir_id: Directory to place the media into.
            title: Display title (defaults to the file name).
            timeout: Override the client's ``upload_timeout`` for this call.

        Returns:
            A :class:`TranscribeResult` with the new media id and state.

        Raises:
            BadRequestError: If ``lang`` is not in :data:`SUPPORTED_LANGUAGES`
                (checked locally, before the file is opened).
        """
        self._check_source_language(lang)

        params = {"lang": lang, "dir_id": dir_id, "title": title}

        # Only close the handle if this method opened it; a caller-supplied
        # file object stays under the caller's control.
        should_close = False
        if isinstance(file, (str, os.PathLike)):
            fh: IO[bytes] = open(file, "rb")
            should_close = True
        else:
            fh = file
        effective_timeout = self.upload_timeout if isinstance(timeout, _Unset) else timeout
        try:
            files = {"file": fh}
            data = self._request(
                "POST", "/media/transcribe", params=params, files=files, timeout=effective_timeout
            )
        finally:
            if should_close:
                fh.close()
        return TranscribeResult.from_dict(data)

    def transcribe_url(
        self,
        url: str,
        lang: str | None = None,
        dir_id: int | None = None,
        timeout: TimeoutType | _Unset = _UNSET,
    ) -> TranscribeResult:
        """Queue transcription of a remote media URL (YouTube or web).

        Args:
            url: The media URL to download and transcribe.
            lang: Source language hint (see :data:`SUPPORTED_LANGUAGES`).
            dir_id: Directory to place the media into.
            timeout: Override the client's ordinary ``timeout`` for this call.

        Raises:
            BadRequestError: If ``lang`` is not in :data:`SUPPORTED_LANGUAGES`.
        """
        self._check_source_language(lang)
        data = self._request(
            "POST",
            "/media/transcribe-url",
            params={"url": url, "lang": lang, "dir_id": dir_id},
            timeout=timeout,
        )
        return TranscribeResult.from_dict(data)

    def translate(self, media_id: str, target_lang: str) -> TranslateResult:
        """Queue a translation of a transcribed media into ``target_lang``.

        Args:
            media_id: The public media id.
            target_lang: Target language short code (see
                :data:`TRANSLATION_LANGUAGES`). It is lower-cased and
                stripped of surrounding whitespace before validation.

        Raises:
            BadRequestError: If the normalized code is not a supported target.
        """
        target_lang = target_lang.lower().strip()
        if target_lang not in TRANSLATION_LANGUAGES:
            raise BadRequestError(
                f"Language '{target_lang}' is not a supported translation target.",
                details=list(TRANSLATION_LANGUAGES),
            )
        data = self._request(
            "PATCH", "/media/translate", params={"media_id": media_id, "target_lang": target_lang}
        )
        return TranslateResult.from_dict(data)

    def diarize(
        self,
        media_id: str,
        lang: str,
        speakers: int | None = None,
    ) -> TranscribeResult:
        """Queue speaker diarization for a (translated) media in ``lang``.

        Args:
            media_id: The public media id.
            lang: Language of the transcript to diarize (must be fully
                translated first).
            speakers: Optional expected number of speakers.
        """
        data = self._request(
            "PATCH",
            "/media/diarize",
            params={"media_id": media_id, "lang": lang, "speakers": speakers},
        )
        return TranscribeResult.from_dict(data)

    # -- directories -----------------------------------------------------

    def directory_tree(self) -> dict[str, Any]:
        """Return the full nested directory tree for the user."""
        return self._request("GET", "/directory/tree")

    def create_directory(self, title: str, dir_id: int | None = None) -> Directory:
        """Create a directory, optionally nested under ``dir_id``."""
        return Directory.from_dict(
            self._request("POST", "/directory", params={"title": title, "dir_id": dir_id})
        )

    def delete_directory(self, dir_id: int) -> bool:
        """Delete a directory. Returns ``True`` on success."""
        data = self._request("DELETE", "/directory", params={"dir_id": dir_id})
        # A dict body is judged by its ``result`` key, anything else by its
        # own truthiness.
        return bool(data.get("result")) if isinstance(data, dict) else bool(data)

    def update_directory(
        self,
        dir_id: int,
        title: str,
        parent_id: int | None = None,
    ) -> Directory:
        """Rename a directory and/or move it under ``parent_id``."""
        return Directory.from_dict(
            self._request(
                "PATCH",
                "/directory",
                params={"dir_id": dir_id, "title": title, "parent_id": parent_id},
            )
        )

    # -- search ----------------------------------------------------------

    def search(
        self,
        query: str,
        from_: int = 0,
        max_: int = 50,
        dir_ids: Iterable[str | int] | None = None,
        lang: str | None = None,
    ) -> SearchResults:
        """Search for sentences across all transcribed media.

        The query supports a subset of the Lucene ``query_string`` syntax:

        * Boolean operators ``AND`` / ``OR`` / ``NOT`` (e.g. ``cats AND dogs``).
        * Grouping with parentheses (e.g. ``(cats OR dogs) AND vet``).
        * Quoted phrases for exact matches (e.g. ``"climate change"``).
        * Trailing wildcards (e.g. ``transcri*``). Leading wildcards
          (``*tion``) are not allowed and are stripped.

        Field selectors, fuzzy/proximity (``~``), boosts (``^``) and ranges are
        stripped server-side.

        Args:
            query: Full text query (must be at least 3 characters).
            from_: Offset of the first result (for pagination).
            max_: Maximum number of results to return.
            dir_ids: Restrict the search to one or more directory ids. A
                single ``str`` or ``int`` is accepted and treated as one id.
            lang: Restrict the search to a single language.

        Raises:
            BadRequestError: If ``query`` is shorter than 3 characters.
        """
        if len(query) < 3:
            raise BadRequestError("Query must be at least 3 characters.")
        params: dict[str, Any] = {
            "query": query,
            "_from": from_,
            "_max": max_,
            "lang": lang,
        }
        if dir_ids is not None:
            # A bare string would otherwise be split into single characters
            # ("12" -> ["1", "2"]) and a bare int would not be iterable.
            if isinstance(dir_ids, (str, int)):
                dir_ids = [dir_ids]
            params["dir_ids"] = [str(d) for d in dir_ids]
        return SearchResults.from_dict(self._request("GET", "/sentence/search", params=params))
|
gaston/constants.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Constants mirrored from the Gaston API.
|
|
2
|
+
|
|
3
|
+
These are kept in sync with the server so the client can validate input
|
|
4
|
+
locally before issuing a request.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
#: Languages accepted by the transcription endpoints (Whisper language codes).
SUPPORTED_LANGUAGES: tuple[str, ...] = (
    "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo",
    "br", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es",
    "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha", "haw",
    "he", "hi", "hr", "ht", "hu", "hy", "id", "is", "it", "ja",
    "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln", "lo",
    "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt",
    "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt",
    "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so", "sq",
    "sr", "su", "sv", "sw", "ta", "te", "tg", "th", "tk", "tl",
    "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh", "yue",
)

#: Languages the translation endpoint can translate into (short code -> FLORES code).
TRANSLATION_OPTIONS: dict[str, str] = {
    "en": "eng_Latn",
    "de": "deu_Latn",
    "es": "spa_Latn",
    "pl": "pol_Latn",
    "hu": "hun_Latn",
    "cs": "ces_Latn",
    "sk": "slk_Latn",
    "uk": "ukr_Cyrl",
    "bg": "bul_Cyrl",
    "hr": "hrv_Latn",
    "da": "dan_Latn",
    "nl": "nld_Latn",
    "et": "est_Latn",
    "fi": "fin_Latn",
    "fr": "fra_Latn",
    "el": "ell_Grek",
    "it": "ita_Latn",
    "lv": "lav_Latn",
    "lt": "lit_Latn",
    "mt": "mlt_Latn",
    "pt": "por_Latn",
    "ro": "ron_Latn",
    "sl": "slv_Latn",
    "sv": "swe_Latn",
    "zh": "zho_Hans",
    "ar": "arb_Arab",
    "hi": "hin_Deva",
    "ja": "jpn_Jpan",
    "id": "ind_Latn",
    "is": "isl_Latn",
    "he": "heb_Hebr",
    "kk": "kaz_Cyrl",
    "ko": "kor_Hang",
    "lb": "ltz_Latn",
    "mk": "mkd_Cyrl",
    "tr": "tur_Latn",
    "vi": "vie_Latn",
    "bn": "ben_Beng",
    "be": "bel_Cyrl",
    "ka": "kat_Geor",
    "fa": "pes_Arab",
    "ur": "urd_Arab",
    "te": "tel_Telu",
    "ru": "rus_Cyrl",
}

#: Languages available as translation targets (the short codes above, in order).
TRANSLATION_LANGUAGES: tuple[str, ...] = tuple(TRANSLATION_OPTIONS)

#: Possible values of ``Media.state``.
MEDIA_STATE_PENDING = "pending"
MEDIA_STATE_UPLOADED = "uploaded"
MEDIA_STATE_TRANSCRIBED = "transcribed"

#: Possible values of ``Media.origin``.
MEDIA_ORIGIN_UPLOADED = "up"
MEDIA_ORIGIN_YOUTUBE = "yt"
MEDIA_ORIGIN_WEB = "web"
|
gaston/exceptions.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Exceptions raised by the Gaston API client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GastonError(Exception):
    """Root of the client's exception hierarchy.

    Every error the client raises derives from this class, so catching it
    catches them all.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GastonAPIError(GastonError):
    """Error reported by the API in a response.

    The string form of the exception is the message, prefixed with
    ``[<status_code>] `` when a status code is known.

    Attributes:
        message: Human readable error message returned by the API.
        status_code: HTTP status code of the response (if any).
        details: Optional extra payload returned alongside the error.
        payload: The full decoded response body.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        details: Any | None = None,
        payload: Any | None = None,
    ) -> None:
        if status_code is None:
            rendered = f"{message}"
        else:
            rendered = f"[{status_code}] {message}"
        super().__init__(rendered)
        self.message = message
        self.status_code = status_code
        self.details = details
        self.payload = payload
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AuthenticationError(GastonAPIError):
    """The token was rejected or the user is disabled (HTTP 403)."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NotFoundError(GastonAPIError):
    """The requested resource does not exist (HTTP 404)."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RateLimitError(GastonAPIError):
    """The monthly file/API limit has been used up (HTTP 429)."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BadRequestError(GastonAPIError):
    """The request was invalid (HTTP 400)."""
|
gaston/models.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Typed models for Gaston API responses.
|
|
2
|
+
|
|
3
|
+
Every model keeps the original decoded payload in ``raw`` so that fields not
|
|
4
|
+
explicitly mapped here are still accessible.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class Usage:
    """Usage figures for the account.

    ``files_left`` mirrors the payload's ``filesLeft`` key; ``raw`` keeps the
    payload as received.
    """

    files_left: int
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Usage":
        """Build a ``Usage`` from a decoded payload (missing count -> 0)."""
        remaining = data.get("filesLeft", 0)
        return cls(remaining, data)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class User:
    """The authenticated user (``GET /user/me``).

    ``raw`` keeps the decoded payload as received, so fields that are not
    mapped to an attribute remain accessible.
    """

    id: str
    email: str
    enabled: bool
    usage: Usage
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "User":
        """Build a ``User`` from a decoded payload.

        A missing ``enabled`` key is treated as ``False``. A missing or
        ``null`` ``usage`` entry yields a ``Usage`` built from an empty dict.
        """
        return cls(
            id=data.get("id"),
            email=data.get("email"),
            enabled=data.get("enabled", False),
            # ``or {}`` (not a .get default) so an explicit JSON null does
            # not reach Usage.from_dict as None.
            usage=Usage.from_dict(data.get("usage") or {}),
            raw=data,
        )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class Sentence:
    """One sentence of a transcript (original or translated).

    Only ``id`` and ``speaker`` are mapped to attributes; everything else is
    read from ``raw``, the payload as received.
    """

    id: int | None
    speaker: Any | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Sentence":
        """Build a ``Sentence`` from a decoded payload."""
        return cls(data.get("id"), data.get("speaker"), data)

    @property
    def text(self) -> str | None:
        """The payload's ``text`` value if truthy, else its ``sentence`` value."""
        primary = self.raw.get("text")
        if primary:
            return primary
        return self.raw.get("sentence")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class Media:
    """Full media detail (``GET /media``).

    Attributes mirror payload keys of the same name, except ``origin_url``,
    which is read from ``originUrl``. ``raw`` keeps the payload as received.
    """

    id: str
    title: str | None
    state: str | None
    origin: str | None
    origin_url: str | None
    file: str | None
    error: str | None
    thumbnail: str | None
    duration: int | None
    published_at: Any | None
    added_at: Any | None
    transcription_progress: int | None
    download_progress: int | None
    language: str | None
    available_languages: dict[str, Any]
    sentences: list[Sentence]
    diarized_sentences: dict[str, Any]
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Media":
        """Build a ``Media`` from a decoded payload.

        Missing scalar keys become ``None``; missing or empty collections
        become an empty dict/list.
        """
        lookup = data.get
        # Falsy collections (missing, null, empty) collapse to fresh empties.
        languages = lookup("available_languages") or {}
        parsed_sentences = [Sentence.from_dict(item) for item in lookup("sentences") or []]
        diarized = lookup("diarized_sentences") or {}
        return cls(
            id=lookup("id"),
            title=lookup("title"),
            state=lookup("state"),
            origin=lookup("origin"),
            origin_url=lookup("originUrl"),
            file=lookup("file"),
            error=lookup("error"),
            thumbnail=lookup("thumbnail"),
            duration=lookup("duration"),
            published_at=lookup("published_at"),
            added_at=lookup("added_at"),
            transcription_progress=lookup("transcription_progress"),
            download_progress=lookup("download_progress"),
            language=lookup("language"),
            available_languages=languages,
            sentences=parsed_sentences,
            diarized_sentences=diarized,
            raw=data,
        )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class MediaList:
    """One page of media items (``GET /media/list``).

    Iterating the object or taking its ``len`` operates on ``media``.
    """

    media: list[dict[str, Any]]
    total: int
    pages: int
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MediaList":
        """Build a ``MediaList`` from a decoded payload (missing counts -> 0)."""
        items = data.get("media") or []
        total_count = data.get("total", 0)
        page_count = data.get("pages", 0)
        return cls(items, total_count, page_count, data)

    def __iter__(self):
        entries = self.media
        return iter(entries)

    def __len__(self) -> int:
        entries = self.media
        return len(entries)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass
class TranscribeResult:
    """Outcome of a transcription request: the media ``id`` and its ``state``.

    ``raw`` keeps the decoded payload as received.
    """

    id: str
    state: str | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranscribeResult":
        """Build a ``TranscribeResult`` from a decoded payload."""
        media_id = data.get("id")
        current_state = data.get("state")
        return cls(media_id, current_state, data)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@dataclass
class TranslateResult:
    """Outcome of a translation request.

    ``raw`` keeps the decoded payload as received.
    """

    id: str
    available_languages: dict[str, Any]
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranslateResult":
        """Build a ``TranslateResult``; a missing/empty language map becomes ``{}``."""
        languages = data.get("available_languages") or {}
        return cls(data.get("id"), languages, data)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@dataclass
class Directory:
    """A directory in the user's library.

    ``raw`` keeps the decoded payload as received.
    """

    id: int | None
    title: str | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Directory":
        """Build a ``Directory`` from a decoded payload."""
        directory_id = data.get("id")
        name = data.get("title")
        return cls(directory_id, name, data)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@dataclass
class SearchResults:
    """Results of a sentence search (``GET /sentence/search``).

    Each entry in :attr:`results` is a dict with two keys: ``_sentence`` (the
    matched sentence and its ``media`` metadata) and ``_highlight`` (the matched
    fragments with ``<hlt>...</hlt>`` markers around the hit terms).

    Iterating the object or taking its ``len`` operates on ``results``.
    """

    results: list[dict[str, Any]]
    total: int | None
    raw: dict[str, Any] = field(default_factory=dict, repr=False)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SearchResults":
        """Build a ``SearchResults`` from a decoded payload.

        A non-dict payload yields no results and an unknown total, but is
        still stored in ``raw``.
        """
        if not isinstance(data, dict):
            return cls(results=[], total=None, raw=data)

        hits = data.get("results") or []
        # ``total`` is an Elasticsearch total object: {"value": N, ...};
        # a plain integer is accepted as well.
        reported = data.get("total")
        if isinstance(reported, dict):
            count = reported.get("value")
        elif isinstance(reported, int):
            count = reported
        else:
            count = None
        return cls(results=hits, total=count, raw=data)

    def __iter__(self):
        hits = self.results
        return iter(hits)

    def __len__(self) -> int:
        hits = self.results
        return len(hits)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gaston
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Python client for the Gaston API (transcription, translation and sentence search).
|
|
5
|
+
Author-email: "Streams s.r.o." <contact@streams.guru>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://gaston.live
|
|
8
|
+
Project-URL: Documentation, https://www.gaston.live/en/api
|
|
9
|
+
Keywords: gaston,transcription,translation,speech-to-text,api-client
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: requests>=2.28
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
27
|
+
Requires-Dist: responses>=0.23; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# Gaston API Client
|
|
31
|
+
|
|
32
|
+
A small, typed Python client for the **Gaston API**: transcription, translation
|
|
33
|
+
and full-text search of sentences within transcribed recordings.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install gaston
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Requires Python 3.10+.
|
|
42
|
+
|
|
43
|
+
For local development from a checkout instead:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install -e .
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from gaston import GastonClient
|
|
53
|
+
|
|
54
|
+
client = GastonClient(token="gapi-...")
|
|
55
|
+
|
|
56
|
+
# Who am I + remaining quota
|
|
57
|
+
me = client.me()
|
|
58
|
+
print(me.email, "files left:", me.usage.files_left)
|
|
59
|
+
|
|
60
|
+
# Transcribe a local file
|
|
61
|
+
result = client.transcribe("interview.mp4", lang="en", title="My interview")
|
|
62
|
+
print(result.id, result.state)
|
|
63
|
+
|
|
64
|
+
# Transcribe from a URL (YouTube or web)
|
|
65
|
+
client.transcribe_url("https://youtu.be/dQw4w9WgXcQ", lang="en")
|
|
66
|
+
|
|
67
|
+
# Translate an existing transcription
|
|
68
|
+
client.translate(result.id, target_lang="de")
|
|
69
|
+
|
|
70
|
+
# Speaker diarization (requires a completed translation in that language)
|
|
71
|
+
client.diarize(result.id, lang="de", speakers=2)
|
|
72
|
+
|
|
73
|
+
# Fetch a media item with its sentences
|
|
74
|
+
media = client.get_media(result.id, lang="en")
|
|
75
|
+
for sentence in media.sentences:
|
|
76
|
+
print(sentence.id, sentence.text, sentence.speaker)
|
|
77
|
+
|
|
78
|
+
# Full text search across the whole library
|
|
79
|
+
results = client.search("climate change", max_=20)
|
|
80
|
+
print("total matches:", results.total)
|
|
81
|
+
for hit in results:
|
|
82
|
+
print(hit["_sentence"]["body"], "->", hit["_highlight"]["body"])
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
See [Search](#search) for query syntax and filtering options.
|
|
86
|
+
|
|
87
|
+
### Configuration
|
|
88
|
+
|
|
89
|
+
Generate an API token in the Gaston app under
|
|
90
|
+
[Settings -> API](https://www.gaston.live/user/settings/api/en). Full endpoint
|
|
91
|
+
documentation is available at <https://www.gaston.live/en/api>.
|
|
92
|
+
|
|
93
|
+
The token can be supplied directly or via an environment variable:
|
|
94
|
+
|
|
95
|
+
| Argument | Environment variable | Default |
|
|
96
|
+
|----------|----------------------|------------|
|
|
97
|
+
| `token` | `GASTON_API_TOKEN` | (required) |
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
# Uses GASTON_API_TOKEN from the environment
|
|
101
|
+
with GastonClient() as client:
|
|
102
|
+
...
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Timeouts
|
|
106
|
+
|
|
107
|
+
Ordinary requests use a 30s timeout. The file upload in `transcribe` can take
|
|
108
|
+
minutes for large files, so it uses a separate, more generous `upload_timeout`
|
|
109
|
+
(default `(10, 600)`: 10s connect, 600s read).
|
|
110
|
+
|
|
111
|
+
A timeout may be a single float, a `(connect, read)` tuple, or `None` to wait
|
|
112
|
+
indefinitely.
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
# Customise the defaults for all calls
|
|
116
|
+
client = GastonClient(
|
|
117
|
+
token="gapi-...",
|
|
118
|
+
timeout=30,
|
|
119
|
+
upload_timeout=(10, 1800), # allow up to 30 min to upload large files
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# Or override per call (e.g. no read timeout for a very large file)
|
|
123
|
+
client.transcribe("huge-recording.mp4", timeout=(10, None))
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Directories
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
folder = client.create_directory("Podcasts")
|
|
130
|
+
client.update_directory(folder.id, title="Podcast archive")
|
|
131
|
+
client.move_media(media_id="me...", dir_id=folder.id)
|
|
132
|
+
tree = client.directory_tree()
|
|
133
|
+
client.delete_directory(folder.id)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Search
|
|
137
|
+
|
|
138
|
+
`client.search(query, from_=0, max_=50, dir_ids=None, lang=None)` runs a
|
|
139
|
+
full-text search over every sentence in your transcribed media.
|
|
140
|
+
|
|
141
|
+
### Query syntax
|
|
142
|
+
|
|
143
|
+
The query supports a subset of the Lucene `query_string` syntax:
|
|
144
|
+
|
|
145
|
+
| Feature | Example | Notes |
|
|
146
|
+
|-------------------|--------------------------|------------------------------------------|
|
|
147
|
+
| Boolean `AND` | `cats AND dogs` | both terms must appear |
|
|
148
|
+
| Boolean `OR` | `cats OR dogs` | either term |
|
|
149
|
+
| Boolean `NOT` | `cats NOT dogs` | exclude a term |
|
|
150
|
+
| Grouping | `(cats OR dogs) AND vet` | combine operators with parentheses |
|
|
151
|
+
| Exact phrase | `"climate change"` | quoted terms match as a phrase |
|
|
152
|
+
| Trailing wildcard | `transcri*` | matches `transcribe`, `transcription`... |
|
|
153
|
+
|
|
154
|
+
Leading wildcards (`*tion`), field selectors, fuzzy (`~`), boosts (`^`) and
|
|
155
|
+
ranges are not supported and are stripped server-side. Queries must be at least
|
|
156
|
+
3 characters long.
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
results = client.search('(invoice OR receipt) AND "due date" NOT draft')
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Filtering and pagination
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
# Search within a single directory
|
|
166
|
+
client.search("budget", dir_ids=[42])
|
|
167
|
+
|
|
168
|
+
# Search across several directories
|
|
169
|
+
client.search("budget", dir_ids=[42, 43, 7])
|
|
170
|
+
|
|
171
|
+
# Restrict to one language, and page through results
|
|
172
|
+
page2 = client.search("budget", from_=50, max_=50, lang="en")
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Reading results
|
|
176
|
+
|
|
177
|
+
`search()` returns a `SearchResults` object. Iterate over it for hits, or read
|
|
178
|
+
`.total` for the overall match count. Each hit is a dict with:
|
|
179
|
+
|
|
180
|
+
- `_sentence` - the matched sentence plus its `media` metadata (id, title,
|
|
181
|
+
duration, directory, thumbnail, file, originUrl).
|
|
182
|
+
- `_highlight` - matched fragments with the hit terms wrapped in
|
|
183
|
+
`<hlt>...</hlt>` tags.
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
results = client.search("climate change", max_=20)
|
|
187
|
+
print("total matches:", results.total)
|
|
188
|
+
for hit in results:
|
|
189
|
+
sentence = hit["_sentence"]
|
|
190
|
+
print(sentence["media"]["title"], "|", hit["_highlight"]["body"])
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Error handling
|
|
194
|
+
|
|
195
|
+
All failures raise a subclass of `GastonError`:
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from gaston import GastonClient, AuthenticationError, RateLimitError, NotFoundError
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
client.transcribe("clip.mp4")
|
|
202
|
+
except RateLimitError:
|
|
203
|
+
print("File limit reached")
|
|
204
|
+
except AuthenticationError:
|
|
205
|
+
print("Bad token / disabled account")
|
|
206
|
+
except NotFoundError as e:
|
|
207
|
+
print("Not found:", e.message)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
| Exception | Trigger |
|
|
211
|
+
|-----------------------|------------------------------------------|
|
|
212
|
+
| `AuthenticationError` | HTTP 403, invalid token / disabled user |
|
|
213
|
+
| `BadRequestError` | HTTP 400, invalid parameters |
|
|
214
|
+
| `NotFoundError` | HTTP 404, resource not found |
|
|
215
|
+
| `RateLimitError` | HTTP 429, usage limit exceeded |
|
|
216
|
+
| `GastonAPIError` | any other API error |
|
|
217
|
+
|
|
218
|
+
Every exception carries `.status_code`, `.message`, `.details` and the raw
|
|
219
|
+
`.payload`.
|
|
220
|
+
|
|
221
|
+
## Supported languages
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from gaston import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
`SUPPORTED_LANGUAGES` lists transcription source languages; `TRANSLATION_LANGUAGES`
|
|
228
|
+
lists the available translation targets.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
gaston/__init__.py,sha256=sgIkG-WIsaPydkKTXYpIOVUHyYSLOKnZ9Lo1BiCg4wc,1104
|
|
2
|
+
gaston/client.py,sha256=lglH9_98ZV39LJxRaA-IAw7XUpbSIxub800xb3cRmAE,14170
|
|
3
|
+
gaston/constants.py,sha256=6Mxtc3nMkLJJvqlFEwapGF8WRCO4wL2Yw8x1qQPKIHw,2287
|
|
4
|
+
gaston/exceptions.py,sha256=E7j26kBIheRTRzwmtyyeKzPsl6T4e4QzBk4tYisK0os,1436
|
|
5
|
+
gaston/models.py,sha256=qycz0Gil9XCN_ra7s5gY08dxiahVNlxB1Y8oAMyRvsQ,5994
|
|
6
|
+
gaston-0.2.0.dist-info/licenses/LICENSE,sha256=4kqoIqcVwtUQeMI4Yy-mS3s_u3UwaFsHagw6QeecH9Q,1070
|
|
7
|
+
gaston-0.2.0.dist-info/METADATA,sha256=hM5eyrI2HkLKdnjfbMXt-METNBDizsmn1C93KXhsCZg,7379
|
|
8
|
+
gaston-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
gaston-0.2.0.dist-info/top_level.txt,sha256=gccKrT4ad62T38FlJeHInqZlh5pueads-oHN79TMreA,7
|
|
10
|
+
gaston-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Streams s.r.o.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
gaston
|