gaston 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaston-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Streams s.r.o.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gaston-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: gaston
3
+ Version: 0.2.0
4
+ Summary: Python client for the Gaston API (transcription, translation and sentence search).
5
+ Author-email: "Streams s.r.o." <contact@streams.guru>
6
+ License: MIT
7
+ Project-URL: Homepage, https://gaston.live
8
+ Project-URL: Documentation, https://www.gaston.live/en/api
9
+ Keywords: gaston,transcription,translation,speech-to-text,api-client
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: requests>=2.28
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: responses>=0.23; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # Gaston API Client
31
+
32
+ A small, typed Python client for the **Gaston API**: transcription, translation
33
+ and full-text search of sentences within transcribed recordings.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install gaston
39
+ ```
40
+
41
+ Requires Python 3.10+.
42
+
43
+ For local development, install from a checkout in editable mode instead:
44
+
45
+ ```bash
46
+ pip install -e .
47
+ ```
48
+
49
+ ## Quick start
50
+
51
+ ```python
52
+ from gaston import GastonClient
53
+
54
+ client = GastonClient(token="gapi-...")
55
+
56
+ # Who am I + remaining quota
57
+ me = client.me()
58
+ print(me.email, "files left:", me.usage.files_left)
59
+
60
+ # Transcribe a local file
61
+ result = client.transcribe("interview.mp4", lang="en", title="My interview")
62
+ print(result.id, result.state)
63
+
64
+ # Transcribe from a URL (YouTube or web)
65
+ client.transcribe_url("https://youtu.be/dQw4w9WgXcQ", lang="en")
66
+
67
+ # Translate an existing transcription
68
+ client.translate(result.id, target_lang="de")
69
+
70
+ # Speaker diarization (requires a completed translation in that language)
71
+ client.diarize(result.id, lang="de", speakers=2)
72
+
73
+ # Fetch a media item with its sentences
74
+ media = client.get_media(result.id, lang="en")
75
+ for sentence in media.sentences:
76
+ print(sentence.id, sentence.text, sentence.speaker)
77
+
78
+ # Full-text search across the whole library
79
+ results = client.search("climate change", max_=20)
80
+ print("total matches:", results.total)
81
+ for hit in results:
82
+ print(hit["_sentence"]["body"], "->", hit["_highlight"]["body"])
83
+ ```
84
+
85
+ See [Search](#search) for query syntax and filtering options.
86
+
87
+ ### Configuration
88
+
89
+ Generate an API token in the Gaston app under
90
+ [Settings -> API](https://www.gaston.live/user/settings/api/en). Full endpoint
91
+ documentation is available at <https://www.gaston.live/en/api>.
92
+
93
+ The token can be supplied directly or via an environment variable:
94
+
95
+ | Argument | Environment variable | Default |
96
+ |----------|----------------------|------------|
97
+ | `token` | `GASTON_API_TOKEN` | (required) |
98
+
99
+ ```python
100
+ # Uses GASTON_API_TOKEN from the environment
101
+ with GastonClient() as client:
102
+ ...
103
+ ```
104
+
105
+ ### Timeouts
106
+
107
+ Ordinary requests use a 30s timeout by default. The file upload in `transcribe` can take
108
+ minutes for large files, so it uses a separate, more generous `upload_timeout`
109
+ (default `(10s connect, 600s read)`).
110
+
111
+ A timeout may be a single float, a `(connect, read)` tuple, or `None` to wait
112
+ indefinitely.
113
+
114
+ ```python
115
+ # Customise the defaults for all calls
116
+ client = GastonClient(
117
+ token="gapi-...",
118
+ timeout=30,
119
+ upload_timeout=(10, 1800), # allow up to 30 min to upload large files
120
+ )
121
+
122
+ # Or override per call (e.g. no read timeout for a very large file)
123
+ client.transcribe("huge-recording.mp4", timeout=(10, None))
124
+ ```
125
+
126
+ ## Directories
127
+
128
+ ```python
129
+ folder = client.create_directory("Podcasts")
130
+ client.update_directory(folder.id, title="Podcast archive")
131
+ client.move_media(media_id="me...", dir_id=folder.id)
132
+ tree = client.directory_tree()
133
+ client.delete_directory(folder.id)
134
+ ```
135
+
136
+ ## Search
137
+
138
+ `client.search(query, from_=0, max_=50, dir_ids=None, lang=None)` runs a
139
+ full-text search over every sentence in your transcribed media.
140
+
141
+ ### Query syntax
142
+
143
+ The query supports a subset of the Lucene `query_string` syntax:
144
+
145
+ | Feature | Example | Notes |
146
+ |-------------------|--------------------------|------------------------------------------|
147
+ | Boolean `AND` | `cats AND dogs` | both terms must appear |
148
+ | Boolean `OR` | `cats OR dogs` | either term |
149
+ | Boolean `NOT` | `cats NOT dogs` | exclude a term |
150
+ | Grouping | `(cats OR dogs) AND vet` | combine operators with parentheses |
151
+ | Exact phrase | `"climate change"` | quoted terms match as a phrase |
152
+ | Trailing wildcard | `transcri*` | matches `transcribe`, `transcription`... |
153
+
154
+ Leading wildcards (`*tion`), field selectors, fuzzy (`~`), boosts (`^`) and
155
+ ranges are not supported and are stripped server-side. Queries must be at least
156
+ 3 characters.
157
+
158
+ ```python
159
+ results = client.search('(invoice OR receipt) AND "due date" NOT draft')
160
+ ```
161
+
162
+ ### Filtering and pagination
163
+
164
+ ```python
165
+ # Search within a single directory
166
+ client.search("budget", dir_ids=[42])
167
+
168
+ # Search across several directories
169
+ client.search("budget", dir_ids=[42, 43, 7])
170
+
171
+ # Restrict to one language, and page through results
172
+ page2 = client.search("budget", from_=50, max_=50, lang="en")
173
+ ```
174
+
175
+ ### Reading results
176
+
177
+ `search()` returns a `SearchResults` object. Iterate over it for hits, or read
178
+ `.total` for the overall match count. Each hit is a dict with:
179
+
180
+ - `_sentence` - the matched sentence plus its `media` metadata (id, title,
181
+ duration, directory, thumbnail, file, originUrl).
182
+ - `_highlight` - matched fragments with the hit terms wrapped in
183
+ `<hlt>...</hlt>` tags.
184
+
185
+ ```python
186
+ results = client.search("climate change", max_=20)
187
+ print("total matches:", results.total)
188
+ for hit in results:
189
+ sentence = hit["_sentence"]
190
+ print(sentence["media"]["title"], "|", hit["_highlight"]["body"])
191
+ ```
192
+
193
+ ## Error handling
194
+
195
+ All failures raise a subclass of `GastonError`:
196
+
197
+ ```python
198
+ from gaston import GastonClient, AuthenticationError, RateLimitError, NotFoundError
199
+
200
+ try:
201
+ client.transcribe("clip.mp4")
202
+ except RateLimitError:
203
+ print("File limit reached")
204
+ except AuthenticationError:
205
+ print("Bad token / disabled account")
206
+ except NotFoundError as e:
207
+ print("Not found:", e.message)
208
+ ```
209
+
210
+ | Exception | Trigger |
211
+ |-----------------------|------------------------------------------|
212
+ | `AuthenticationError` | HTTP 403, invalid token / disabled user |
213
+ | `BadRequestError` | HTTP 400, invalid parameters |
214
+ | `NotFoundError` | HTTP 404, resource not found |
215
+ | `RateLimitError` | HTTP 429, usage limit exceeded |
216
+ | `GastonAPIError` | any other API error |
217
+
218
+ Every exception carries `.status_code`, `.message`, `.details` and the raw
219
+ `.payload`.
220
+
221
+ ## Supported languages
222
+
223
+ ```python
224
+ from gaston import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
225
+ ```
226
+
227
+ `SUPPORTED_LANGUAGES` lists transcription source languages; `TRANSLATION_LANGUAGES`
228
+ lists the available translation targets.
gaston-0.2.0/README.md ADDED
@@ -0,0 +1,199 @@
1
+ # Gaston API Client
2
+
3
+ A small, typed Python client for the **Gaston API**: transcription, translation
4
+ and full-text search of sentences within transcribed recordings.
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ pip install gaston
10
+ ```
11
+
12
+ Requires Python 3.10+.
13
+
14
+ For local development, install from a checkout in editable mode instead:
15
+
16
+ ```bash
17
+ pip install -e .
18
+ ```
19
+
20
+ ## Quick start
21
+
22
+ ```python
23
+ from gaston import GastonClient
24
+
25
+ client = GastonClient(token="gapi-...")
26
+
27
+ # Who am I + remaining quota
28
+ me = client.me()
29
+ print(me.email, "files left:", me.usage.files_left)
30
+
31
+ # Transcribe a local file
32
+ result = client.transcribe("interview.mp4", lang="en", title="My interview")
33
+ print(result.id, result.state)
34
+
35
+ # Transcribe from a URL (YouTube or web)
36
+ client.transcribe_url("https://youtu.be/dQw4w9WgXcQ", lang="en")
37
+
38
+ # Translate an existing transcription
39
+ client.translate(result.id, target_lang="de")
40
+
41
+ # Speaker diarization (requires a completed translation in that language)
42
+ client.diarize(result.id, lang="de", speakers=2)
43
+
44
+ # Fetch a media item with its sentences
45
+ media = client.get_media(result.id, lang="en")
46
+ for sentence in media.sentences:
47
+ print(sentence.id, sentence.text, sentence.speaker)
48
+
49
+ # Full-text search across the whole library
50
+ results = client.search("climate change", max_=20)
51
+ print("total matches:", results.total)
52
+ for hit in results:
53
+ print(hit["_sentence"]["body"], "->", hit["_highlight"]["body"])
54
+ ```
55
+
56
+ See [Search](#search) for query syntax and filtering options.
57
+
58
+ ### Configuration
59
+
60
+ Generate an API token in the Gaston app under
61
+ [Settings -> API](https://www.gaston.live/user/settings/api/en). Full endpoint
62
+ documentation is available at <https://www.gaston.live/en/api>.
63
+
64
+ The token can be supplied directly or via an environment variable:
65
+
66
+ | Argument | Environment variable | Default |
67
+ |----------|----------------------|------------|
68
+ | `token` | `GASTON_API_TOKEN` | (required) |
69
+
70
+ ```python
71
+ # Uses GASTON_API_TOKEN from the environment
72
+ with GastonClient() as client:
73
+ ...
74
+ ```
75
+
76
+ ### Timeouts
77
+
78
+ Ordinary requests use a 30s timeout by default. The file upload in `transcribe` can take
79
+ minutes for large files, so it uses a separate, more generous `upload_timeout`
80
+ (default `(10s connect, 600s read)`).
81
+
82
+ A timeout may be a single float, a `(connect, read)` tuple, or `None` to wait
83
+ indefinitely.
84
+
85
+ ```python
86
+ # Customise the defaults for all calls
87
+ client = GastonClient(
88
+ token="gapi-...",
89
+ timeout=30,
90
+ upload_timeout=(10, 1800), # allow up to 30 min to upload large files
91
+ )
92
+
93
+ # Or override per call (e.g. no read timeout for a very large file)
94
+ client.transcribe("huge-recording.mp4", timeout=(10, None))
95
+ ```
96
+
97
+ ## Directories
98
+
99
+ ```python
100
+ folder = client.create_directory("Podcasts")
101
+ client.update_directory(folder.id, title="Podcast archive")
102
+ client.move_media(media_id="me...", dir_id=folder.id)
103
+ tree = client.directory_tree()
104
+ client.delete_directory(folder.id)
105
+ ```
106
+
107
+ ## Search
108
+
109
+ `client.search(query, from_=0, max_=50, dir_ids=None, lang=None)` runs a
110
+ full-text search over every sentence in your transcribed media.
111
+
112
+ ### Query syntax
113
+
114
+ The query supports a subset of the Lucene `query_string` syntax:
115
+
116
+ | Feature | Example | Notes |
117
+ |-------------------|--------------------------|------------------------------------------|
118
+ | Boolean `AND` | `cats AND dogs` | both terms must appear |
119
+ | Boolean `OR` | `cats OR dogs` | either term |
120
+ | Boolean `NOT` | `cats NOT dogs` | exclude a term |
121
+ | Grouping | `(cats OR dogs) AND vet` | combine operators with parentheses |
122
+ | Exact phrase | `"climate change"` | quoted terms match as a phrase |
123
+ | Trailing wildcard | `transcri*` | matches `transcribe`, `transcription`... |
124
+
125
+ Leading wildcards (`*tion`), field selectors, fuzzy (`~`), boosts (`^`) and
126
+ ranges are not supported and are stripped server-side. Queries must be at least
127
+ 3 characters.
128
+
129
+ ```python
130
+ results = client.search('(invoice OR receipt) AND "due date" NOT draft')
131
+ ```
132
+
133
+ ### Filtering and pagination
134
+
135
+ ```python
136
+ # Search within a single directory
137
+ client.search("budget", dir_ids=[42])
138
+
139
+ # Search across several directories
140
+ client.search("budget", dir_ids=[42, 43, 7])
141
+
142
+ # Restrict to one language, and page through results
143
+ page2 = client.search("budget", from_=50, max_=50, lang="en")
144
+ ```
145
+
146
+ ### Reading results
147
+
148
+ `search()` returns a `SearchResults` object. Iterate over it for hits, or read
149
+ `.total` for the overall match count. Each hit is a dict with:
150
+
151
+ - `_sentence` - the matched sentence plus its `media` metadata (id, title,
152
+ duration, directory, thumbnail, file, originUrl).
153
+ - `_highlight` - matched fragments with the hit terms wrapped in
154
+ `<hlt>...</hlt>` tags.
155
+
156
+ ```python
157
+ results = client.search("climate change", max_=20)
158
+ print("total matches:", results.total)
159
+ for hit in results:
160
+ sentence = hit["_sentence"]
161
+ print(sentence["media"]["title"], "|", hit["_highlight"]["body"])
162
+ ```
163
+
164
+ ## Error handling
165
+
166
+ All failures raise a subclass of `GastonError`:
167
+
168
+ ```python
169
+ from gaston import GastonClient, AuthenticationError, RateLimitError, NotFoundError
170
+
171
+ try:
172
+ client.transcribe("clip.mp4")
173
+ except RateLimitError:
174
+ print("File limit reached")
175
+ except AuthenticationError:
176
+ print("Bad token / disabled account")
177
+ except NotFoundError as e:
178
+ print("Not found:", e.message)
179
+ ```
180
+
181
+ | Exception | Trigger |
182
+ |-----------------------|------------------------------------------|
183
+ | `AuthenticationError` | HTTP 403, invalid token / disabled user |
184
+ | `BadRequestError` | HTTP 400, invalid parameters |
185
+ | `NotFoundError` | HTTP 404, resource not found |
186
+ | `RateLimitError` | HTTP 429, usage limit exceeded |
187
+ | `GastonAPIError` | any other API error |
188
+
189
+ Every exception carries `.status_code`, `.message`, `.details` and the raw
190
+ `.payload`.
191
+
192
+ ## Supported languages
193
+
194
+ ```python
195
+ from gaston import SUPPORTED_LANGUAGES, TRANSLATION_LANGUAGES
196
+ ```
197
+
198
+ `SUPPORTED_LANGUAGES` lists transcription source languages; `TRANSLATION_LANGUAGES`
199
+ lists the available translation targets.
@@ -0,0 +1,60 @@
1
+ """Python client library for the Gaston API.
2
+
3
+ Transcription, translation and full-text search of sentences within
4
+ transcribed recordings.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .client import GastonClient
10
+ from .constants import (
11
+ SUPPORTED_LANGUAGES,
12
+ TRANSLATION_LANGUAGES,
13
+ TRANSLATION_OPTIONS,
14
+ )
15
+ from .exceptions import (
16
+ AuthenticationError,
17
+ BadRequestError,
18
+ GastonAPIError,
19
+ GastonError,
20
+ NotFoundError,
21
+ RateLimitError,
22
+ )
23
+ from .models import (
24
+ Directory,
25
+ Media,
26
+ MediaList,
27
+ SearchResults,
28
+ Sentence,
29
+ TranscribeResult,
30
+ TranslateResult,
31
+ Usage,
32
+ User,
33
+ )
34
+
35
+ __version__ = "0.2.0"
36
+
37
+ __all__ = [
38
+ "GastonClient",
39
+ # exceptions
40
+ "GastonError",
41
+ "GastonAPIError",
42
+ "AuthenticationError",
43
+ "BadRequestError",
44
+ "NotFoundError",
45
+ "RateLimitError",
46
+ # models
47
+ "User",
48
+ "Usage",
49
+ "Media",
50
+ "MediaList",
51
+ "Sentence",
52
+ "Directory",
53
+ "TranscribeResult",
54
+ "TranslateResult",
55
+ "SearchResults",
56
+ # constants
57
+ "SUPPORTED_LANGUAGES",
58
+ "TRANSLATION_LANGUAGES",
59
+ "TRANSLATION_OPTIONS",
60
+ ]