kurious 0.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kurious-0.8.4/.gitignore +277 -0
- kurious-0.8.4/CHANGELOG.md +323 -0
- kurious-0.8.4/LICENSE +6 -0
- kurious-0.8.4/PKG-INFO +598 -0
- kurious-0.8.4/README.md +549 -0
- kurious-0.8.4/pyproject.toml +107 -0
- kurious-0.8.4/src/kurious/__init__.py +204 -0
- kurious-0.8.4/src/kurious/_config.py +106 -0
- kurious-0.8.4/src/kurious/_exceptions.py +54 -0
- kurious-0.8.4/src/kurious/_http.py +364 -0
- kurious-0.8.4/src/kurious/ai.py +83 -0
- kurious-0.8.4/src/kurious/api_keys.py +73 -0
- kurious-0.8.4/src/kurious/auth.py +184 -0
- kurious-0.8.4/src/kurious/bookmarks.py +67 -0
- kurious-0.8.4/src/kurious/cli.py +282 -0
- kurious-0.8.4/src/kurious/client.py +177 -0
- kurious-0.8.4/src/kurious/conversations.py +189 -0
- kurious-0.8.4/src/kurious/credentials.py +157 -0
- kurious-0.8.4/src/kurious/documents.py +83 -0
- kurious-0.8.4/src/kurious/entitlements.py +104 -0
- kurious-0.8.4/src/kurious/evals.py +277 -0
- kurious-0.8.4/src/kurious/files.py +321 -0
- kurious-0.8.4/src/kurious/indices.py +47 -0
- kurious-0.8.4/src/kurious/jobs.py +369 -0
- kurious-0.8.4/src/kurious/kg.py +281 -0
- kurious-0.8.4/src/kurious/nl2sql.py +179 -0
- kurious-0.8.4/src/kurious/projects.py +620 -0
- kurious-0.8.4/src/kurious/py.typed +0 -0
- kurious-0.8.4/src/kurious/search.py +239 -0
- kurious-0.8.4/src/kurious/search_log.py +92 -0
- kurious-0.8.4/src/kurious/types.py +1140 -0
- kurious-0.8.4/src/kurious/usage.py +28 -0
- kurious-0.8.4/src/kurious/video.py +303 -0
kurious-0.8.4/.gitignore
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Per-environment secrets (real values). The .example variant is committed.
|
|
148
|
+
deployment/environments/*/env.secrets.yaml
|
|
149
|
+
|
|
150
|
+
# scripts/lib/ holds shared shell + python helpers; the `lib/` rule earlier
|
|
151
|
+
# (Python packaging) would otherwise ignore it.
|
|
152
|
+
!scripts/lib/
|
|
153
|
+
!scripts/lib/**
|
|
154
|
+
|
|
155
|
+
# Rendered templates produced by `python3 scripts/lib/load_env_config.py render ...`
|
|
156
|
+
# — the .tmpl source is committed; the rendered output is ephemeral.
|
|
157
|
+
*.rendered.yaml
|
|
158
|
+
|
|
159
|
+
# Spyder project settings
|
|
160
|
+
.spyderproject
|
|
161
|
+
.spyproject
|
|
162
|
+
|
|
163
|
+
# Rope project settings
|
|
164
|
+
.ropeproject
|
|
165
|
+
|
|
166
|
+
# mkdocs documentation
|
|
167
|
+
/site
|
|
168
|
+
|
|
169
|
+
# mypy
|
|
170
|
+
.mypy_cache/
|
|
171
|
+
.dmypy.json
|
|
172
|
+
dmypy.json
|
|
173
|
+
|
|
174
|
+
# Pyre type checker
|
|
175
|
+
.pyre/
|
|
176
|
+
|
|
177
|
+
# pytype static type analyzer
|
|
178
|
+
.pytype/
|
|
179
|
+
|
|
180
|
+
# Cython debug symbols
|
|
181
|
+
cython_debug/
|
|
182
|
+
|
|
183
|
+
# PyCharm
|
|
184
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
185
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
186
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
187
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
188
|
+
#.idea/
|
|
189
|
+
|
|
190
|
+
# Abstra
|
|
191
|
+
# Abstra is an AI-powered process automation framework.
|
|
192
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
193
|
+
# Learn more at https://abstra.io/docs
|
|
194
|
+
.abstra/
|
|
195
|
+
|
|
196
|
+
# Visual Studio Code
|
|
197
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
198
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
199
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
200
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
201
|
+
# .vscode/
|
|
202
|
+
|
|
203
|
+
# Ruff stuff:
|
|
204
|
+
.ruff_cache/
|
|
205
|
+
|
|
206
|
+
# PyPI configuration file
|
|
207
|
+
.pypirc
|
|
208
|
+
|
|
209
|
+
# Cursor
|
|
210
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
211
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
212
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
213
|
+
.cursorignore
|
|
214
|
+
.cursorindexingignore
|
|
215
|
+
|
|
216
|
+
# Marimo
|
|
217
|
+
marimo/_static/
|
|
218
|
+
marimo/_lsp/
|
|
219
|
+
__marimo__/
|
|
220
|
+
temp/*.csv
|
|
221
|
+
temp/*.json
|
|
222
|
+
|
|
223
|
+
data
|
|
224
|
+
*.out
|
|
225
|
+
|
|
226
|
+
sonar-scan.sh
|
|
227
|
+
sonar-project.properties
|
|
228
|
+
# Per-service sonar configs needed by ci-*.yml per-service scans — whitelisted.
|
|
229
|
+
!services/*/sonar-project.properties
|
|
230
|
+
.scannerwork.sonar-token
|
|
231
|
+
.sonar-token
|
|
232
|
+
reports/*.md
|
|
233
|
+
reports/*.json
|
|
234
|
+
|
|
235
|
+
gh-issue-agent/
|
|
236
|
+
.hitl_state/
|
|
237
|
+
|
|
238
|
+
# AgentOS — local-only skill overrides (never commit)
|
|
239
|
+
.claude/*.local.*
|
|
240
|
+
.claude/**/*.local.*
|
|
241
|
+
services/kurious-engine/credentials
|
|
242
|
+
.claude/scheduled_tasks.lock
|
|
243
|
+
|
|
244
|
+
# SonarQube local-scan artifacts
|
|
245
|
+
.scannerwork/
|
|
246
|
+
sonar-project.local.properties
|
|
247
|
+
.DS_Store
|
|
248
|
+
|
|
249
|
+
# Internal planning docs — not for repo
|
|
250
|
+
docs/6 weeks Sprint prioritisation.xlsx
|
|
251
|
+
docs/sprint-9-12-ml-mlops-plan.md
|
|
252
|
+
|
|
253
|
+
services/kurious-engine/cov_annotate/
|
|
254
|
+
services/kurious-worker/cov_annotate/
|
|
255
|
+
services/users-service/cov_annotate/
|
|
256
|
+
services/metering-service/cov_annotate/
|
|
257
|
+
cov_annotate_worker
|
|
258
|
+
|
|
259
|
+
# bug-issues-digest skill: root-level run output (sample lives in docs/progress-tracking/)
|
|
260
|
+
/bug_issues_digest.md
|
|
261
|
+
|
|
262
|
+
# L-S1: secret-bearing artifacts — block by extension/name so they can never be
|
|
263
|
+
# accidentally committed (TLS material, PKCS#12 bundles, GCP service-account
|
|
264
|
+
# JSON keys). Real values live in Vault / k8s secrets, not the repo.
|
|
265
|
+
*.pem
|
|
266
|
+
*.key
|
|
267
|
+
*.p12
|
|
268
|
+
service-account*.json
|
|
269
|
+
gcs-sa*.json
|
|
270
|
+
|
|
271
|
+
# Claude Code agent isolation worktrees — ephemeral, never tracked.
|
|
272
|
+
.claude/worktrees
|
|
273
|
+
|
|
274
|
+
# CI test fixture output — auto-generated, must not be committed
|
|
275
|
+
portal-results.json
|
|
276
|
+
credentials/
|
|
277
|
+
.venv312/
|
kurious-0.8.4/CHANGELOG.md
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the `kurious` Python SDK are documented here.
|
|
4
|
+
|
|
5
|
+
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
|
|
6
|
+
this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.8.4] — 2026-06-12
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- **`projects.ingest(wait=True)` now blocks until the file is searchable
|
|
14
|
+
(#800)**: a "completed" auto-ingest job only meant preprocessing had
|
|
15
|
+
finished — the chained indexing job was still running, so an immediate
|
|
16
|
+
search hit an empty index and returned 0 sources. `wait=True` now follows
|
|
17
|
+
the chained job recorded in `job.result["auto_chain"]` to a terminal state
|
|
18
|
+
and then polls the file's unified ingest status until it is `indexed` (or
|
|
19
|
+
`failed`). The whole wait shares the one `timeout_s` budget.
|
|
20
|
+
- **Real indexing counts on `auto_chain` (#801)**: after the chained job
|
|
21
|
+
finishes, `job.result["auto_chain"]` carries its terminal `status` plus
|
|
22
|
+
`documents_indexed` / `documents_failed` from the indexing run (and
|
|
23
|
+
`error` on failure), so callers can tell whether indexing actually
|
|
24
|
+
succeeded without scraping worker logs.
|
|
25
|
+
|
|
26
|
+
## [0.7.0] — 2026-06-04
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
- **`kurious init` CLI (#709)**: a headless onboarding wizard. Prompts for
|
|
30
|
+
email, verifies it via email OTP, then signs up, logs in, mints a
|
|
31
|
+
`read_write` API key, and saves it to `~/.kurious/config.toml` (override with
|
|
32
|
+
`--config` or `KURIOUS_CONFIG_PATH`). No browser required — the entry point
|
|
33
|
+
for Builder Mode. Installed as the `kurious` console script
|
|
34
|
+
(`pip install kurious`). Non-interactive: `KURIOUS_PASSWORD` +
|
|
35
|
+
`KURIOUS_OTP_CODE`. `--skip-otp` skips the client-side OTP prompt only —
|
|
36
|
+
signup still fails if the server requires verification (not a bypass).
|
|
37
|
+
- **Email OTP methods (#709)**: `auth.request_email_otp(email)` →
|
|
38
|
+
`EmailOtpRequestResult` and `auth.verify_email_otp(email, code)` →
|
|
39
|
+
`EmailOtpVerifyResult`. Verify-first signup: prove email ownership, then pass
|
|
40
|
+
the returned `verification_token` to `signup(..., email_verification_token=)`.
|
|
41
|
+
- **`AIntropy.from_config()`**: build a client from the credentials written by
|
|
42
|
+
`kurious init`, so scripts can do `client = AIntropy.from_config()`.
|
|
43
|
+
- **`SignupResponse`, `EmailOtpRequestResult`, `EmailOtpVerifyResult` types.**
|
|
44
|
+
|
|
45
|
+
### Changed (breaking)
|
|
46
|
+
- **`client.auth.signup()` no longer returns tokens (#709)**: signup is async
|
|
47
|
+
(HTTP 202) — the engine creates the account + org and queues
|
|
48
|
+
entitlement provisioning, so it returns a `SignupResponse` (`user_id`,
|
|
49
|
+
`company_id`) instead of a `TokenResponse`, and no longer sets auth state.
|
|
50
|
+
Call `client.auth.login()` afterwards to obtain a session. This matches the
|
|
51
|
+
engine fix where `POST /auth/signup` returns the upstream 202 body instead of
|
|
52
|
+
raising a `ValidationError` trying to coerce it into a token response.
|
|
53
|
+
|
|
54
|
+
## [0.6.0] — 2026-06-03
|
|
55
|
+
|
|
56
|
+
### Changed (breaking)
|
|
57
|
+
- **`client.video.inspect()` now requires `project_id` (#657)**: the method signature changed from `inspect()` to `inspect(project_id: str)`. Update call sites to pass the project UUID.
|
|
58
|
+
|
|
59
|
+
### Added
|
|
60
|
+
- `VideoInspectResult.empty: bool` — engine returns HTTP 200 with `empty=True` when the project has no parquet shards yet.
|
|
61
|
+
|
|
62
|
+
## [0.5.5] — 2026-05-26
|
|
63
|
+
|
|
64
|
+
### Fixed
|
|
65
|
+
- **`client.video.preprocess()` missing `project_id` (#633)**: the method
|
|
66
|
+
signature now accepts `project_id` as the first required argument and
|
|
67
|
+
passes it on the POST body. Earlier versions hit the engine schema
|
|
68
|
+
with `project_id` missing and got HTTP 422 `ValidationError`. Customers
|
|
69
|
+
following the LVBench / Video-MME runbook against SDK ≤ 0.5.4 saw the
|
|
70
|
+
call fail immediately; on 0.5.5 the documented happy path works
|
|
71
|
+
end-to-end. Paired with an engine-side fix that wires
|
|
72
|
+
`ensure_project_config_for_ingest` into `POST /pipeline/ingest` so the
|
|
73
|
+
auto-routing path is searchable as soon as the chain finishes (#634).
|
|
74
|
+
|
|
75
|
+
### Added
|
|
76
|
+
- `client.projects.ingest()` is now the recommended one-call upload +
|
|
77
|
+
auto-route + dispatch path. The previous `presign_upload → PUT → files.ingest
|
|
78
|
+
→ files.wait_for_job` flow remains available, but `client.files.ingest` and
|
|
79
|
+
`client.files.wait_for_job` emit `DeprecationWarning`.
|
|
80
|
+
- `client.resources` property — discovery aid that lists every attached
|
|
81
|
+
resource attribute. `repr(client)` now advertises the resource count.
|
|
82
|
+
- Page-wrapper return types for previously-raw `List[...]` endpoints:
|
|
83
|
+
`EvalSetPage`, `EvalRunPage`, `EvalResultPage`, `APIKeyPage`,
|
|
84
|
+
`AuditLogPage`, `IngestionJobPage`. All wrappers are iterable, support
|
|
85
|
+
`len()` / indexing, and are falsy when empty so existing `for x in page:`
|
|
86
|
+
call sites keep working.
|
|
87
|
+
- `JobStatus` Literal exported (`"pending" | "running" | "completed" |
|
|
88
|
+
"failed" | "cancelled"`) for IDE autocomplete on status comparisons.
|
|
89
|
+
- `JobThrottled` and `TERMINAL_STATUSES` exported from the package root
|
|
90
|
+
(previously importable only via `kurious.jobs`).
|
|
91
|
+
|
|
92
|
+
### Fixed
|
|
93
|
+
- **Cross-tenant search leak (#592 Layer 4)**: `project_rag_search` no longer
|
|
94
|
+
falls back to the NJ open data global index when a project-scoped caller
|
|
95
|
+
passes a `project_config` without `search_mode="kg_unstructured"` —
|
|
96
|
+
it now returns empty hits with a clear error instead of another tenant's PDFs.
|
|
97
|
+
- **GCS upload silent failure (#592 Layer 3)**: `client.files.upload` and
|
|
98
|
+
`client.projects.ingest` now raise `AIntropyError` on a non-2xx PUT
|
|
99
|
+
response from GCS. Previously a 403 / 413 / 415 was silently absorbed
|
|
100
|
+
and the downstream ingest "succeeded" on a missing object.
|
|
101
|
+
- **SSE error events**: an `event: error` frame mid-stream now raises
|
|
102
|
+
`AIntropyError` instead of being yielded as a regular event and dropped
|
|
103
|
+
by text-only consumers. No more silent stream truncation.
|
|
104
|
+
- **`Job.retry` 429 handling**: the prior `httpx.HTTPStatusError` catch
|
|
105
|
+
was dead code (the transport raises `RateLimitError` instead). Now
|
|
106
|
+
correctly translates rate-limit responses into `JobThrottled` with
|
|
107
|
+
the server's `Retry-After` hint.
|
|
108
|
+
- **JWT auto-refresh**: `client.AIntropy` now wires
|
|
109
|
+
`auth.ensure_valid_token` into the transport as a pre-request hook,
|
|
110
|
+
matching the docstring claim. Long-lived JWT-mode clients no longer
|
|
111
|
+
hit random 401s when the access token expires.
|
|
112
|
+
- **`project_files.status` state machine (#592 Layer 3)**: the engine
|
|
113
|
+
now transitions `pending_upload → ingesting` at `auto_ingest` dispatch
|
|
114
|
+
and `ingesting → indexed` at the terminal worker handler. SDK
|
|
115
|
+
pollers see live state instead of an indefinite `pending_upload`.
|
|
116
|
+
- Worker `video.preprocess` now verifies `assembled.parquet` exists in
|
|
117
|
+
GCS before declaring success (closes the 121s mark-completed-with-2-of-5-
|
|
118
|
+
stages regression).
|
|
119
|
+
- Worker `video.preprocess` → `video.ingest` auto-chain wired so a
|
|
120
|
+
single SDK call ends with the file actually indexed and searchable. The
|
|
121
|
+
pre-existing `video.ingest → kg.run` chain now propagates `file_id`
|
|
122
|
+
through to the terminal handler so `project_files.status=INDEXED` lands.
|
|
123
|
+
- New worker handler `document.preprocess` registered (was a stub) so
|
|
124
|
+
document uploads via auto-ingest run end-to-end like video.
|
|
125
|
+
|
|
126
|
+
### Changed
|
|
127
|
+
- `DEFAULT_BASE_URL` docstring corrected: confirms it points at the dev
|
|
128
|
+
beta host and documents that prod opens at GA.
|
|
129
|
+
- All `wait_for_job` helpers (files, evals, video, nl2sql, kg) now share
|
|
130
|
+
the canonical `TERMINAL_JOB_STATUSES` frozenset from `kurious.types`.
|
|
131
|
+
Previously three different definitions disagreed on whether `cancelled`
|
|
132
|
+
was terminal.
|
|
133
|
+
- README rewritten: `client.whoami()` first-call section, tenant-isolation
|
|
134
|
+
explainer, `client.projects.ingest()` quickstart replaces the old
|
|
135
|
+
presign + requests.put flow, ingest-status / resume recovery example,
|
|
136
|
+
exception table mapping HTTP codes to typed errors, `mode="deep_think"`
|
|
137
|
+
example, discoverability section (`client.resources`).
|
|
138
|
+
|
|
139
|
+
## [0.5.4] — 2026-05-21
|
|
140
|
+
|
|
141
|
+
### Added
|
|
142
|
+
- `Job` now carries auto-ingest detection metadata when the job was created
|
|
143
|
+
via `POST /pipeline/ingest`: `detected_domain`, `detected_sub_kind`,
|
|
144
|
+
`detected_mime`, `detected_confidence`, `fallback_used`, `detection_notes`.
|
|
145
|
+
Previously these fields were silently discarded by `Job.from_payload`;
|
|
146
|
+
callers can now inspect what the magic-byte detector decided without a
|
|
147
|
+
separate status call.
|
|
148
|
+
- `AutoIngestConfig` and `KGAutoChainConfig` types exported from the package
|
|
149
|
+
root. Pass `AutoIngestConfig(kg=KGAutoChainConfig(enabled=False))` to
|
|
150
|
+
`client.projects.ingest(config=...)` to suppress the automatic KG rebuild
|
|
151
|
+
after document ingestion. Plain `dict` config still accepted for backward
|
|
152
|
+
compatibility.
|
|
153
|
+
- `client.files.wait_for_indexed(file_id, *, poll_interval=5.0, timeout=7200.0)`
|
|
154
|
+
— convenience poller that calls `GET /ingest/status/{file_id}` until
|
|
155
|
+
`overall_status` is `indexed` or `failed`. Works across both the document
|
|
156
|
+
chain (`ingestion_jobs`) and video / KG path (`pipeline_jobs`). Raises
|
|
157
|
+
`TimeoutError` when the deadline elapses.
|
|
158
|
+
- `ProjectFile` now carries optional detection fields (`detected_domain`,
|
|
159
|
+
`detected_sub_kind`, `detection_confidence`, `detection_notes`) stamped
|
|
160
|
+
by the auto-routing ingest endpoint. `None` on files uploaded via legacy
|
|
161
|
+
per-domain endpoints.
|
|
162
|
+
|
|
163
|
+
## [0.5.3] — 2026-05-19
|
|
164
|
+
|
|
165
|
+
### Fixed
|
|
166
|
+
- `search.nl2sql`, `search.intelligent`, `search.intelligent_stream`, and
|
|
167
|
+
`search.intelligent_stream_text` were sending `project_id` only in the
|
|
168
|
+
URL path. The backend `NL2SQLQueryRequest` / `IntelligentQueryRequest`
|
|
169
|
+
schemas also require `project_id` in the body, so every call to these
|
|
170
|
+
four methods returned `422 ValidationError: {type: missing, loc:
|
|
171
|
+
[body, project_id]}`. The SDK now injects `project_id` into the body
|
|
172
|
+
payload automatically — caller signatures are unchanged.
|
|
173
|
+
- Synced stale `__version__` in `src/kurious/__init__.py` (was reporting
|
|
174
|
+
`0.5.1` while `pyproject.toml` advertised `0.5.2`).
|
|
175
|
+
|
|
176
|
+
## [0.5.2] — 2026-05-15
|
|
177
|
+
|
|
178
|
+
### Added
|
|
179
|
+
- `client.whoami()` — returns `WhoAmIResponse` with `user_id`, `company_id`,
|
|
180
|
+
`access_type`, and `project_id`. Backed by `GET /api/v1/whoami`.
|
|
181
|
+
- `client.company_id` property — inspect the resolved company after init.
|
|
182
|
+
- Auto-resolution of `company_id` on `AIntropy(api_key=...)` construction:
|
|
183
|
+
the client calls `/whoami` eagerly, caches the result on the transport, and
|
|
184
|
+
injects `X-Company-ID` automatically on every subsequent request. Callers
|
|
185
|
+
no longer need to pass `company_id` explicitly when using API-key auth.
|
|
186
|
+
|
|
187
|
+
### Changed
|
|
188
|
+
- `DEFAULT_BASE_URL` corrected to the dev beta host
|
|
189
|
+
`kurious-backend-dev-api.centralus.cloudapp.azure.com/api/v1` (was the
|
|
190
|
+
unreachable `api.aintropy.ai`). Prod opens at GA; override `base_url=`
|
|
191
|
+
on the client when prod is live.
|
|
192
|
+
- `wait_for_job` / `Job.wait_until_done` timeout raised `300s → 7200s` and
|
|
193
|
+
poll interval increased `2s → 5s` — matches real KG and video pipeline runtimes.
|
|
194
|
+
|
|
195
|
+
## [0.5.1] — 2026-05-12
|
|
196
|
+
|
|
197
|
+
### Added
|
|
198
|
+
- `client.projects.ingest(project_id, file_or_uri, ...)` — one-click
|
|
199
|
+
upload + auto-route + dispatch. Accepts a local path, a `gs://` URI,
|
|
200
|
+
or a file-like object. Returns a `Job` keyed off `pipeline_jobs.id`.
|
|
201
|
+
Passing `wait=True` blocks until terminal status and invokes an
|
|
202
|
+
optional `on_progress` callback on each poll tick.
|
|
203
|
+
- `client.projects.jobs(project_id, ...)` — cursor-paginated iterator
|
|
204
|
+
over all `Job`s for a project.
|
|
205
|
+
- `client.jobs` — new `JobsResource` with `get(job_id)` (single fetch)
|
|
206
|
+
and `list(...)` (cross-project paginated iterator).
|
|
207
|
+
- `Job` dataclass exported from the package root with `refresh()`,
|
|
208
|
+
`retry(publish=False)`, `cancel()`, and `wait_until_done(...)`
|
|
209
|
+
ergonomics over `GET/POST /api/v1/jobs/{id}[/retry|/cancel]`.
|
|
210
|
+
|
|
211
|
+
### Notes
|
|
212
|
+
- Backed by `POST /api/v1/pipeline/ingest` (auto-route by magic bytes)
|
|
213
|
+
and `GET /api/v1/jobs/{id}` (sanitised `error_detail` for non-admins).
|
|
214
|
+
- `retry` / `cancel` are no-ops against an engine that has not been upgraded.
|
|
215
|
+
|
|
216
|
+
## [0.5.0] — 2026-05-07
|
|
217
|
+
|
|
218
|
+
### Added
|
|
219
|
+
- `client.projects.get_step_timings(project_id, kind=None)` — per-project
|
|
220
|
+
per-pipeline-step duration rollup (count / avg_ms / p50 / p95 / total_ms).
|
|
221
|
+
Optional `kind` parameter filters to a single pipeline kind
|
|
222
|
+
(e.g. `video.preprocess`, `kg.extract`).
|
|
223
|
+
- New types `StepTiming` and `StepTimingsResponse` exported from the
|
|
224
|
+
package root.
|
|
225
|
+
|
|
226
|
+
### Notes
|
|
227
|
+
- Backed by `GET /api/v1/projects/{project_id}/step-timings`. The endpoint
|
|
228
|
+
is scoped to project members; non-members get 403.
|
|
229
|
+
- Use `client.projects.get_step_timings(project_id).steps` to drive a
|
|
230
|
+
"where is time going" panel; pair with `total_duration_ms` to compute
|
|
231
|
+
share-of-time per step.
|
|
232
|
+
|
|
233
|
+
## [0.4.1] — 2026-05-06
|
|
234
|
+
|
|
235
|
+
### Added
|
|
236
|
+
- `client.video.backfill_utterances(project_id, max_slices=4, ...)` — kick off
|
|
237
|
+
the new sliced-scroll utterance backfill (engine endpoint
|
|
238
|
+
`POST /video/pipeline/backfill_utterances` introduced in PR #409). Returns
|
|
239
|
+
a `BackfillFanoutResponse` with the umbrella `job_id` and the dispatched
|
|
240
|
+
`slice_count`. Children run in parallel on the engine's background workers;
|
|
241
|
+
poll the umbrella with `client.video.get_job(job_id)`.
|
|
242
|
+
- `BackfillFanoutResponse` and `JobChildrenRollup` types exported from the
|
|
243
|
+
package root.
|
|
244
|
+
- `VideoPipelineStatus.children` convenience property — surfaces
|
|
245
|
+
`{slice_count, completed, failed, running, pending}` when the polled row is
|
|
246
|
+
an umbrella over fan-out children. Returns `None` for single-task jobs.
|
|
247
|
+
- `VideoPipelineJob` now carries dispatch metadata: `celery_task_id`,
|
|
248
|
+
`priority`, `queue_name`, `parent_task_id`, `slice_id`, `max_slices`.
|
|
249
|
+
All optional — only populated on rows created after the engine enables
|
|
250
|
+
async dispatch.
|
|
251
|
+
|
|
252
|
+
### Notes
|
|
253
|
+
- `max_slices` is capped at 16 by the engine; the SDK validates client-side
|
|
254
|
+
and raises `ValueError` before any HTTP call.
|
|
255
|
+
|
|
256
|
+
## [0.4.0] — 2026-05-04
|
|
257
|
+
|
|
258
|
+
### Added
|
|
259
|
+
- `client.video.preprocess(input_video_gcs_uri, stage_prefix, ...)` — kick off
|
|
260
|
+
the per-video preprocessing chain (decode → transcribe → embed → caption →
|
|
261
|
+
assemble). Per-stage `skip_*` flags allow resuming after an interruption.
|
|
262
|
+
- New endpoint backing it: `POST /video/pipeline/preprocess`.
|
|
263
|
+
|
|
264
|
+
### Notes
|
|
265
|
+
- Per-video runtime varies based on video length and available GPU resources.
|
|
266
|
+
- Intermediate parquets land at
|
|
267
|
+
`<stage_prefix>/<video_id>/{frames,transcripts,embeddings,captions,assembled}.parquet`.
|
|
268
|
+
Downstream `client.video.ingest()` consumes the assembled parquet.
|
|
269
|
+
|
|
270
|
+
## [0.3.0] — 2026-05-01
|
|
271
|
+
|
|
272
|
+
### Added
|
|
273
|
+
- `client.video.timeline(video_id, max_events=100)` — fetch per-video
|
|
274
|
+
scene-level timeline (chunked events with start/end timestamps, topic,
|
|
275
|
+
keywords, speakers). Powers timeline hover-cards on video result UIs.
|
|
276
|
+
- `client.video.cancel_job(job_id)` — cancel a pending or running video
|
|
277
|
+
pipeline job.
|
|
278
|
+
- New types: `VideoTimeline`, `VideoTimelineEvent`.
|
|
279
|
+
|
|
280
|
+
### Coverage
|
|
281
|
+
SDK now covers ~98% of the public Kurious Engine API surface. The
|
|
282
|
+
`/projects/{id}/ingest*` upload + ingestion endpoints (already in
|
|
283
|
+
`client.files.*` since 0.1.0) remain the recommended path for uploads;
|
|
284
|
+
no additional ingestion wrapper is needed.
|
|
285
|
+
|
|
286
|
+
## [0.2.0] — 2026-04-25
|
|
287
|
+
|
|
288
|
+
### Added
|
|
289
|
+
- `client.conversations` — chat thread management: create / list / get / update /
|
|
290
|
+
delete, message CRUD, public-share enable/disable, public shared-conversation
|
|
291
|
+
reads.
|
|
292
|
+
- `client.bookmarks` — list / create / delete saved messages.
|
|
293
|
+
- `client.search_log` — log a UI search and submit star-rating feedback.
|
|
294
|
+
- `client.ai` — direct embedding and chat-completion access.
|
|
295
|
+
- `client.entitlements` — list, get, request upgrade, start trial.
|
|
296
|
+
- `search.blended_query()` — benchmark blended (lexical + semantic) search.
|
|
297
|
+
- `search.nl2sql_stream()` — streaming NL2SQL responses.
|
|
298
|
+
- `py.typed` marker so type checkers (mypy, pyright) recognize the package as
|
|
299
|
+
fully typed.
|
|
300
|
+
- `__repr__` on every resource class for friendlier debugging.
|
|
301
|
+
- `LICENSE` (MIT) and richer `pyproject.toml` metadata: project URLs, keywords,
|
|
302
|
+
`lint` / `docs` extras groups, ruff and mypy config.
|
|
303
|
+
|
|
304
|
+
### Changed
|
|
305
|
+
- Expanded `README.md` with end-to-end examples (auth, projects, ingest,
|
|
306
|
+
search, conversations, evals, error handling, streaming).
|
|
307
|
+
- Tightened class docstrings on `IndexStats` and other low-level types.
|
|
308
|
+
|
|
309
|
+
### Coverage
|
|
310
|
+
SDK now covers ~95% of the public Kurious Engine API surface (vs. 71% in
|
|
311
|
+
v0.1.0). Admin- and internal-only endpoints (`/admin/*`, `/internal/*`,
|
|
312
|
+
`/telemetry/*`, `/resources/*`) remain intentionally out of scope.
|
|
313
|
+
|
|
314
|
+
## [0.1.0] — 2026-04-22
|
|
315
|
+
|
|
316
|
+
Initial release.
|
|
317
|
+
|
|
318
|
+
### Added
|
|
319
|
+
- Core resources: `auth`, `projects`, `files`, `indices`, `documents`, `search`,
|
|
320
|
+
`evals`, `usage`, `api_keys`, `nl2sql`, `kg`, `video`.
|
|
321
|
+
- Sync HTTP transport with retries, rate-limit backoff, and JWT/API-key auth.
|
|
322
|
+
- SSE streaming helpers for `intelligent_stream` / `intelligent_stream_text`.
|
|
323
|
+
- Pydantic v2 type models mirroring Kurious Engine response shapes.
|
kurious-0.8.4/LICENSE
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
Copyright © 2026 AIntropy. All rights reserved.
|
|
2
|
+
|
|
3
|
+
This software and its source code are proprietary and confidential.
|
|
4
|
+
Unauthorized use, copying, modification, distribution, or redistribution
|
|
5
|
+
of this software, in whole or in part, is strictly prohibited without the
|
|
6
|
+
prior written consent of AIntropy.
|