knowledge2 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge2-0.4.0.dist-info/METADATA +556 -0
- knowledge2-0.4.0.dist-info/RECORD +139 -0
- knowledge2-0.4.0.dist-info/WHEEL +5 -0
- knowledge2-0.4.0.dist-info/top_level.txt +1 -0
- sdk/__init__.py +70 -0
- sdk/_async_base.py +525 -0
- sdk/_async_paging.py +57 -0
- sdk/_base.py +541 -0
- sdk/_logging.py +41 -0
- sdk/_paging.py +73 -0
- sdk/_preview.py +70 -0
- sdk/_raw_response.py +25 -0
- sdk/_request_options.py +51 -0
- sdk/_transport.py +144 -0
- sdk/_validation.py +25 -0
- sdk/_validation_response.py +36 -0
- sdk/_version.py +3 -0
- sdk/async_client.py +320 -0
- sdk/async_resources/__init__.py +45 -0
- sdk/async_resources/_mixin_base.py +42 -0
- sdk/async_resources/a2a.py +230 -0
- sdk/async_resources/agents.py +489 -0
- sdk/async_resources/audit.py +145 -0
- sdk/async_resources/auth.py +133 -0
- sdk/async_resources/console.py +409 -0
- sdk/async_resources/corpora.py +276 -0
- sdk/async_resources/deployments.py +106 -0
- sdk/async_resources/documents.py +592 -0
- sdk/async_resources/feeds.py +248 -0
- sdk/async_resources/indexes.py +208 -0
- sdk/async_resources/jobs.py +165 -0
- sdk/async_resources/metadata.py +48 -0
- sdk/async_resources/models.py +102 -0
- sdk/async_resources/onboarding.py +538 -0
- sdk/async_resources/orgs.py +37 -0
- sdk/async_resources/pipelines.py +523 -0
- sdk/async_resources/projects.py +90 -0
- sdk/async_resources/search.py +262 -0
- sdk/async_resources/training.py +357 -0
- sdk/async_resources/usage.py +91 -0
- sdk/client.py +417 -0
- sdk/config.py +182 -0
- sdk/errors.py +178 -0
- sdk/examples/auth_factory.py +34 -0
- sdk/examples/batch_operations.py +57 -0
- sdk/examples/document_upload.py +56 -0
- sdk/examples/e2e_lifecycle.py +213 -0
- sdk/examples/error_handling.py +61 -0
- sdk/examples/pagination.py +64 -0
- sdk/examples/quickstart.py +36 -0
- sdk/examples/request_options.py +44 -0
- sdk/examples/search.py +64 -0
- sdk/integrations/__init__.py +57 -0
- sdk/integrations/_client.py +101 -0
- sdk/integrations/langchain/__init__.py +6 -0
- sdk/integrations/langchain/retriever.py +166 -0
- sdk/integrations/langchain/tools.py +108 -0
- sdk/integrations/llamaindex/__init__.py +11 -0
- sdk/integrations/llamaindex/filters.py +78 -0
- sdk/integrations/llamaindex/retriever.py +162 -0
- sdk/integrations/llamaindex/tools.py +109 -0
- sdk/integrations/llamaindex/vector_store.py +320 -0
- sdk/models/__init__.py +18 -0
- sdk/models/_base.py +24 -0
- sdk/models/_registry.py +457 -0
- sdk/models/a2a.py +92 -0
- sdk/models/agents.py +109 -0
- sdk/models/audit.py +28 -0
- sdk/models/auth.py +49 -0
- sdk/models/chunks.py +20 -0
- sdk/models/common.py +14 -0
- sdk/models/console.py +103 -0
- sdk/models/corpora.py +48 -0
- sdk/models/deployments.py +13 -0
- sdk/models/documents.py +126 -0
- sdk/models/embeddings.py +24 -0
- sdk/models/evaluation.py +17 -0
- sdk/models/feedback.py +9 -0
- sdk/models/feeds.py +57 -0
- sdk/models/indexes.py +36 -0
- sdk/models/jobs.py +52 -0
- sdk/models/models.py +26 -0
- sdk/models/onboarding.py +323 -0
- sdk/models/orgs.py +11 -0
- sdk/models/pipelines.py +147 -0
- sdk/models/projects.py +19 -0
- sdk/models/search.py +149 -0
- sdk/models/training.py +57 -0
- sdk/models/usage.py +39 -0
- sdk/namespaces.py +386 -0
- sdk/py.typed +0 -0
- sdk/resources/__init__.py +45 -0
- sdk/resources/_mixin_base.py +40 -0
- sdk/resources/a2a.py +230 -0
- sdk/resources/agents.py +487 -0
- sdk/resources/audit.py +144 -0
- sdk/resources/auth.py +138 -0
- sdk/resources/console.py +411 -0
- sdk/resources/corpora.py +269 -0
- sdk/resources/deployments.py +105 -0
- sdk/resources/documents.py +597 -0
- sdk/resources/feeds.py +246 -0
- sdk/resources/indexes.py +210 -0
- sdk/resources/jobs.py +164 -0
- sdk/resources/metadata.py +53 -0
- sdk/resources/models.py +99 -0
- sdk/resources/onboarding.py +542 -0
- sdk/resources/orgs.py +35 -0
- sdk/resources/pipeline_builder.py +257 -0
- sdk/resources/pipelines.py +520 -0
- sdk/resources/projects.py +87 -0
- sdk/resources/search.py +277 -0
- sdk/resources/training.py +358 -0
- sdk/resources/usage.py +92 -0
- sdk/types/__init__.py +366 -0
- sdk/types/a2a.py +88 -0
- sdk/types/agents.py +133 -0
- sdk/types/audit.py +26 -0
- sdk/types/auth.py +45 -0
- sdk/types/chunks.py +18 -0
- sdk/types/common.py +10 -0
- sdk/types/console.py +99 -0
- sdk/types/corpora.py +42 -0
- sdk/types/deployments.py +11 -0
- sdk/types/documents.py +104 -0
- sdk/types/embeddings.py +22 -0
- sdk/types/evaluation.py +15 -0
- sdk/types/feedback.py +7 -0
- sdk/types/feeds.py +61 -0
- sdk/types/indexes.py +30 -0
- sdk/types/jobs.py +50 -0
- sdk/types/models.py +22 -0
- sdk/types/onboarding.py +395 -0
- sdk/types/orgs.py +9 -0
- sdk/types/pipelines.py +177 -0
- sdk/types/projects.py +14 -0
- sdk/types/search.py +116 -0
- sdk/types/training.py +55 -0
- sdk/types/usage.py +37 -0
sdk/resources/models.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Model resource mixin for the Knowledge2 SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from sdk._paging import Page, SyncPager
|
|
8
|
+
from sdk._request_options import RequestOptions
|
|
9
|
+
from sdk._validation import require_str
|
|
10
|
+
from sdk.errors import ConfirmationRequiredError
|
|
11
|
+
from sdk.resources._mixin_base import RequesterMixin
|
|
12
|
+
from sdk.types import ModelDeleteResponse
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ModelsMixin(RequesterMixin):
    """Mixin exposing the ``/v1/models`` endpoints (list, iterate, delete)."""

    def list_models(
        self,
        limit: int = 100,
        offset: int = 0,
        request_options: RequestOptions | None = None,
    ) -> Page[dict[str, Any]]:
        """Fetch a single page of models visible to the current credentials.

        Args:
            limit: Upper bound on the number of models in the returned page.
            offset: How many models to skip before the page starts.
            request_options: Accepted for signature parity with other
                resource methods.

        Returns:
            A Page of model records together with pagination metadata.

        Raises:
            Knowledge2Error: If the API request fails.
        """
        # NOTE(review): request_options is accepted but not forwarded to
        # _list_page — confirm whether that is intentional.
        page_kwargs = {"items_key": "models", "limit": limit, "offset": offset}
        return self._list_page("GET", "/v1/models", **page_kwargs)

    def iter_models(
        self,
        *,
        limit: int = 100,
        request_options: RequestOptions | None = None,
    ) -> SyncPager[dict[str, Any]]:
        """Return a pager that walks every model, one item at a time.

        Args:
            limit: Page size used for each underlying API request.
            request_options: Accepted for signature parity with other
                resource methods.

        Yields:
            Individual model dicts.

        Raises:
            Knowledge2Error: If any underlying API request fails.
        """
        # NOTE(review): request_options is accepted but not forwarded to
        # _paginate — confirm whether that is intentional.
        return self._paginate("GET", "/v1/models", items_key="models", limit=limit)

    def delete_model(
        self,
        model_id: str,
        *,
        confirm: bool = False,
        force: bool = False,
        request_options: RequestOptions | None = None,
    ) -> ModelDeleteResponse:
        """Permanently remove a model and its associated artifacts.

        Deletion cannot be undone, so the call is refused locally unless the
        caller explicitly opts in with ``confirm=True``.

        Args:
            model_id: Unique identifier of the model to delete.
            confirm: Safety switch; anything falsy aborts before any request
                is sent.
            force: Sent as the ``force`` query parameter. When ``True`` the
                model is deleted even if it is currently deployed.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            Confirmation of the deletion.

        Raises:
            ConfirmationRequiredError: If *confirm* is not ``True``.
            NotFoundError: If the model does not exist.
            ConflictError: If *force* is ``False`` and the model has active
                deployments.
            Knowledge2Error: If the API request fails.
        """
        # Validate the identifier first so a bad ID is reported even when the
        # caller forgot to confirm.
        model_id = require_str(model_id, "model_id")
        if confirm:
            endpoint = f"/v1/models/{model_id}"
            raw = self._request(
                "DELETE",
                endpoint,
                params={"force": force},
                request_options=request_options,
            )
            return self._maybe_validate(raw, "ModelDeleteResponse")
        raise ConfirmationRequiredError("model", model_id)
|
sdk/resources/onboarding.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
"""SDK resource for dataset onboarding operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from sdk._paging import Page, SyncPager
|
|
9
|
+
from sdk._request_options import RequestOptions
|
|
10
|
+
from sdk._validation import require_str
|
|
11
|
+
from sdk.resources._mixin_base import RequesterMixin
|
|
12
|
+
from sdk.types import (
|
|
13
|
+
DatasetAnalysisDetails,
|
|
14
|
+
DatasetAnalysisRequest,
|
|
15
|
+
DatasetAnalysisResponse,
|
|
16
|
+
DocumentSummaryResponse,
|
|
17
|
+
EvaluationDetails,
|
|
18
|
+
EvaluationListResponse,
|
|
19
|
+
EvaluationReportResponse,
|
|
20
|
+
EvaluationRequest,
|
|
21
|
+
EvaluationResponse,
|
|
22
|
+
GoldLabelEntry,
|
|
23
|
+
GoldLabelsUploadResponse,
|
|
24
|
+
OnboardingStatusResponse,
|
|
25
|
+
SummarizationRequest,
|
|
26
|
+
SummarizationResponse,
|
|
27
|
+
SummarizationStatusResponse,
|
|
28
|
+
SyntheticQueryBatchDetails,
|
|
29
|
+
SyntheticQueryBatchListResponse,
|
|
30
|
+
SyntheticQueryBatchResponse,
|
|
31
|
+
SyntheticQueryGenerationRequest,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class OnboardingMixin(RequesterMixin):
    """Mixin providing dataset onboarding operations.

    Covers dataset analysis, gold labels, synthetic query generation,
    evaluation, and document summarization for a corpus. Every method
    validates its ID arguments with ``require_str`` before issuing a request.
    """

    # =========================================================================
    # Dataset Analysis
    # =========================================================================

    def start_analysis(
        self,
        corpus_id: str,
        *,
        description: str | None = None,
        auto_bootstrap: bool = True,
        bootstrap_num_samples: int | None = None,
        queries_per_chunk: int | None = None,
        request_options: RequestOptions | None = None,
    ) -> DatasetAnalysisResponse:
        """Kick off the dataset analysis pipeline for a corpus.

        Args:
            corpus_id: ID of the corpus to analyze.
            description: Optional dataset description for analysis context.
            auto_bootstrap: Automatically bootstrap if no gold labels exist.
            bootstrap_num_samples: Override for the bootstrap sample count.
            queries_per_chunk: Override for the queries generated per chunk.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            DatasetAnalysisResponse with analysis_id and job_id.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # ``auto_bootstrap`` is always sent; the remaining fields are only
        # included when the caller supplied a value.
        body: dict[str, Any] = {"auto_bootstrap": auto_bootstrap}
        optional_fields = {
            "description": description,
            "bootstrap_num_samples": bootstrap_num_samples,
            "queries_per_chunk": queries_per_chunk,
        }
        body.update({key: value for key, value in optional_fields.items() if value is not None})

        raw = self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/onboard:analyze",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "DatasetAnalysisResponse")

    def get_onboarding_status(
        self,
        corpus_id: str,
        request_options: RequestOptions | None = None,
    ) -> OnboardingStatusResponse:
        """Fetch the current onboarding status of a corpus.

        Args:
            corpus_id: ID of the corpus.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            OnboardingStatusResponse with latest analysis and counts.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        endpoint = f"/v1/corpora/{corpus_id}/onboard/status"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "OnboardingStatusResponse")

    def get_analysis(
        self,
        corpus_id: str,
        analysis_id: str,
        request_options: RequestOptions | None = None,
    ) -> DatasetAnalysisDetails:
        """Fetch the detailed results of one analysis run.

        Args:
            corpus_id: ID of the corpus.
            analysis_id: ID of the analysis run.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            DatasetAnalysisDetails with full analysis results.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        analysis_id = require_str(analysis_id, "analysis_id")
        endpoint = f"/v1/corpora/{corpus_id}/onboard/analysis/{analysis_id}"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "DatasetAnalysisDetails")

    # =========================================================================
    # Gold Labels
    # =========================================================================

    def upload_gold_labels(
        self,
        corpus_id: str,
        labels: list[GoldLabelEntry],
        *,
        description: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> GoldLabelsUploadResponse:
        """Upload gold labels for a corpus.

        Args:
            corpus_id: ID of the corpus.
            labels: List of gold label entries (query-chunk pairs).
            description: Optional description of the labels source.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            GoldLabelsUploadResponse with resolution results.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        body: dict[str, Any] = {"labels": labels}
        if description is not None:
            body["description"] = description

        raw = self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/onboard:upload-labels",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "GoldLabelsUploadResponse")

    def upload_gold_labels_file(
        self,
        corpus_id: str,
        file_path: str | Path,
        *,
        description: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> GoldLabelsUploadResponse:
        """Upload gold labels read from a JSONL file.

        Each non-blank line of the file is parsed as one JSON value and the
        resulting list is handed to :meth:`upload_gold_labels`.

        Args:
            corpus_id: ID of the corpus.
            file_path: Path to a UTF-8 encoded JSONL file with gold labels.
            description: Optional description of the labels source.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            GoldLabelsUploadResponse with resolution results.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        import json

        corpus_id = require_str(corpus_id, "corpus_id")

        # Decode explicitly as UTF-8: without an ``encoding`` argument Python
        # falls back to the platform locale, which mis-decodes non-ASCII
        # labels on systems whose default is not UTF-8.
        with open(file_path, "r", encoding="utf-8") as handle:
            labels: list[GoldLabelEntry] = [
                json.loads(line) for line in handle if line.strip()
            ]

        return self.upload_gold_labels(
            corpus_id,
            labels,
            description=description,
            request_options=request_options,
        )

    def list_gold_labels(
        self,
        corpus_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        request_options: RequestOptions | None = None,
    ) -> Page[dict[str, Any]]:
        """Fetch a single page of gold labels for a corpus.

        Args:
            corpus_id: ID of the corpus.
            limit: Maximum number of labels to return.
            offset: Offset for pagination.
            request_options: Accepted for signature parity with other
                resource methods.

        Returns:
            A Page containing gold label entries with pagination metadata.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but not forwarded to
        # _list_page — confirm whether that is intentional.
        endpoint = f"/v1/corpora/{corpus_id}/gold-labels"
        return self._list_page("GET", endpoint, items_key="labels", limit=limit, offset=offset)

    def iter_gold_labels(
        self,
        corpus_id: str,
        *,
        limit: int = 100,
        request_options: RequestOptions | None = None,
    ) -> SyncPager[dict[str, Any]]:
        """Return a pager over a corpus's gold labels.

        Args:
            corpus_id: ID of the corpus.
            limit: Page size passed to the paginator.
            request_options: Accepted for signature parity with other
                resource methods.

        Returns:
            A SyncPager over gold label dicts.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but not forwarded to
        # _paginate — confirm whether that is intentional.
        endpoint = f"/v1/corpora/{corpus_id}/gold-labels"
        return self._paginate("GET", endpoint, items_key="labels", limit=limit)

    # =========================================================================
    # Synthetic Query Generation
    # =========================================================================

    def generate_synthetic_queries(
        self,
        corpus_id: str,
        analysis_id: str,
        *,
        sample_size: int = 0,
        queries_per_chunk: int = 3,
        use_document_context: bool = True,
        eval_sample_size: int | None = None,
        request_options: RequestOptions | None = None,
    ) -> SyntheticQueryBatchResponse:
        """Start synthetic query generation.

        Args:
            corpus_id: ID of the corpus.
            analysis_id: ID of the analysis run to use.
            sample_size: Chunks to sample (0 = automatic bounded sample).
            queries_per_chunk: Queries to generate per chunk.
            use_document_context: Inject document summaries into prompts.
            eval_sample_size: Override eval sample size (None = use default).
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            SyntheticQueryBatchResponse with batch_id and job_id.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        analysis_id = require_str(analysis_id, "analysis_id")
        body: dict[str, Any] = {
            "analysis_id": analysis_id,
            "sample_size": sample_size,
            "queries_per_chunk": queries_per_chunk,
            "use_document_context": use_document_context,
        }
        # Omit the override entirely when unset.
        if eval_sample_size is not None:
            body["eval_sample_size"] = eval_sample_size

        raw = self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/synthetic-queries:generate",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "SyntheticQueryBatchResponse")

    def list_synthetic_batches(
        self,
        corpus_id: str,
        request_options: RequestOptions | None = None,
    ) -> SyntheticQueryBatchListResponse:
        """List the synthetic query batches of a corpus.

        Args:
            corpus_id: ID of the corpus.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            SyntheticQueryBatchListResponse with batches list.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        endpoint = f"/v1/corpora/{corpus_id}/synthetic-queries/batches"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "SyntheticQueryBatchListResponse")

    def get_synthetic_batch(
        self,
        corpus_id: str,
        batch_id: str,
        request_options: RequestOptions | None = None,
    ) -> SyntheticQueryBatchDetails:
        """Fetch the details of one synthetic query batch.

        Args:
            corpus_id: ID of the corpus.
            batch_id: ID of the batch.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            SyntheticQueryBatchDetails with full batch info.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        batch_id = require_str(batch_id, "batch_id")
        endpoint = f"/v1/corpora/{corpus_id}/synthetic-queries/batches/{batch_id}"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "SyntheticQueryBatchDetails")

    def download_synthetic_queries(
        self,
        corpus_id: str,
        batch_id: str,
        output_path: str | Path,
        request_options: RequestOptions | None = None,
    ) -> str:
        """Resolve the artifact URI of a synthetic query batch.

        Despite its name, this method does not write anything to
        *output_path*: it looks up the batch and returns the batch's
        ``artifact_uri`` as a string. Fetching the artifact itself is left to
        the caller.

        Args:
            corpus_id: ID of the corpus.
            batch_id: ID of the batch.
            output_path: Currently unused; kept for interface compatibility.
            request_options: Per-request overrides forwarded to the batch
                lookup.

        Returns:
            The batch's artifact URI, converted to ``str``.

        Raises:
            ValueError: If the batch has no (or an empty) artifact URI.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        batch_id = require_str(batch_id, "batch_id")
        batch = self.get_synthetic_batch(corpus_id, batch_id, request_options=request_options)

        # The batch may come back as a plain dict or as an object with
        # attributes, so support both access styles.
        if isinstance(batch, dict):
            artifact_uri = batch.get("artifact_uri")
        else:
            artifact_uri = getattr(batch, "artifact_uri", None)

        if artifact_uri:
            return str(artifact_uri)
        raise ValueError("Batch does not have an artifact URI")

    # =========================================================================
    # Evaluation
    # =========================================================================

    def evaluate_synthetic_queries(
        self,
        corpus_id: str,
        batch_id: str,
        *,
        sample_size: int | None = None,
        generate_report: bool = True,
        report_formats: list[str] | None = None,
        request_options: RequestOptions | None = None,
    ) -> EvaluationResponse:
        """Start evaluation of a synthetic query batch.

        Args:
            corpus_id: ID of the corpus.
            batch_id: ID of the synthetic query batch.
            sample_size: Sample for evaluation (None or 0 = automatic bounded
                sample).
            generate_report: Generate HTML/JSON report.
            report_formats: Report formats to generate.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            EvaluationResponse with eval_id and job_id.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        batch_id = require_str(batch_id, "batch_id")
        body: dict[str, Any] = {
            "batch_id": batch_id,
            "generate_report": generate_report,
        }
        # Only send the optional knobs the caller actually set.
        optional_fields = {"sample_size": sample_size, "report_formats": report_formats}
        body.update({key: value for key, value in optional_fields.items() if value is not None})

        raw = self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/synthetic-queries:evaluate",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "EvaluationResponse")

    def list_evaluations(
        self,
        corpus_id: str,
        request_options: RequestOptions | None = None,
    ) -> EvaluationListResponse:
        """List the evaluations of a corpus.

        Args:
            corpus_id: ID of the corpus.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            EvaluationListResponse with evaluations list.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        endpoint = f"/v1/corpora/{corpus_id}/evaluations"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "EvaluationListResponse")

    def get_evaluation(
        self,
        corpus_id: str,
        eval_id: str,
        request_options: RequestOptions | None = None,
    ) -> EvaluationDetails:
        """Fetch the details of one evaluation.

        Args:
            corpus_id: ID of the corpus.
            eval_id: ID of the evaluation.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            EvaluationDetails with full evaluation results.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        eval_id = require_str(eval_id, "eval_id")
        endpoint = f"/v1/corpora/{corpus_id}/evaluations/{eval_id}"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "EvaluationDetails")

    def get_evaluation_report(
        self,
        corpus_id: str,
        eval_id: str,
        *,
        format: str = "json",
        request_options: RequestOptions | None = None,
    ) -> EvaluationReportResponse:
        """Fetch the report of an evaluation.

        Args:
            corpus_id: ID of the corpus.
            eval_id: ID of the evaluation.
            format: Report format ("json" or "html"); sent as the ``format``
                query parameter.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            EvaluationReportResponse with report URI and metrics.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        eval_id = require_str(eval_id, "eval_id")
        endpoint = f"/v1/corpora/{corpus_id}/evaluations/{eval_id}/report"
        raw = self._request(
            "GET",
            endpoint,
            params={"format": format},
            request_options=request_options,
        )
        return self._maybe_validate(raw, "EvaluationReportResponse")

    # =========================================================================
    # Document Summarization
    # =========================================================================

    def summarize_documents(
        self,
        corpus_id: str,
        *,
        force_regenerate: bool = False,
        request_options: RequestOptions | None = None,
    ) -> SummarizationResponse:
        """Start document summarization for a corpus.

        Args:
            corpus_id: ID of the corpus.
            force_regenerate: Regenerate summaries for all documents.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            SummarizationResponse with job_id and stats.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        body: dict[str, Any] = {"force_regenerate": force_regenerate}
        raw = self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/documents:summarize",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "SummarizationResponse")

    def get_summarization_status(
        self,
        corpus_id: str,
        request_options: RequestOptions | None = None,
    ) -> SummarizationStatusResponse:
        """Fetch the summarization status of a corpus.

        Args:
            corpus_id: ID of the corpus.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            SummarizationStatusResponse with coverage stats.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        endpoint = f"/v1/corpora/{corpus_id}/summaries/status"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "SummarizationStatusResponse")

    def get_document_summary(
        self,
        corpus_id: str,
        doc_id: str,
        request_options: RequestOptions | None = None,
    ) -> DocumentSummaryResponse:
        """Fetch the summary of a specific document.

        Args:
            corpus_id: ID of the corpus.
            doc_id: ID of the document.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            DocumentSummaryResponse with summary and entities.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        doc_id = require_str(doc_id, "doc_id")
        endpoint = f"/v1/corpora/{corpus_id}/documents/{doc_id}/summary"
        raw = self._request("GET", endpoint, request_options=request_options)
        return self._maybe_validate(raw, "DocumentSummaryResponse")
|
sdk/resources/orgs.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Organisation resource mixin for the Knowledge2 SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from sdk._request_options import RequestOptions
|
|
8
|
+
from sdk.resources._mixin_base import RequesterMixin
|
|
9
|
+
from sdk.types import OrgResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OrgsMixin(RequesterMixin):
    """Mixin exposing the ``/v1/orgs`` endpoint."""

    def create_org(
        self,
        name: str,
        contact_email: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> OrgResponse:
        """Register a new organisation.

        Args:
            name: Display name for the organisation.
            contact_email: Optional contact email address; left out of the
                request body when ``None``.
            request_options: Per-request overrides forwarded to the transport.

        Returns:
            The newly created organisation record.

        Raises:
            Knowledge2Error: If the API request fails.
        """
        body: dict[str, Any] = {"name": name}
        if contact_email is not None:
            body["contact_email"] = contact_email
        raw = self._request(
            "POST",
            "/v1/orgs",
            json=body,
            request_options=request_options,
        )
        return self._maybe_validate(raw, "OrgResponse")
|