knowledge2 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge2-0.4.0.dist-info/METADATA +556 -0
- knowledge2-0.4.0.dist-info/RECORD +139 -0
- knowledge2-0.4.0.dist-info/WHEEL +5 -0
- knowledge2-0.4.0.dist-info/top_level.txt +1 -0
- sdk/__init__.py +70 -0
- sdk/_async_base.py +525 -0
- sdk/_async_paging.py +57 -0
- sdk/_base.py +541 -0
- sdk/_logging.py +41 -0
- sdk/_paging.py +73 -0
- sdk/_preview.py +70 -0
- sdk/_raw_response.py +25 -0
- sdk/_request_options.py +51 -0
- sdk/_transport.py +144 -0
- sdk/_validation.py +25 -0
- sdk/_validation_response.py +36 -0
- sdk/_version.py +3 -0
- sdk/async_client.py +320 -0
- sdk/async_resources/__init__.py +45 -0
- sdk/async_resources/_mixin_base.py +42 -0
- sdk/async_resources/a2a.py +230 -0
- sdk/async_resources/agents.py +489 -0
- sdk/async_resources/audit.py +145 -0
- sdk/async_resources/auth.py +133 -0
- sdk/async_resources/console.py +409 -0
- sdk/async_resources/corpora.py +276 -0
- sdk/async_resources/deployments.py +106 -0
- sdk/async_resources/documents.py +592 -0
- sdk/async_resources/feeds.py +248 -0
- sdk/async_resources/indexes.py +208 -0
- sdk/async_resources/jobs.py +165 -0
- sdk/async_resources/metadata.py +48 -0
- sdk/async_resources/models.py +102 -0
- sdk/async_resources/onboarding.py +538 -0
- sdk/async_resources/orgs.py +37 -0
- sdk/async_resources/pipelines.py +523 -0
- sdk/async_resources/projects.py +90 -0
- sdk/async_resources/search.py +262 -0
- sdk/async_resources/training.py +357 -0
- sdk/async_resources/usage.py +91 -0
- sdk/client.py +417 -0
- sdk/config.py +182 -0
- sdk/errors.py +178 -0
- sdk/examples/auth_factory.py +34 -0
- sdk/examples/batch_operations.py +57 -0
- sdk/examples/document_upload.py +56 -0
- sdk/examples/e2e_lifecycle.py +213 -0
- sdk/examples/error_handling.py +61 -0
- sdk/examples/pagination.py +64 -0
- sdk/examples/quickstart.py +36 -0
- sdk/examples/request_options.py +44 -0
- sdk/examples/search.py +64 -0
- sdk/integrations/__init__.py +57 -0
- sdk/integrations/_client.py +101 -0
- sdk/integrations/langchain/__init__.py +6 -0
- sdk/integrations/langchain/retriever.py +166 -0
- sdk/integrations/langchain/tools.py +108 -0
- sdk/integrations/llamaindex/__init__.py +11 -0
- sdk/integrations/llamaindex/filters.py +78 -0
- sdk/integrations/llamaindex/retriever.py +162 -0
- sdk/integrations/llamaindex/tools.py +109 -0
- sdk/integrations/llamaindex/vector_store.py +320 -0
- sdk/models/__init__.py +18 -0
- sdk/models/_base.py +24 -0
- sdk/models/_registry.py +457 -0
- sdk/models/a2a.py +92 -0
- sdk/models/agents.py +109 -0
- sdk/models/audit.py +28 -0
- sdk/models/auth.py +49 -0
- sdk/models/chunks.py +20 -0
- sdk/models/common.py +14 -0
- sdk/models/console.py +103 -0
- sdk/models/corpora.py +48 -0
- sdk/models/deployments.py +13 -0
- sdk/models/documents.py +126 -0
- sdk/models/embeddings.py +24 -0
- sdk/models/evaluation.py +17 -0
- sdk/models/feedback.py +9 -0
- sdk/models/feeds.py +57 -0
- sdk/models/indexes.py +36 -0
- sdk/models/jobs.py +52 -0
- sdk/models/models.py +26 -0
- sdk/models/onboarding.py +323 -0
- sdk/models/orgs.py +11 -0
- sdk/models/pipelines.py +147 -0
- sdk/models/projects.py +19 -0
- sdk/models/search.py +149 -0
- sdk/models/training.py +57 -0
- sdk/models/usage.py +39 -0
- sdk/namespaces.py +386 -0
- sdk/py.typed +0 -0
- sdk/resources/__init__.py +45 -0
- sdk/resources/_mixin_base.py +40 -0
- sdk/resources/a2a.py +230 -0
- sdk/resources/agents.py +487 -0
- sdk/resources/audit.py +144 -0
- sdk/resources/auth.py +138 -0
- sdk/resources/console.py +411 -0
- sdk/resources/corpora.py +269 -0
- sdk/resources/deployments.py +105 -0
- sdk/resources/documents.py +597 -0
- sdk/resources/feeds.py +246 -0
- sdk/resources/indexes.py +210 -0
- sdk/resources/jobs.py +164 -0
- sdk/resources/metadata.py +53 -0
- sdk/resources/models.py +99 -0
- sdk/resources/onboarding.py +542 -0
- sdk/resources/orgs.py +35 -0
- sdk/resources/pipeline_builder.py +257 -0
- sdk/resources/pipelines.py +520 -0
- sdk/resources/projects.py +87 -0
- sdk/resources/search.py +277 -0
- sdk/resources/training.py +358 -0
- sdk/resources/usage.py +92 -0
- sdk/types/__init__.py +366 -0
- sdk/types/a2a.py +88 -0
- sdk/types/agents.py +133 -0
- sdk/types/audit.py +26 -0
- sdk/types/auth.py +45 -0
- sdk/types/chunks.py +18 -0
- sdk/types/common.py +10 -0
- sdk/types/console.py +99 -0
- sdk/types/corpora.py +42 -0
- sdk/types/deployments.py +11 -0
- sdk/types/documents.py +104 -0
- sdk/types/embeddings.py +22 -0
- sdk/types/evaluation.py +15 -0
- sdk/types/feedback.py +7 -0
- sdk/types/feeds.py +61 -0
- sdk/types/indexes.py +30 -0
- sdk/types/jobs.py +50 -0
- sdk/types/models.py +22 -0
- sdk/types/onboarding.py +395 -0
- sdk/types/orgs.py +9 -0
- sdk/types/pipelines.py +177 -0
- sdk/types/projects.py +14 -0
- sdk/types/search.py +116 -0
- sdk/types/training.py +55 -0
- sdk/types/usage.py +37 -0
sdk/errors.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Knowledge2 SDK exception hierarchy.
|
|
2
|
+
|
|
3
|
+
All SDK exceptions inherit from :class:`Knowledge2Error`, so callers can
|
|
4
|
+
use ``except Knowledge2Error`` as a catch-all.
|
|
5
|
+
|
|
6
|
+
Hierarchy::
|
|
7
|
+
|
|
8
|
+
Knowledge2Error (base)
|
|
9
|
+
├── APIError (HTTP errors from the API)
|
|
10
|
+
│ ├── BadRequestError (400)
|
|
11
|
+
│ ├── AuthenticationError (401)
|
|
12
|
+
│ ├── PermissionDeniedError (403)
|
|
13
|
+
│ ├── NotFoundError (404)
|
|
14
|
+
│ ├── ConflictError (409)
|
|
15
|
+
│ ├── ValidationError (422)
|
|
16
|
+
│ ├── RateLimitError (429)
|
|
17
|
+
│ └── ServerError (500, 502, 503, 504)
|
|
18
|
+
├── APIConnectionError (network / DNS failures)
|
|
19
|
+
├── APITimeoutError (request timeout)
|
|
20
|
+
└── ConfirmationRequiredError (client-side deletion guard)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Knowledge2Error(Exception):
    """Base exception for all Knowledge2 SDK errors.

    Catch this class to handle every error the SDK raises.

    Attributes:
        message: The human-readable message passed to the constructor.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message)
        self.message = message

    def __reduce__(self) -> tuple[Any, ...]:
        """Describe how to rebuild this exception for ``pickle`` and ``copy``.

        The default ``BaseException.__reduce__`` re-creates an exception by
        calling ``type(self)(*self.args)``.  ``self.args`` only holds the
        message, so that call raises ``TypeError`` for any subclass whose
        ``__init__`` needs more than the message (required keyword-only
        arguments, or several positional arguments).

        Instead, the instance is rebuilt with ``cls.__new__(cls, *args)``,
        which skips ``__init__``, and the instance ``__dict__`` is restored
        afterwards, so every attribute set by a subclass survives.
        """
        # Function-scope import keeps this block self-contained.
        import copyreg

        return (copyreg.__newobj__, (type(self), *self.args), dict(vars(self)))

    @property
    def retryable(self) -> bool:
        """Whether the operation that caused this error can be retried."""
        return False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class APIError(Knowledge2Error):
    """An HTTP 4xx / 5xx error reported by the Knowledge2 API.

    Every argument except ``message`` is keyword-only and is stored
    unchanged on the instance under the same name.

    Args:
        message: Human-readable description, forwarded to the base class.
        status_code: Status code for this error (required).
        code: Optional error code; defaults to ``None``.
        details: Optional extra payload of any type; defaults to ``None``.
        request_id: Optional request identifier; defaults to ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int,
        code: str | None = None,
        details: Any = None,
        request_id: str | None = None,
    ) -> None:
        super().__init__(message)
        # One tuple assignment; attributes are still set left to right.
        self.status_code, self.code, self.details, self.request_id = (
            status_code,
            code,
            details,
            request_id,
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class BadRequestError(APIError):
    """Raised for HTTP 400: the request was malformed or missing required fields."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class AuthenticationError(APIError):
    """Raised for HTTP 401: the API key / bearer token is invalid or missing."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class PermissionDeniedError(APIError):
    """Raised for HTTP 403.

    Either the API key lacks the required scopes, or the requested feature
    is not enabled for the organization.
    """

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class NotFoundError(APIError):
    """Raised for HTTP 404: the requested resource does not exist."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ConflictError(APIError):
    """Raised for HTTP 409: a resource conflict (e.g. duplicate idempotency key)."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class ValidationError(APIError):
    """Raised for HTTP 422: request validation failed."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class RateLimitError(APIError):
    """Raised for HTTP 429: too many requests.

    ``retry_after`` carries the server-suggested wait time in seconds
    (from the ``Retry-After`` header); it is *None* if the header was
    absent.  ``status_code`` defaults to 429; the remaining keyword
    arguments are forwarded to :class:`APIError`.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int = 429,
        retry_after: float | None = None,
        code: str | None = None,
        details: Any = None,
        request_id: str | None = None,
    ) -> None:
        # Everything except retry_after is stored by the APIError initializer.
        api_error_fields = {
            "status_code": status_code,
            "code": code,
            "details": details,
            "request_id": request_id,
        }
        super().__init__(message, **api_error_fields)
        self.retry_after = retry_after

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ServerError(APIError):
    """Raised for HTTP 500 / 502 / 503 / 504: a server-side failure."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class APIConnectionError(Knowledge2Error):
    """Raised on a network connectivity failure (DNS, connection refused, etc.)."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class APITimeoutError(Knowledge2Error):
    """Raised when the request timed out."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ConfirmationRequiredError(Knowledge2Error):
    """Signals that a destructive operation needs explicit confirmation.

    Delete methods require ``confirm=True`` as a safety guard.  Because this
    class derives from :class:`Knowledge2Error`, an
    ``except Knowledge2Error`` catch-all handles it as well.

    Attributes:
        resource_type: The kind of resource, as passed to the constructor.
        resource_id: Identifier of the resource, as passed to the constructor.
    """

    def __init__(self, resource_type: str, resource_id: str) -> None:
        self.resource_type = resource_type
        self.resource_id = resource_id
        # Build the message first so the base-class call stays on one line.
        explanation = (
            f"{resource_type.capitalize()} {resource_id!r} deletion is irreversible. "
            f"Pass confirm=True to proceed."
        )
        super().__init__(explanation)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Bearer token factory example — dynamic auth for OAuth/OIDC workloads.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
K2_API_HOST=http://localhost:8000 python -m sdk.examples.auth_factory
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_oauth_token() -> str:
    """Return a bearer token; stands in for a real OAuth provider call.

    Reads ``K2_BEARER_TOKEN`` from the environment and falls back to
    ``"demo-token"`` when the variable is not set.
    """
    # In production: call your OAuth provider here
    token = os.getenv("K2_BEARER_TOKEN", "demo-token")
    return token
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main() -> None:
    """Build a client that authenticates through a token factory and report its auth state.

    The API host is read from ``K2_API_HOST`` and defaults to
    ``https://api.knowledge2.ai``.
    """
    from sdk import Knowledge2

    host = os.environ.get("K2_API_HOST", "https://api.knowledge2.ai")
    client = Knowledge2(
        api_host=host,
        bearer_token_factory=get_oauth_token,
        token_cache_ttl=300,  # cache for 5 minutes
    )

    # Pre-flight check
    if client.is_authenticated():
        print(f"Authenticated: {client.is_authenticated()}")
    else:
        print("No auth configured!")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
if __name__ == "__main__":
|
|
34
|
+
main()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Batch workflow: upload multiple docs, build indexes with wait, batch search, iterate results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
8
|
+
|
|
9
|
+
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # Step 1: upload several documents in one call, guarded by an idempotency key.
    docs = [
        {"source_uri": f"doc://{tag}", "raw_text": f"Content {tag.upper()}."}
        for tag in ("a", "b", "c")
    ]
    upload = client.upload_documents_batch(
        corpus_id,
        docs,
        wait=True,
        idempotency_key="batch-demo-abc123",
    )
    print("Upload job:", upload.get("job_id"))

    # Step 2: build dense + sparse indexes, again with wait and idempotency.
    index_job = client.build_indexes(
        corpus_id,
        mode="full",
        dense=True,
        sparse=True,
        wait=True,
        idempotency_key="index-demo-abc123",
    )
    print("Index job:", index_job.get("job_id"))

    # Step 3: send all queries in a single batch search.
    queries = [f"What is {letter}?" for letter in ("A", "B", "C")]
    batch = client.search_batch(
        corpus_id,
        queries,
        top_k=3,
        return_config={"include_text": True},
    )

    # Responses are matched to their query by position.
    for number, response in enumerate(batch.get("responses", []), start=1):
        print(f"Query {number}: {queries[number - 1]}")
        for hit in response.get("results", []):
            snippet = hit.get("text") or ""
            print(" ", snippet[:60])

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate.
    print(f"API error: {exc}")
    raise
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Document upload: single file, batch raw_text, URL ingestion. Shows wait=True pattern."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
8
|
+
|
|
9
|
+
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # 1) One document, uploaded from a local file path.
    resp = client.upload_document(
        corpus_id,
        source_uri="doc://my-file",
        file_path="/path/to/doc.pdf",
        idempotency_key="upload-1",
    )
    print("Uploaded:", resp.get("id"))

    # 2) Several raw_text documents in one batch (wait=True blocks until the job completes).
    docs = [
        {"source_uri": f"doc://{tag}", "raw_text": f"{ordinal} document content."}
        for tag, ordinal in (("a", "First"), ("b", "Second"))
    ]
    batch_resp = client.upload_documents_batch(
        corpus_id,
        docs,
        wait=True,
        poll_s=5,
        idempotency_key="batch-1",
    )
    print("Batch job:", batch_resp.get("job_id"))

    # 3) URL ingestion, also with wait=True.
    urls = [
        {"url": f"https://example.com/page{number}", "title": f"Page {number}"}
        for number in (1, 2)
    ]
    url_resp = client.ingest_urls(
        corpus_id,
        urls,
        wait=True,
        poll_s=5,
        idempotency_key="urls-1",
    )
    print("URL ingest job:", url_resp.get("job_id"))

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate.
    print(f"API error: {exc}")
    raise
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
9
|
+
from sdk.types.jobs import JobResponse
|
|
10
|
+
from sdk.types.search import SearchResult
|
|
11
|
+
from sdk.types.training import TuningRunDetailResponse
|
|
12
|
+
|
|
13
|
+
# Statuses after which polling stops; no further progress is expected.
TERMINAL_JOB_STATUSES = {"succeeded", "failed", "canceled"}
TERMINAL_RUN_STATUSES = {"succeeded", "failed", "canceled"}


def _wait_for_job(
    client: Knowledge2,
    job_id: str,
    *,
    poll_s: float = 5.0,
    timeout_s: float = 900.0,
) -> JobResponse:
    """Poll ``client.get_job`` until the job reaches a terminal status.

    Args:
        client: SDK client used to fetch the job.
        job_id: Identifier of the job to wait for.
        poll_s: Maximum pause between two polls, in seconds.
        timeout_s: Overall time budget, in seconds.

    Returns:
        The first job payload whose ``status`` is in
        ``TERMINAL_JOB_STATUSES`` (it may be a failure status; callers
        decide what to do with it).

    Raises:
        TimeoutError: If the budget is used up before a terminal status
            is observed.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    while True:
        job = client.get_job(job_id)
        if job.get("status") in TERMINAL_JOB_STATUSES:
            return job
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for job {job_id}")
        # Never sleep past the deadline; the last poll happens right at it.
        time.sleep(min(poll_s, remaining))


def _wait_for_tuning_run(
    client: Knowledge2,
    run_id: str,
    *,
    poll_s: float = 10.0,
    timeout_s: float = 7200.0,
) -> TuningRunDetailResponse:
    """Poll ``client.get_tuning_run`` until the run reaches a terminal status.

    Args:
        client: SDK client used to fetch the tuning run.
        run_id: Identifier of the tuning run to wait for.
        poll_s: Maximum pause between two polls, in seconds.
        timeout_s: Overall time budget, in seconds.

    Returns:
        The first run payload whose ``status`` is in
        ``TERMINAL_RUN_STATUSES`` (it may be a failure status; callers
        decide what to do with it).

    Raises:
        TimeoutError: If the budget is used up before a terminal status
            is observed.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    while True:
        run = client.get_tuning_run(run_id)
        if run.get("status") in TERMINAL_RUN_STATUSES:
            return run
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for tuning run {run_id}")
        # Never sleep past the deadline; the last poll happens right at it.
        time.sleep(min(poll_s, remaining))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _print_hits(results: Iterable[SearchResult]) -> None:
    """Print one numbered line per search hit.

    Each line shows the 1-based rank, the score with four decimals, and
    the first 120 characters of the hit text with surrounding whitespace
    stripped and newlines replaced by spaces.

    Args:
        results: Search hits; ``text`` and ``score`` are read with
            ``.get`` and may be missing or ``None``.
    """
    for idx, result in enumerate(results, start=1):
        text = (result.get("text") or "").strip().replace("\n", " ")
        score = result.get("score")
        # Formatting None with ".4f" raises TypeError, so show a placeholder.
        score_label = "n/a" if score is None else f"{score:.4f}"
        print(f"{idx:02d}. score={score_label} text={text[:120]}")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main() -> None:
    """Run the end-to-end demo: ingest, index, search, tune, deploy, search again.

    Environment variables read:
        K2_API_KEY: Required; the script exits when it is unset or empty.
        K2_API_HOST / K2_BASE_URL: API host.  ``K2_API_HOST`` wins when both
            are set; the default is ``https://api.knowledge2.ai``.
        K2_IDEMPOTENCY_SUFFIX: Suffix appended to every idempotency key
            (default: 8 random hex characters).
        K2_PROJECT_ID: Project to use; when unset a project named
            "knowledge2-demo" is created.

    Raises:
        SystemExit: If the API key is missing, if any job or the tuning run
            ends in a status other than "succeeded", or if promotion fails.
        TimeoutError: Propagated from the polling helpers.
    """
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    # K2_API_HOST is the variable the auth_factory example reads; K2_BASE_URL
    # is still honored so existing setups keep working.
    api_host = os.getenv("K2_API_HOST") or os.getenv("K2_BASE_URL", "https://api.knowledge2.ai")
    client = Knowledge2(
        api_host=api_host,
        api_key=api_key,
    )
    idempotency_suffix = os.getenv("K2_IDEMPOTENCY_SUFFIX", uuid.uuid4().hex[:8])

    def _key(base: str) -> str:
        """Return *base* with the per-run idempotency suffix appended."""
        return f"{base}-{idempotency_suffix}"

    def _require_success(job_id: str, label: str) -> None:
        """Wait for *job_id* and exit unless it ends in "succeeded"."""
        job = _wait_for_job(client, job_id)
        status = job.get("status")
        if status != "succeeded":
            # Later steps depend on this job, so stop instead of continuing.
            raise SystemExit(f"{label} job {job_id} finished with status {status}")

    project_id = os.getenv("K2_PROJECT_ID")
    if not project_id:
        project = client.create_project("knowledge2-demo")
        project_id = project["id"]

    corpus = client.create_corpus(
        project_id,
        "knowledge2-demo-corpus",
        description="Sample corpus for the Knowledge2 end-to-end lifecycle.",
    )
    corpus_id = corpus["id"]

    docs = [
        {
            "source_uri": "doc://overview",
            "raw_text": "Knowledge2 organizes knowledge into projects and corpora. Documents are chunked into passages that can be indexed for search. Stable source_uris let you update content without duplicates.",
            "metadata": {"topic": "overview", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://ingestion",
            "raw_text": "Batch ingestion accepts documents with source_uri, raw_text, and optional metadata. Use idempotency keys to prevent duplicate ingest jobs. Upload in batches to stay within API limits.",
            "metadata": {"topic": "ingestion", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://indexing",
            "raw_text": "Dense indexes capture semantic similarity while sparse indexes capture keyword matches. Building both enables hybrid retrieval. Rebuild indexes after large content updates.",
            "metadata": {"topic": "indexing", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://hybrid-search",
            "raw_text": "Hybrid retrieval blends dense and sparse scores using RRF or weighted fusion. Adjust dense_weight and sparse_weight to balance semantics vs exact terms. You can request scores and provenance in the response.",
            "metadata": {"topic": "search", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://tuning",
            "raw_text": "Tuning runs train a better embedding model from query-document pairs. Training data can be auto-built or uploaded as JSONL. Successful runs can be promoted to a deployable model.",
            "metadata": {"topic": "tuning", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://deployments",
            "raw_text": "Deployments attach a tuned model to a corpus and optionally trigger reindexing. Track the reindex job until it succeeds. Once complete, searches use the tuned model.",
            "metadata": {"topic": "deployments", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://evaluation",
            "raw_text": "Evaluation runs compute metrics like nDCG, recall, and MRR on labeled data. Compare baseline and tuned models before promotion. Keep eval sets representative of real queries.",
            "metadata": {"topic": "evaluation", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://security",
            "raw_text": "API keys authenticate requests and can be rotated without downtime. Admin tokens are required for org bootstrap and should be stored securely. Audit logs and usage endpoints help with governance.",
            "metadata": {"topic": "security", "product": "knowledge2"},
        },
    ]
    ingest = client.upload_documents_batch(
        corpus_id,
        docs,
        idempotency_key=_key("demo-ingest-1"),
        auto_index=False,
        wait=False,
    )
    print("Ingest job:", ingest["job_id"])
    _require_success(ingest["job_id"], "Ingest")

    index_job = client.build_indexes(
        corpus_id,
        dense=True,
        sparse=True,
        mode="full",
        idempotency_key=_key("demo-index-1"),
        wait=False,
    )
    print("Index build job:", index_job["job_id"])
    _require_success(index_job["job_id"], "Index build")

    baseline = client.search(
        corpus_id,
        "How does hybrid retrieval blend dense and sparse signals?",
        top_k=5,
        hybrid={
            "enabled": True,
            "fusion_mode": "rrf",
            "rrf_k": 60,
            "dense_weight": 0.6,
            "sparse_weight": 0.4,
        },
        return_config={"include_text": True, "include_scores": True, "include_provenance": True},
    )
    print("Baseline hybrid results:")
    _print_hits(baseline["results"])

    training_build = client.build_training_data(
        corpus_id,
        idempotency_key=_key("demo-training-data-1"),
    )
    print("Training data build job:", training_build["job_id"])
    _require_success(training_build["job_id"], "Training data build")

    tuning_run = client.create_tuning_run(
        corpus_id,
        idempotency_key=_key("demo-tuning-1"),
    )

    print("Tuning run:", tuning_run["run_id"])
    run = _wait_for_tuning_run(client, tuning_run["run_id"])
    if run.get("status") != "succeeded":
        raise SystemExit(f"Tuning run failed with status {run.get('status')}")

    try:
        promoted = client.promote_tuning_run(tuning_run["run_id"])
    except Knowledge2Error as exc:
        raise SystemExit(f"Promotion failed: {exc}") from exc

    model_id = promoted["model_id"]
    deployment = client.create_deployment(corpus_id, model_id, reindex=True)
    reindex_job_id = deployment.get("reindex_job_id")
    if reindex_job_id:
        print("Reindex job:", reindex_job_id)
        _require_success(reindex_job_id, "Reindex")

    tuned = client.search(
        corpus_id,
        "Explain hybrid retrieval in Knowledge2.",
        top_k=5,
        hybrid={
            "enabled": True,
            "fusion_mode": "rrf",
            "rrf_k": 60,
            "dense_weight": 0.5,
            "sparse_weight": 0.5,
        },
        return_config={"include_text": True, "include_scores": True, "include_provenance": True},
    )
    print("Tuned hybrid results:")
    _print_hits(tuned["results"])

    print("Eval runs will be generated automatically after tuning completes.")
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
if __name__ == "__main__":
|
|
213
|
+
main()
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Error handling: catch specific errors, use retryable, configure max_retries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from sdk import (
|
|
9
|
+
AuthenticationError,
|
|
10
|
+
Knowledge2,
|
|
11
|
+
Knowledge2Error,
|
|
12
|
+
NotFoundError,
|
|
13
|
+
RateLimitError,
|
|
14
|
+
ServerError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    # Client with custom max_retries (0 = disable auto-retry)
    client = Knowledge2(api_key=api_key, max_retries=3)

    # Pre-flight check
    if not client.is_authenticated():
        print("No credentials configured!")
        sys.exit(1)

    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # Part 1: one handler per specific error class.
    try:
        client.get_corpus("nonexistent-corpus-id")
    except AuthenticationError as exc:
        print("Invalid API key:", exc.message)
    except NotFoundError as exc:
        print("Resource not found:", exc.message)
    except RateLimitError as exc:
        print("Rate limited. Retry after:", exc.retry_after)
        if exc.retryable:
            print("This error is retryable.")
    except ServerError as exc:
        print("Server error:", exc.message)
        if exc.retryable:
            print("Consider retrying the request.")

    # Part 2: catch the base class and branch on `retryable` before custom retry logic.
    try:
        client.search(corpus_id, "test query")
    except Knowledge2Error as exc:
        verdict = (
            "Transient error - safe to retry:"
            if exc.retryable
            else "Non-retryable error:"
        )
        print(verdict, exc.message)

except Knowledge2Error as exc:
    # Any SDK error not handled above is reported, then propagated.
    print(f"API error: {exc}")
    raise
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Pagination: iter_documents vs list_documents manual pagination, iter with filters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
8
|
+
|
|
9
|
+
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # --- Page[T]: list_* methods return a single page ---
    page = client.list_documents(corpus_id, limit=20, offset=0)  # returns Page[T]

    # What a Page exposes: total, items, len(), bool() and iteration.
    print(f"Total across all pages: {page.total}")  # page.total
    print(f"Items on this page: {page.items}")  # page.items (list[T])
    print(f"Page length: {len(page)}")  # len(page)
    print(f"Page is truthy: {bool(page)}")  # bool(page)

    for doc in page:  # iterate over items on this page
        print(doc.get("id"), doc.get("source_uri", ""))

    # Manual pagination: advance the offset until a page is empty or short.
    limit = 20
    offset = 0
    while True:
        page = client.list_documents(corpus_id, limit=limit, offset=offset)
        if not page:
            break
        for doc in page:
            print(doc.get("id"), doc.get("source_uri", ""))
        offset += len(page)
        if len(page) < limit:
            # A short page means there is nothing left to fetch.
            break

    # --- SyncPager[T]: iter_* methods paginate automatically ---
    # iter_documents returns a SyncPager that lazily fetches successive pages.
    pager = client.iter_documents(corpus_id, limit=50)  # SyncPager[T]

    for item in pager:
        print(item.get("id"), item.get("source_uri", ""))

    # The same iterator, narrowed with filters.
    filtered = client.iter_documents(
        corpus_id,
        limit=50,
        status="indexed",
        source="doc://",
    )
    for item in filtered:
        print(item.get("id"))

    # Materialize every item into a list.
    all_docs = list(client.iter_documents(corpus_id, limit=100))
    print(f"Total documents: {len(all_docs)}")

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate.
    print(f"API error: {exc}")
    raise
|