knowledge2 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. knowledge2-0.4.0.dist-info/METADATA +556 -0
  2. knowledge2-0.4.0.dist-info/RECORD +139 -0
  3. knowledge2-0.4.0.dist-info/WHEEL +5 -0
  4. knowledge2-0.4.0.dist-info/top_level.txt +1 -0
  5. sdk/__init__.py +70 -0
  6. sdk/_async_base.py +525 -0
  7. sdk/_async_paging.py +57 -0
  8. sdk/_base.py +541 -0
  9. sdk/_logging.py +41 -0
  10. sdk/_paging.py +73 -0
  11. sdk/_preview.py +70 -0
  12. sdk/_raw_response.py +25 -0
  13. sdk/_request_options.py +51 -0
  14. sdk/_transport.py +144 -0
  15. sdk/_validation.py +25 -0
  16. sdk/_validation_response.py +36 -0
  17. sdk/_version.py +3 -0
  18. sdk/async_client.py +320 -0
  19. sdk/async_resources/__init__.py +45 -0
  20. sdk/async_resources/_mixin_base.py +42 -0
  21. sdk/async_resources/a2a.py +230 -0
  22. sdk/async_resources/agents.py +489 -0
  23. sdk/async_resources/audit.py +145 -0
  24. sdk/async_resources/auth.py +133 -0
  25. sdk/async_resources/console.py +409 -0
  26. sdk/async_resources/corpora.py +276 -0
  27. sdk/async_resources/deployments.py +106 -0
  28. sdk/async_resources/documents.py +592 -0
  29. sdk/async_resources/feeds.py +248 -0
  30. sdk/async_resources/indexes.py +208 -0
  31. sdk/async_resources/jobs.py +165 -0
  32. sdk/async_resources/metadata.py +48 -0
  33. sdk/async_resources/models.py +102 -0
  34. sdk/async_resources/onboarding.py +538 -0
  35. sdk/async_resources/orgs.py +37 -0
  36. sdk/async_resources/pipelines.py +523 -0
  37. sdk/async_resources/projects.py +90 -0
  38. sdk/async_resources/search.py +262 -0
  39. sdk/async_resources/training.py +357 -0
  40. sdk/async_resources/usage.py +91 -0
  41. sdk/client.py +417 -0
  42. sdk/config.py +182 -0
  43. sdk/errors.py +178 -0
  44. sdk/examples/auth_factory.py +34 -0
  45. sdk/examples/batch_operations.py +57 -0
  46. sdk/examples/document_upload.py +56 -0
  47. sdk/examples/e2e_lifecycle.py +213 -0
  48. sdk/examples/error_handling.py +61 -0
  49. sdk/examples/pagination.py +64 -0
  50. sdk/examples/quickstart.py +36 -0
  51. sdk/examples/request_options.py +44 -0
  52. sdk/examples/search.py +64 -0
  53. sdk/integrations/__init__.py +57 -0
  54. sdk/integrations/_client.py +101 -0
  55. sdk/integrations/langchain/__init__.py +6 -0
  56. sdk/integrations/langchain/retriever.py +166 -0
  57. sdk/integrations/langchain/tools.py +108 -0
  58. sdk/integrations/llamaindex/__init__.py +11 -0
  59. sdk/integrations/llamaindex/filters.py +78 -0
  60. sdk/integrations/llamaindex/retriever.py +162 -0
  61. sdk/integrations/llamaindex/tools.py +109 -0
  62. sdk/integrations/llamaindex/vector_store.py +320 -0
  63. sdk/models/__init__.py +18 -0
  64. sdk/models/_base.py +24 -0
  65. sdk/models/_registry.py +457 -0
  66. sdk/models/a2a.py +92 -0
  67. sdk/models/agents.py +109 -0
  68. sdk/models/audit.py +28 -0
  69. sdk/models/auth.py +49 -0
  70. sdk/models/chunks.py +20 -0
  71. sdk/models/common.py +14 -0
  72. sdk/models/console.py +103 -0
  73. sdk/models/corpora.py +48 -0
  74. sdk/models/deployments.py +13 -0
  75. sdk/models/documents.py +126 -0
  76. sdk/models/embeddings.py +24 -0
  77. sdk/models/evaluation.py +17 -0
  78. sdk/models/feedback.py +9 -0
  79. sdk/models/feeds.py +57 -0
  80. sdk/models/indexes.py +36 -0
  81. sdk/models/jobs.py +52 -0
  82. sdk/models/models.py +26 -0
  83. sdk/models/onboarding.py +323 -0
  84. sdk/models/orgs.py +11 -0
  85. sdk/models/pipelines.py +147 -0
  86. sdk/models/projects.py +19 -0
  87. sdk/models/search.py +149 -0
  88. sdk/models/training.py +57 -0
  89. sdk/models/usage.py +39 -0
  90. sdk/namespaces.py +386 -0
  91. sdk/py.typed +0 -0
  92. sdk/resources/__init__.py +45 -0
  93. sdk/resources/_mixin_base.py +40 -0
  94. sdk/resources/a2a.py +230 -0
  95. sdk/resources/agents.py +487 -0
  96. sdk/resources/audit.py +144 -0
  97. sdk/resources/auth.py +138 -0
  98. sdk/resources/console.py +411 -0
  99. sdk/resources/corpora.py +269 -0
  100. sdk/resources/deployments.py +105 -0
  101. sdk/resources/documents.py +597 -0
  102. sdk/resources/feeds.py +246 -0
  103. sdk/resources/indexes.py +210 -0
  104. sdk/resources/jobs.py +164 -0
  105. sdk/resources/metadata.py +53 -0
  106. sdk/resources/models.py +99 -0
  107. sdk/resources/onboarding.py +542 -0
  108. sdk/resources/orgs.py +35 -0
  109. sdk/resources/pipeline_builder.py +257 -0
  110. sdk/resources/pipelines.py +520 -0
  111. sdk/resources/projects.py +87 -0
  112. sdk/resources/search.py +277 -0
  113. sdk/resources/training.py +358 -0
  114. sdk/resources/usage.py +92 -0
  115. sdk/types/__init__.py +366 -0
  116. sdk/types/a2a.py +88 -0
  117. sdk/types/agents.py +133 -0
  118. sdk/types/audit.py +26 -0
  119. sdk/types/auth.py +45 -0
  120. sdk/types/chunks.py +18 -0
  121. sdk/types/common.py +10 -0
  122. sdk/types/console.py +99 -0
  123. sdk/types/corpora.py +42 -0
  124. sdk/types/deployments.py +11 -0
  125. sdk/types/documents.py +104 -0
  126. sdk/types/embeddings.py +22 -0
  127. sdk/types/evaluation.py +15 -0
  128. sdk/types/feedback.py +7 -0
  129. sdk/types/feeds.py +61 -0
  130. sdk/types/indexes.py +30 -0
  131. sdk/types/jobs.py +50 -0
  132. sdk/types/models.py +22 -0
  133. sdk/types/onboarding.py +395 -0
  134. sdk/types/orgs.py +9 -0
  135. sdk/types/pipelines.py +177 -0
  136. sdk/types/projects.py +14 -0
  137. sdk/types/search.py +116 -0
  138. sdk/types/training.py +55 -0
  139. sdk/types/usage.py +37 -0
sdk/errors.py ADDED
@@ -0,0 +1,178 @@
1
+ """Knowledge2 SDK exception hierarchy.
2
+
3
+ All SDK exceptions inherit from :class:`Knowledge2Error`, so callers can
4
+ use ``except Knowledge2Error`` as a catch-all.
5
+
6
+ Hierarchy::
7
+
8
+ Knowledge2Error (base)
9
+ ├── APIError (HTTP errors from the API)
10
+ │ ├── BadRequestError (400)
11
+ │ ├── AuthenticationError (401)
12
+ │ ├── PermissionDeniedError (403)
13
+ │ ├── NotFoundError (404)
14
+ │ ├── ConflictError (409)
15
+ │ ├── ValidationError (422)
16
+ │ ├── RateLimitError (429)
17
+ │ └── ServerError (500, 502, 503, 504)
18
+ ├── APIConnectionError (network / DNS failures)
19
+ ├── APITimeoutError (request timeout)
20
+ └── ConfirmationRequiredError (client-side deletion guard)
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from typing import Any
26
+
27
+
28
class Knowledge2Error(Exception):
    """Root of the Knowledge2 SDK exception hierarchy.

    Attributes:
        message: The text given to the constructor; it is also forwarded to
            :class:`Exception`, so ``str(error)`` yields the same text.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(message)

    @property
    def retryable(self) -> bool:
        """Tell whether the failed operation can be retried (``False`` here)."""
        return False
39
+
40
+
41
class APIError(Knowledge2Error):
    """An HTTP 4xx / 5xx error reported by the Knowledge2 API.

    Attributes:
        status_code: HTTP status code supplied by the caller (required).
        code: Optional error code string; ``None`` when not given.
        details: Optional extra payload of any type; ``None`` when not given.
        request_id: Optional request identifier; ``None`` when not given.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int,
        code: str | None = None,
        details: Any = None,
        request_id: str | None = None,
    ) -> None:
        # The base class stores ``message``; everything else is keyword-only.
        super().__init__(message)
        self.status_code, self.code = status_code, code
        self.details, self.request_id = details, request_id
58
+
59
+
60
class BadRequestError(APIError):
    """Raised for HTTP 400: the request was malformed or missing required fields."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
66
+
67
+
68
class AuthenticationError(APIError):
    """Raised for HTTP 401: the API key / bearer token is invalid or missing."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
74
+
75
+
76
class PermissionDeniedError(APIError):
    """Raised for HTTP 403.

    Either the API key lacks the required scopes, or the requested feature
    is not enabled for the organization.
    """

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
82
+
83
+
84
class NotFoundError(APIError):
    """Raised for HTTP 404: the requested resource does not exist."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
90
+
91
+
92
class ConflictError(APIError):
    """Raised for HTTP 409: a resource conflict (e.g. duplicate idempotency key)."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
98
+
99
+
100
class ValidationError(APIError):
    """Raised for HTTP 422: request validation failed."""

    @property
    def retryable(self) -> bool:
        """Always ``False`` for this error type."""
        return False
106
+
107
+
108
class RateLimitError(APIError):
    """Raised for HTTP 429: too many requests.

    Attributes:
        retry_after: Server-suggested wait time in seconds (taken from the
            ``Retry-After`` header), or *None* when the header was absent.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int = 429,
        retry_after: float | None = None,
        code: str | None = None,
        details: Any = None,
        request_id: str | None = None,
    ) -> None:
        # Everything except ``retry_after`` is handled by APIError.
        api_error_fields = {
            "status_code": status_code,
            "code": code,
            "details": details,
            "request_id": request_id,
        }
        super().__init__(message, **api_error_fields)
        self.retry_after = retry_after

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
138
+
139
+
140
class ServerError(APIError):
    """Raised for HTTP 500 / 502 / 503 / 504: a server-side failure."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
146
+
147
+
148
class APIConnectionError(Knowledge2Error):
    """Raised on a network connectivity failure (DNS, connection refused, etc.)."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
154
+
155
+
156
class APITimeoutError(Knowledge2Error):
    """Raised when the request timed out."""

    @property
    def retryable(self) -> bool:
        """Always ``True`` for this error type."""
        return True
162
+
163
+
164
class ConfirmationRequiredError(Knowledge2Error):
    """Signals that a destructive operation was attempted without confirmation.

    Delete methods require ``confirm=True`` as a safety guard. Because this
    class derives from :class:`Knowledge2Error`, an ``except Knowledge2Error``
    catch-all also handles it.

    Attributes:
        resource_type: The resource type string given to the constructor.
        resource_id: The resource identifier given to the constructor.
    """

    def __init__(self, resource_type: str, resource_id: str) -> None:
        self.resource_type = resource_type
        self.resource_id = resource_id
        # ``str.capitalize`` upper-cases the first character and lower-cases the rest.
        label = resource_type.capitalize()
        explanation = (
            f"{label} {resource_id!r} deletion is irreversible. "
            "Pass confirm=True to proceed."
        )
        super().__init__(explanation)
sdk/examples/auth_factory.py ADDED
@@ -0,0 +1,34 @@
1
+ """Bearer token factory example — dynamic auth for OAuth/OIDC workloads.
2
+
3
+ Usage:
4
+ K2_API_HOST=http://localhost:8000 python -m sdk.examples.auth_factory
5
+ """
6
+
7
+ import os
8
+
9
+
10
def get_oauth_token() -> str:
    """Stand in for an OAuth token fetch (swap in your real provider).

    Returns:
        The value of the ``K2_BEARER_TOKEN`` environment variable, or the
        placeholder ``"demo-token"`` when that variable is not set.
    """
    # In production: call your OAuth provider here
    token = os.getenv("K2_BEARER_TOKEN", "demo-token")
    return token
14
+
15
+
16
def main() -> None:
    """Build a client that authenticates through a token factory and report its state.

    The API host is read from ``K2_API_HOST`` (default
    ``https://api.knowledge2.ai``); tokens are supplied by
    :func:`get_oauth_token`.
    """
    from sdk import Knowledge2

    host = os.environ.get("K2_API_HOST", "https://api.knowledge2.ai")
    client = Knowledge2(
        api_host=host,
        bearer_token_factory=get_oauth_token,
        token_cache_ttl=300,  # cache for 5 minutes
    )

    # Pre-flight check
    if client.is_authenticated():
        print(f"Authenticated: {client.is_authenticated()}")
    else:
        print("No auth configured!")
31
+
32
+
33
+ if __name__ == "__main__":
34
+ main()
sdk/examples/batch_operations.py ADDED
@@ -0,0 +1,57 @@
1
+ """Batch workflow: upload multiple docs, build indexes with wait, batch search, iterate results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from sdk import Knowledge2, Knowledge2Error
8
+
9
try:
    # The API key is mandatory; the corpus id falls back to a placeholder.
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # Step 1: upload several inline documents in one call, with an idempotency key.
    docs = [
        {"source_uri": "doc://a", "raw_text": "Content A."},
        {"source_uri": "doc://b", "raw_text": "Content B."},
        {"source_uri": "doc://c", "raw_text": "Content C."},
    ]
    upload = client.upload_documents_batch(
        corpus_id,
        docs,
        idempotency_key="batch-demo-abc123",
        wait=True,
    )
    print("Upload job:", upload.get("job_id"))

    # Step 2: build dense and sparse indexes, again with wait and idempotency.
    index_job = client.build_indexes(
        corpus_id,
        dense=True,
        sparse=True,
        mode="full",
        idempotency_key="index-demo-abc123",
        wait=True,
    )
    print("Index job:", index_job.get("job_id"))

    # Step 3: run several queries in a single batch search.
    queries = ["What is A?", "What is B?", "What is C?"]
    batch = client.search_batch(
        corpus_id,
        queries,
        top_k=3,
        return_config={"include_text": True},
    )

    # Responses are matched to queries by position.
    responses = batch.get("responses", [])
    for number, response in enumerate(responses, start=1):
        print(f"Query {number}: {queries[number - 1]}")
        for hit in response.get("results", []):
            snippet = (hit.get("text") or "")[:60]
            print(" ", snippet)

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate unchanged.
    print(f"API error: {exc}")
    raise
sdk/examples/document_upload.py ADDED
@@ -0,0 +1,56 @@
1
+ """Document upload: single file, batch raw_text, URL ingestion. Shows wait=True pattern."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from sdk import Knowledge2, Knowledge2Error
8
+
9
try:
    # The API key is mandatory; the corpus id falls back to a placeholder.
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # 1) One document, uploaded from a local file path.
    resp = client.upload_document(
        corpus_id,
        file_path="/path/to/doc.pdf",
        source_uri="doc://my-file",
        idempotency_key="upload-1",
    )
    print("Uploaded:", resp.get("id"))

    # 2) A batch of raw_text documents (wait=True blocks until job completes).
    docs = [
        {"source_uri": "doc://a", "raw_text": "First document content."},
        {"source_uri": "doc://b", "raw_text": "Second document content."},
    ]
    batch_resp = client.upload_documents_batch(
        corpus_id,
        docs,
        idempotency_key="batch-1",
        wait=True,
        poll_s=5,
    )
    print("Batch job:", batch_resp.get("job_id"))

    # 3) URL ingestion, also with wait=True.
    urls = [
        {"url": "https://example.com/page1", "title": "Page 1"},
        {"url": "https://example.com/page2", "title": "Page 2"},
    ]
    url_resp = client.ingest_urls(
        corpus_id,
        urls,
        idempotency_key="urls-1",
        wait=True,
        poll_s=5,
    )
    print("URL ingest job:", url_resp.get("job_id"))

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate unchanged.
    print(f"API error: {exc}")
    raise
sdk/examples/e2e_lifecycle.py ADDED
@@ -0,0 +1,213 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import time
5
+ import uuid
6
+ from typing import Iterable
7
+
8
+ from sdk import Knowledge2, Knowledge2Error
9
+ from sdk.types.jobs import JobResponse
10
+ from sdk.types.search import SearchResult
11
+ from sdk.types.training import TuningRunDetailResponse
12
+
13
# Status values at which the polling helpers below stop waiting and return
# the job / tuning-run record to the caller.
TERMINAL_JOB_STATUSES = {"succeeded", "failed", "canceled"}
TERMINAL_RUN_STATUSES = {"succeeded", "failed", "canceled"}
15
+
16
+
17
def _wait_for_job(
    client: Knowledge2,
    job_id: str,
    *,
    poll_s: float = 5.0,
    timeout_s: float = 900.0,
) -> JobResponse:
    """Poll a job until it reaches a terminal status.

    Args:
        client: Client whose ``get_job`` method is polled.
        job_id: Identifier of the job to watch.
        poll_s: Seconds to sleep between polls.
        timeout_s: Maximum number of seconds to keep polling.

    Returns:
        The job record as soon as its ``status`` is one of
        ``TERMINAL_JOB_STATUSES``. The record is returned whatever that
        status is, so callers must inspect it for failure.

    Raises:
        TimeoutError: If the deadline passes before a terminal status is seen.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    while True:
        job = client.get_job(job_id)
        if job.get("status") in TERMINAL_JOB_STATUSES:
            return job
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for job {job_id}")
        # Never sleep past the deadline; one last poll happens right at it.
        time.sleep(min(poll_s, remaining))
33
+
34
+
35
def _wait_for_tuning_run(
    client: Knowledge2,
    run_id: str,
    *,
    poll_s: float = 10.0,
    timeout_s: float = 7200.0,
) -> TuningRunDetailResponse:
    """Poll a tuning run until it reaches a terminal status.

    Args:
        client: Client whose ``get_tuning_run`` method is polled.
        run_id: Identifier of the tuning run to watch.
        poll_s: Seconds to sleep between polls.
        timeout_s: Maximum number of seconds to keep polling.

    Returns:
        The run record as soon as its ``status`` is one of
        ``TERMINAL_RUN_STATUSES``. The record is returned whatever that
        status is, so callers must inspect it for failure.

    Raises:
        TimeoutError: If the deadline passes before a terminal status is seen.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    while True:
        run = client.get_tuning_run(run_id)
        if run.get("status") in TERMINAL_RUN_STATUSES:
            return run
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for tuning run {run_id}")
        # Never sleep past the deadline; one last poll happens right at it.
        time.sleep(min(poll_s, remaining))
51
+
52
+
53
+ def _print_hits(results: Iterable[SearchResult]) -> None:
54
+ for idx, result in enumerate(results, start=1):
55
+ text = (result.get("text") or "").strip().replace("\n", " ")
56
+ print(f"{idx:02d}. score={result.get('score'):.4f} text={text[:120]}")
57
+
58
+
59
def _require_job_success(job: JobResponse, label: str) -> None:
    """Abort the script unless *job* finished with status ``"succeeded"``."""
    status = job.get("status")
    if status != "succeeded":
        raise SystemExit(f"{label} job ended with status {status}")


def main() -> None:
    """Run the end-to-end demo: ingest, index, search, tune, deploy, search again.

    Environment variables read:
        K2_API_KEY: Required; the script exits when it is missing.
        K2_BASE_URL: API host, default ``https://api.knowledge2.ai``.
        K2_IDEMPOTENCY_SUFFIX: Suffix appended to every idempotency key;
            defaults to eight random hex characters.
        K2_PROJECT_ID: Existing project to use; a project is created when unset.

    Every background job is waited on and must finish as ``"succeeded"``
    before the next step starts, the same way the tuning run is checked.

    Raises:
        SystemExit: On a missing API key, a job or tuning run that did not
            succeed, or a failed promotion.
        TimeoutError: If a job or tuning run does not finish in time.
    """
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(
        api_host=os.getenv("K2_BASE_URL", "https://api.knowledge2.ai"),
        api_key=api_key,
    )
    idempotency_suffix = os.getenv("K2_IDEMPOTENCY_SUFFIX", uuid.uuid4().hex[:8])

    def _key(base: str) -> str:
        """Return *base* with the per-run idempotency suffix appended."""
        return f"{base}-{idempotency_suffix}"

    project_id = os.getenv("K2_PROJECT_ID")
    if not project_id:
        project = client.create_project("knowledge2-demo")
        project_id = project["id"]

    corpus = client.create_corpus(
        project_id,
        "knowledge2-demo-corpus",
        description="Sample corpus for the Knowledge2 end-to-end lifecycle.",
    )
    corpus_id = corpus["id"]

    docs = [
        {
            "source_uri": "doc://overview",
            "raw_text": "Knowledge2 organizes knowledge into projects and corpora. Documents are chunked into passages that can be indexed for search. Stable source_uris let you update content without duplicates.",
            "metadata": {"topic": "overview", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://ingestion",
            "raw_text": "Batch ingestion accepts documents with source_uri, raw_text, and optional metadata. Use idempotency keys to prevent duplicate ingest jobs. Upload in batches to stay within API limits.",
            "metadata": {"topic": "ingestion", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://indexing",
            "raw_text": "Dense indexes capture semantic similarity while sparse indexes capture keyword matches. Building both enables hybrid retrieval. Rebuild indexes after large content updates.",
            "metadata": {"topic": "indexing", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://hybrid-search",
            "raw_text": "Hybrid retrieval blends dense and sparse scores using RRF or weighted fusion. Adjust dense_weight and sparse_weight to balance semantics vs exact terms. You can request scores and provenance in the response.",
            "metadata": {"topic": "search", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://tuning",
            "raw_text": "Tuning runs train a better embedding model from query-document pairs. Training data can be auto-built or uploaded as JSONL. Successful runs can be promoted to a deployable model.",
            "metadata": {"topic": "tuning", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://deployments",
            "raw_text": "Deployments attach a tuned model to a corpus and optionally trigger reindexing. Track the reindex job until it succeeds. Once complete, searches use the tuned model.",
            "metadata": {"topic": "deployments", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://evaluation",
            "raw_text": "Evaluation runs compute metrics like nDCG, recall, and MRR on labeled data. Compare baseline and tuned models before promotion. Keep eval sets representative of real queries.",
            "metadata": {"topic": "evaluation", "product": "knowledge2"},
        },
        {
            "source_uri": "doc://security",
            "raw_text": "API keys authenticate requests and can be rotated without downtime. Admin tokens are required for org bootstrap and should be stored securely. Audit logs and usage endpoints help with governance.",
            "metadata": {"topic": "security", "product": "knowledge2"},
        },
    ]
    ingest = client.upload_documents_batch(
        corpus_id,
        docs,
        idempotency_key=_key("demo-ingest-1"),
        auto_index=False,
        wait=False,
    )
    print("Ingest job:", ingest["job_id"])
    # Indexing a corpus whose ingest failed is pointless, so stop here if it did.
    _require_job_success(_wait_for_job(client, ingest["job_id"]), "Ingest")

    index_job = client.build_indexes(
        corpus_id,
        dense=True,
        sparse=True,
        mode="full",
        idempotency_key=_key("demo-index-1"),
        wait=False,
    )
    print("Index build job:", index_job["job_id"])
    _require_job_success(_wait_for_job(client, index_job["job_id"]), "Index build")

    baseline = client.search(
        corpus_id,
        "How does hybrid retrieval blend dense and sparse signals?",
        top_k=5,
        hybrid={
            "enabled": True,
            "fusion_mode": "rrf",
            "rrf_k": 60,
            "dense_weight": 0.6,
            "sparse_weight": 0.4,
        },
        return_config={"include_text": True, "include_scores": True, "include_provenance": True},
    )
    print("Baseline hybrid results:")
    _print_hits(baseline["results"])

    training_build = client.build_training_data(
        corpus_id,
        idempotency_key=_key("demo-training-data-1"),
    )
    print("Training data build job:", training_build["job_id"])
    _require_job_success(
        _wait_for_job(client, training_build["job_id"]),
        "Training data build",
    )

    tuning_run = client.create_tuning_run(
        corpus_id,
        idempotency_key=_key("demo-tuning-1"),
    )

    print("Tuning run:", tuning_run["run_id"])
    run = _wait_for_tuning_run(client, tuning_run["run_id"])
    if run.get("status") != "succeeded":
        raise SystemExit(f"Tuning run failed with status {run.get('status')}")

    try:
        promoted = client.promote_tuning_run(tuning_run["run_id"])
    except Knowledge2Error as exc:
        raise SystemExit(f"Promotion failed: {exc}") from exc

    model_id = promoted["model_id"]
    deployment = client.create_deployment(corpus_id, model_id, reindex=True)
    # The deployment response may carry no reindex job id; wait only when it does.
    reindex_job_id = deployment.get("reindex_job_id")
    if reindex_job_id:
        print("Reindex job:", reindex_job_id)
        _require_job_success(_wait_for_job(client, reindex_job_id), "Reindex")

    tuned = client.search(
        corpus_id,
        "Explain hybrid retrieval in Knowledge2.",
        top_k=5,
        hybrid={
            "enabled": True,
            "fusion_mode": "rrf",
            "rrf_k": 60,
            "dense_weight": 0.5,
            "sparse_weight": 0.5,
        },
        return_config={"include_text": True, "include_scores": True, "include_provenance": True},
    )
    print("Tuned hybrid results:")
    _print_hits(tuned["results"])

    print("Eval runs will be generated automatically after tuning completes.")
210
+
211
+
212
+ if __name__ == "__main__":
213
+ main()
sdk/examples/error_handling.py ADDED
@@ -0,0 +1,61 @@
1
+ """Error handling: catch specific errors, use retryable, configure max_retries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import sys
7
+
8
+ from sdk import (
9
+ AuthenticationError,
10
+ Knowledge2,
11
+ Knowledge2Error,
12
+ NotFoundError,
13
+ RateLimitError,
14
+ ServerError,
15
+ )
16
+
17
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    # Client with custom max_retries (0 = disable auto-retry)
    client = Knowledge2(api_key=api_key, max_retries=3)

    # Pre-flight check: exit with status 1 when the client has no credentials.
    if not client.is_authenticated():
        print("No credentials configured!")
        sys.exit(1)

    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # Part 1: one handler per specific error class.
    try:
        client.get_corpus("nonexistent-corpus-id")
    except AuthenticationError as auth_err:
        print("Invalid API key:", auth_err.message)
    except NotFoundError as missing_err:
        print("Resource not found:", missing_err.message)
    except RateLimitError as limit_err:
        print("Rate limited. Retry after:", limit_err.retry_after)
        if limit_err.retryable:
            print("This error is retryable.")
    except ServerError as server_err:
        print("Server error:", server_err.message)
        if server_err.retryable:
            print("Consider retrying the request.")

    # Part 2: branch on ``retryable`` before applying custom retry logic.
    try:
        client.search(corpus_id, "test query")
    except Knowledge2Error as search_err:
        prefix = (
            "Transient error - safe to retry:"
            if search_err.retryable
            else "Non-retryable error:"
        )
        print(prefix, search_err.message)

except Knowledge2Error as exc:
    # Any SDK error not handled above is reported, then propagated unchanged.
    print(f"API error: {exc}")
    raise
sdk/examples/pagination.py ADDED
@@ -0,0 +1,64 @@
1
+ """Pagination: iter_documents vs list_documents manual pagination, iter with filters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from sdk import Knowledge2, Knowledge2Error
8
+
9
try:
    api_key = os.getenv("K2_API_KEY")
    if not api_key:
        raise SystemExit("K2_API_KEY is required")

    client = Knowledge2(api_key=api_key)
    corpus_id = os.getenv("K2_CORPUS_ID", "corpus-123")

    # --- Page[T]: a single page of results, as returned by list_* methods ---
    page = client.list_documents(corpus_id, limit=20, offset=0)  # returns Page[T]

    # What a page offers: .total, .items (list[T]), len(), bool() and iteration.
    print(f"Total across all pages: {page.total}")
    print(f"Items on this page: {page.items}")
    print(f"Page length: {len(page)}")
    print(f"Page is truthy: {bool(page)}")

    for doc in page:  # iterate over items on this page
        print(doc.get("id"), doc.get("source_uri", ""))

    # Manual pagination: advance the offset until a falsy or short page appears.
    limit = 20
    offset = 0
    more_pages = True
    while more_pages:
        page = client.list_documents(corpus_id, limit=limit, offset=offset)
        if not page:
            break
        for doc in page:
            print(doc.get("id"), doc.get("source_uri", ""))
        offset += len(page)
        # A page shorter than ``limit`` ends the loop.
        more_pages = len(page) >= limit

    # --- SyncPager[T]: auto-pagination from iter_* methods ---
    # iter_documents returns a SyncPager that lazily fetches successive pages.
    pager = client.iter_documents(corpus_id, limit=50)  # SyncPager[T]

    for item in pager:
        print(item.get("id"), item.get("source_uri", ""))

    # The same iterator, narrowed with filters.
    filtered = client.iter_documents(
        corpus_id,
        limit=50,
        status="indexed",
        source="doc://",
    )
    for item in filtered:
        print(item.get("id"))

    # Materialise every item into a list.
    all_docs = list(client.iter_documents(corpus_id, limit=100))
    print(f"Total documents: {len(all_docs)}")

except Knowledge2Error as exc:
    # Report the SDK error, then let it propagate unchanged.
    print(f"API error: {exc}")
    raise