knowledge2 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. knowledge2-0.4.0.dist-info/METADATA +556 -0
  2. knowledge2-0.4.0.dist-info/RECORD +139 -0
  3. knowledge2-0.4.0.dist-info/WHEEL +5 -0
  4. knowledge2-0.4.0.dist-info/top_level.txt +1 -0
  5. sdk/__init__.py +70 -0
  6. sdk/_async_base.py +525 -0
  7. sdk/_async_paging.py +57 -0
  8. sdk/_base.py +541 -0
  9. sdk/_logging.py +41 -0
  10. sdk/_paging.py +73 -0
  11. sdk/_preview.py +70 -0
  12. sdk/_raw_response.py +25 -0
  13. sdk/_request_options.py +51 -0
  14. sdk/_transport.py +144 -0
  15. sdk/_validation.py +25 -0
  16. sdk/_validation_response.py +36 -0
  17. sdk/_version.py +3 -0
  18. sdk/async_client.py +320 -0
  19. sdk/async_resources/__init__.py +45 -0
  20. sdk/async_resources/_mixin_base.py +42 -0
  21. sdk/async_resources/a2a.py +230 -0
  22. sdk/async_resources/agents.py +489 -0
  23. sdk/async_resources/audit.py +145 -0
  24. sdk/async_resources/auth.py +133 -0
  25. sdk/async_resources/console.py +409 -0
  26. sdk/async_resources/corpora.py +276 -0
  27. sdk/async_resources/deployments.py +106 -0
  28. sdk/async_resources/documents.py +592 -0
  29. sdk/async_resources/feeds.py +248 -0
  30. sdk/async_resources/indexes.py +208 -0
  31. sdk/async_resources/jobs.py +165 -0
  32. sdk/async_resources/metadata.py +48 -0
  33. sdk/async_resources/models.py +102 -0
  34. sdk/async_resources/onboarding.py +538 -0
  35. sdk/async_resources/orgs.py +37 -0
  36. sdk/async_resources/pipelines.py +523 -0
  37. sdk/async_resources/projects.py +90 -0
  38. sdk/async_resources/search.py +262 -0
  39. sdk/async_resources/training.py +357 -0
  40. sdk/async_resources/usage.py +91 -0
  41. sdk/client.py +417 -0
  42. sdk/config.py +182 -0
  43. sdk/errors.py +178 -0
  44. sdk/examples/auth_factory.py +34 -0
  45. sdk/examples/batch_operations.py +57 -0
  46. sdk/examples/document_upload.py +56 -0
  47. sdk/examples/e2e_lifecycle.py +213 -0
  48. sdk/examples/error_handling.py +61 -0
  49. sdk/examples/pagination.py +64 -0
  50. sdk/examples/quickstart.py +36 -0
  51. sdk/examples/request_options.py +44 -0
  52. sdk/examples/search.py +64 -0
  53. sdk/integrations/__init__.py +57 -0
  54. sdk/integrations/_client.py +101 -0
  55. sdk/integrations/langchain/__init__.py +6 -0
  56. sdk/integrations/langchain/retriever.py +166 -0
  57. sdk/integrations/langchain/tools.py +108 -0
  58. sdk/integrations/llamaindex/__init__.py +11 -0
  59. sdk/integrations/llamaindex/filters.py +78 -0
  60. sdk/integrations/llamaindex/retriever.py +162 -0
  61. sdk/integrations/llamaindex/tools.py +109 -0
  62. sdk/integrations/llamaindex/vector_store.py +320 -0
  63. sdk/models/__init__.py +18 -0
  64. sdk/models/_base.py +24 -0
  65. sdk/models/_registry.py +457 -0
  66. sdk/models/a2a.py +92 -0
  67. sdk/models/agents.py +109 -0
  68. sdk/models/audit.py +28 -0
  69. sdk/models/auth.py +49 -0
  70. sdk/models/chunks.py +20 -0
  71. sdk/models/common.py +14 -0
  72. sdk/models/console.py +103 -0
  73. sdk/models/corpora.py +48 -0
  74. sdk/models/deployments.py +13 -0
  75. sdk/models/documents.py +126 -0
  76. sdk/models/embeddings.py +24 -0
  77. sdk/models/evaluation.py +17 -0
  78. sdk/models/feedback.py +9 -0
  79. sdk/models/feeds.py +57 -0
  80. sdk/models/indexes.py +36 -0
  81. sdk/models/jobs.py +52 -0
  82. sdk/models/models.py +26 -0
  83. sdk/models/onboarding.py +323 -0
  84. sdk/models/orgs.py +11 -0
  85. sdk/models/pipelines.py +147 -0
  86. sdk/models/projects.py +19 -0
  87. sdk/models/search.py +149 -0
  88. sdk/models/training.py +57 -0
  89. sdk/models/usage.py +39 -0
  90. sdk/namespaces.py +386 -0
  91. sdk/py.typed +0 -0
  92. sdk/resources/__init__.py +45 -0
  93. sdk/resources/_mixin_base.py +40 -0
  94. sdk/resources/a2a.py +230 -0
  95. sdk/resources/agents.py +487 -0
  96. sdk/resources/audit.py +144 -0
  97. sdk/resources/auth.py +138 -0
  98. sdk/resources/console.py +411 -0
  99. sdk/resources/corpora.py +269 -0
  100. sdk/resources/deployments.py +105 -0
  101. sdk/resources/documents.py +597 -0
  102. sdk/resources/feeds.py +246 -0
  103. sdk/resources/indexes.py +210 -0
  104. sdk/resources/jobs.py +164 -0
  105. sdk/resources/metadata.py +53 -0
  106. sdk/resources/models.py +99 -0
  107. sdk/resources/onboarding.py +542 -0
  108. sdk/resources/orgs.py +35 -0
  109. sdk/resources/pipeline_builder.py +257 -0
  110. sdk/resources/pipelines.py +520 -0
  111. sdk/resources/projects.py +87 -0
  112. sdk/resources/search.py +277 -0
  113. sdk/resources/training.py +358 -0
  114. sdk/resources/usage.py +92 -0
  115. sdk/types/__init__.py +366 -0
  116. sdk/types/a2a.py +88 -0
  117. sdk/types/agents.py +133 -0
  118. sdk/types/audit.py +26 -0
  119. sdk/types/auth.py +45 -0
  120. sdk/types/chunks.py +18 -0
  121. sdk/types/common.py +10 -0
  122. sdk/types/console.py +99 -0
  123. sdk/types/corpora.py +42 -0
  124. sdk/types/deployments.py +11 -0
  125. sdk/types/documents.py +104 -0
  126. sdk/types/embeddings.py +22 -0
  127. sdk/types/evaluation.py +15 -0
  128. sdk/types/feedback.py +7 -0
  129. sdk/types/feeds.py +61 -0
  130. sdk/types/indexes.py +30 -0
  131. sdk/types/jobs.py +50 -0
  132. sdk/types/models.py +22 -0
  133. sdk/types/onboarding.py +395 -0
  134. sdk/types/orgs.py +9 -0
  135. sdk/types/pipelines.py +177 -0
  136. sdk/types/projects.py +14 -0
  137. sdk/types/search.py +116 -0
  138. sdk/types/training.py +55 -0
  139. sdk/types/usage.py +37 -0
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from typing import Any
5
+
6
+ from sdk import AsyncKnowledge2, Knowledge2
7
+ from sdk.integrations._client import (
8
+ merge_return_config,
9
+ resolve_async_client,
10
+ resolve_client,
11
+ resolve_corpus_id,
12
+ )
13
+ from sdk.integrations.llamaindex.filters import llama_filters_to_k2
14
+
15
+ try:
16
+ from llama_index.core import QueryBundle
17
+ from llama_index.core.retrievers import BaseRetriever
18
+ from llama_index.core.schema import NodeWithScore, TextNode
19
+ from llama_index.core.vector_stores.types import MetadataFilters
20
+ except ImportError as exc: # pragma: no cover - import-time dependency guard
21
+ raise ImportError(
22
+ "LlamaIndex integration requires llama-index-core. Install with `pip install .[llamaindex]`."
23
+ ) from exc
24
+
25
+
26
class K2LlamaIndexRetriever(BaseRetriever):
    """LlamaIndex retriever backed by Knowledge2 search.

    The synchronous path (``_retrieve``) uses the client resolved at construction
    time; the asynchronous path (``_aretrieve``) lazily creates an
    ``AsyncKnowledge2`` from that client. Both paths share the same query
    validation, search arguments and result conversion.
    """

    def __init__(
        self,
        *,
        corpus_id: str | None = None,
        client: Knowledge2 | None = None,
        api_key: str | None = None,
        api_host: str | None = None,
        top_k: int = 10,
        filters: MetadataFilters | None = None,
        hybrid: dict[str, Any] | None = None,
        rerank: dict[str, Any] | None = None,
        return_config: dict[str, Any] | None = None,
    ) -> None:
        """Resolve the client/corpus and remember the search settings.

        Args:
            corpus_id: Corpus to search; passed through ``resolve_corpus_id``.
            client: Existing sync client; passed to ``resolve_client`` together
                with ``api_key`` and ``api_host``.
            api_key: Credential forwarded to ``resolve_client``.
            api_host: Host forwarded to ``resolve_client``.
            top_k: Number of results requested per search.
            filters: LlamaIndex metadata filters, converted with
                ``llama_filters_to_k2`` on every search.
            hybrid: Forwarded unchanged as the ``hybrid`` search argument.
            rerank: Forwarded unchanged as the ``rerank`` search argument.
            return_config: Base return configuration handed to
                ``merge_return_config``.
        """
        super().__init__()
        self._client = resolve_client(client=client, api_key=api_key, api_host=api_host)
        # Created on first async use only; see _ensure_async_client().
        self._async_client: AsyncKnowledge2 | None = None
        self._corpus_id = resolve_corpus_id(corpus_id)
        self._top_k = top_k
        self._filters = filters
        self._hybrid = hybrid
        self._rerank = rerank
        self._return_config = return_config

    def _ensure_async_client(self) -> AsyncKnowledge2:
        """Lazily create an AsyncKnowledge2 sharing the sync client's credentials."""
        if self._async_client is None:
            self._async_client = resolve_async_client(sync_client=self._client)
        return self._async_client

    async def aclose(self) -> None:
        """Close the lazily-created async client, releasing connections."""
        if self._async_client is not None:
            await self._async_client.close()
            self._async_client = None

    @staticmethod
    def _result_to_node_with_score(
        result: Mapping[str, Any], corpus_id: str
    ) -> NodeWithScore | None:
        """Convert a single K2 search result to a LlamaIndex NodeWithScore.

        Returns ``None`` when the result carries no ``chunk_id``.
        """
        custom_meta = result.get("custom_metadata") or {}
        system_meta = result.get("system_metadata") or {}
        if not custom_meta and not system_meta:
            # Fall back to the single legacy ``metadata`` mapping.
            legacy = result.get("metadata")
            if isinstance(legacy, dict):
                custom_meta = legacy
        if not isinstance(custom_meta, dict):
            custom_meta = {}
        if not isinstance(system_meta, dict):
            system_meta = {}
        # Custom metadata wins over system metadata on key collisions.
        chunk_metadata = {**system_meta, **custom_meta}

        chunk_id = result.get("chunk_id")
        if not chunk_id:
            return None

        node = TextNode(
            id_=chunk_id,
            text=result.get("text") or "",
            metadata={
                **chunk_metadata,
                "chunk_id": chunk_id,
                "corpus_id": corpus_id,
                "raw_score": result.get("raw_score"),
                "offset_start": result.get("offset_start"),
                "offset_end": result.get("offset_end"),
                "page_start": result.get("page_start"),
                "page_end": result.get("page_end"),
            },
        )
        score = result.get("score")
        if score is None:
            score = result.get("raw_score")
        return NodeWithScore(node=node, score=score)

    @staticmethod
    def _require_query_text(query_bundle: QueryBundle) -> str:
        """Return the bundle's text query or raise ``ValueError`` if it has none."""
        query_text = getattr(query_bundle, "query_str", None)
        if not query_text:
            raise ValueError("K2LlamaIndexRetriever requires a text query")
        return query_text

    def _search_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments shared by the sync and async search calls."""
        return {
            "top_k": self._top_k,
            "filters": llama_filters_to_k2(self._filters),
            "hybrid": self._hybrid,
            "rerank": self._rerank,
            "return_config": merge_return_config(
                base=self._return_config,
                override=None,
                include_text=True,
                include_scores=True,
                include_provenance=True,
            ),
        }

    def _response_to_nodes(self, response: Mapping[str, Any]) -> list[NodeWithScore]:
        """Convert a search response into nodes, skipping results without a chunk id."""
        # ``or []`` also covers an explicit ``"results": null`` in the payload.
        converted = (
            self._result_to_node_with_score(result, self._corpus_id)
            for result in response.get("results") or []
        )
        return [node for node in converted if node is not None]

    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        """Run a synchronous K2 search for the bundle's text query.

        Raises:
            ValueError: If the bundle has no non-empty ``query_str``.
        """
        query_text = self._require_query_text(query_bundle)
        response = self._client.search(self._corpus_id, query_text, **self._search_kwargs())
        return self._response_to_nodes(response)

    async def _aretrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        """Async variant for event-loop-safe LlamaIndex integration.

        Raises:
            ValueError: If the bundle has no non-empty ``query_str``.
        """
        query_text = self._require_query_text(query_bundle)
        async_client = self._ensure_async_client()
        response = await async_client.search(
            self._corpus_id, query_text, **self._search_kwargs()
        )
        return self._response_to_nodes(response)
@@ -0,0 +1,109 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, cast
4
+
5
+ from sdk import Knowledge2
6
+ from sdk.integrations._client import merge_return_config, resolve_client, resolve_corpus_id
7
+
8
+ try:
9
+ from llama_index.core.tools import FunctionTool
10
+ except ImportError as exc: # pragma: no cover - import-time dependency guard
11
+ raise ImportError(
12
+ "LlamaIndex integration requires llama-index-core. Install with `pip install .[llamaindex]`."
13
+ ) from exc
14
+
15
+
16
def create_k2_llamaindex_tools(
    *,
    corpus_id: str | None = None,
    client: Knowledge2 | None = None,
    api_key: str | None = None,
    api_host: str | None = None,
    default_top_k: int = 10,
    default_hybrid: dict[str, Any] | None = None,
    default_generation: dict[str, Any] | None = None,
) -> list[FunctionTool]:
    """Build the four K2 ``FunctionTool`` objects bound to one client and corpus.

    The client is resolved once via ``resolve_client`` and the corpus once via
    ``resolve_corpus_id``; every tool closes over those two values.

    Args:
        corpus_id: Corpus the tools operate on (passed to ``resolve_corpus_id``).
        client: Existing client, forwarded to ``resolve_client``.
        api_key: Credential forwarded to ``resolve_client``.
        api_host: Host forwarded to ``resolve_client``.
        default_top_k: Default ``top_k`` of the search and answer tools.
        default_hybrid: ``hybrid`` argument sent by the search and answer tools.
        default_generation: ``generation`` argument used by the answer tool when
            the caller supplies none.

    Returns:
        Tools named ``k2_search``, ``k2_ingest_text``, ``k2_build_indexes`` and
        ``k2_generate_answer``, in that order.
    """
    k2 = resolve_client(client=client, api_key=api_key, api_host=api_host)
    target_corpus = resolve_corpus_id(corpus_id)

    # NOTE: the inner names, signatures and docstrings are handed to
    # FunctionTool.from_defaults, so they are kept exactly as they were.

    def k2_search(
        query: str,
        top_k: int = default_top_k,
        filters: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Search a K2 corpus and return scored chunks."""
        payload = k2.search(
            target_corpus,
            query,
            top_k=top_k,
            filters=filters,
            hybrid=default_hybrid,
            return_config=merge_return_config(base=None, override=None),
        )
        return cast("dict[str, Any]", payload)

    def k2_ingest_text(
        raw_text: str,
        source_uri: str | None = None,
        metadata: dict[str, Any] | None = None,
        auto_index: bool = False,
    ) -> dict[str, Any]:
        """Ingest a text document into K2."""
        payload = k2.upload_document(
            target_corpus,
            raw_text=raw_text,
            source_uri=source_uri,
            metadata=metadata,
            auto_index=auto_index,
        )
        return cast("dict[str, Any]", payload)

    def k2_build_indexes(
        dense: bool = True,
        sparse: bool = True,
        mode: str = "incremental",
        wait: bool = True,
    ) -> dict[str, Any]:
        """Trigger K2 index build for the current corpus."""
        payload = k2.build_indexes(
            target_corpus,
            dense=dense,
            sparse=sparse,
            mode=mode,
            wait=wait,
        )
        return cast("dict[str, Any]", payload)

    def k2_generate_answer(
        query: str,
        top_k: int = default_top_k,
        filters: dict[str, Any] | None = None,
        generation: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Generate a grounded answer using K2 retrieval + server-side LLM generation."""
        # An explicit per-call ``generation`` wins over the factory default.
        if generation is None:
            effective_generation = default_generation
        else:
            effective_generation = generation
        payload = k2.search_generate(
            target_corpus,
            query,
            top_k=top_k,
            filters=filters,
            hybrid=default_hybrid,
            generation=effective_generation,
            return_config=merge_return_config(base=None, override=None),
        )
        return cast("dict[str, Any]", payload)

    tool_functions = (k2_search, k2_ingest_text, k2_build_indexes, k2_generate_answer)
    # Each tool is registered under its function's own name.
    return [
        FunctionTool.from_defaults(fn=tool_fn, name=tool_fn.__name__)
        for tool_fn in tool_functions
    ]
@@ -0,0 +1,320 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any, ClassVar, Sequence
6
+
7
+ from pydantic import ConfigDict, Field, PrivateAttr
8
+
9
+ from sdk import Knowledge2
10
+ from sdk.integrations._client import merge_return_config, resolve_client, resolve_corpus_id
11
+ from sdk.integrations.llamaindex.filters import llama_filters_to_k2
12
+
13
+ try:
14
+ from llama_index.core.schema import BaseNode, MetadataMode, TextNode
15
+ from llama_index.core.vector_stores.types import (
16
+ BasePydanticVectorStore,
17
+ VectorStoreQuery,
18
+ VectorStoreQueryResult,
19
+ )
20
+ except ImportError as exc: # pragma: no cover - import-time dependency guard
21
+ raise ImportError(
22
+ "LlamaIndex integration requires llama-index-core. Install with `pip install .[llamaindex]`."
23
+ ) from exc
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
def _node_text(node: BaseNode) -> str:
    """Return the node's text content without metadata, or ``""`` if it has none.

    ``get_content`` is tried first; if it raises, the node's ``text`` attribute
    (when present) is used instead.
    """
    try:
        content = node.get_content(metadata_mode=MetadataMode.NONE)
    except Exception:  # pragma: no cover - defensive for node variants
        content = getattr(node, "text", "")
    if content:
        return content
    return ""
36
+
37
+
38
def _resolve_source_uri(
    *,
    node: BaseNode,
    ref_doc_id: str | None,
    source_uri_prefix: str,
) -> str:
    """Derive the source URI used when ingesting ``node``.

    Preference order: the node's ``source_node`` identifier, then ``ref_doc_id``,
    then the node's own ``node_id``. A source identifier that already contains
    ``://`` is returned as-is; every other identifier is appended to
    ``source_uri_prefix``.
    """
    origin = getattr(node, "source_node", None)
    origin_id: str | None = None

    if isinstance(origin, str):
        origin_id = origin.strip() or None
    elif origin is not None:
        # Usually a RelatedNodeInfo, occasionally a BaseNode-like object; either
        # way prefer ``node_id`` and fall back to ``id_``.
        raw_id = getattr(origin, "node_id", None) or getattr(origin, "id_", None)
        if raw_id:
            origin_id = str(raw_id).strip() or None

    if origin_id:
        return origin_id if "://" in origin_id else f"{source_uri_prefix}{origin_id}"

    fallback_id = ref_doc_id if ref_doc_id else node.node_id
    return f"{source_uri_prefix}{fallback_id}"
66
+
67
+
68
def _resolve_result_doc_id(*, chunk_id: str, metadata: dict[str, Any]) -> str:
    """Pick a document identifier for a search result, defaulting to ``chunk_id``.

    ``document_id`` then ``doc_id`` are looked up first in ``metadata["provenance"]``
    (when that is a dict) and then in ``metadata`` itself. The first value that is
    not ``None`` and not blank after ``str(...).strip()`` is returned.
    """
    provenance = metadata.get("provenance")
    lookup_order = [provenance, metadata] if isinstance(provenance, dict) else [metadata]

    for mapping in lookup_order:
        for key in ("document_id", "doc_id"):
            raw = mapping.get(key)
            if raw is None:
                continue
            cleaned = str(raw).strip()
            if cleaned:
                return cleaned
    return chunk_id
92
+
93
+
94
class K2LlamaIndexVectorStore(BasePydanticVectorStore):
    """Doc-centric LlamaIndex VectorStore adapter for Knowledge2.

    This adapter maps LlamaIndex vector-store operations onto K2 document/search APIs:
    ``add`` uploads one K2 document per node, ``delete`` removes a K2 document, and
    ``query`` runs a K2 text search. Embedding-only queries are not supported.
    """

    stores_text: bool = True

    # Connection settings; the client and API key are excluded from serialization.
    k2_client: Any | None = Field(default=None, alias="client", exclude=True)
    api_key: str | None = Field(default=None, exclude=True, repr=False)
    api_host: str | None = None
    corpus_id: str | None = None

    # Search defaults used by query().
    top_k: int = 10
    filters: dict[str, Any] | None = None
    hybrid: dict[str, Any] | None = None
    rerank: dict[str, Any] | None = None
    return_config: dict[str, Any] | None = None

    auto_index_on_add: bool = False
    # LlamaIndex vector-store APIs are typically synchronous. K2 ingestion happens via
    # background jobs, so we optionally wait for ingest completion on add to avoid
    # surprising "no chunks to index" failures when users build indexes immediately.
    wait_for_ingest_on_add: bool = True
    ingest_poll_s: int = 2
    ingest_timeout_s: float | None = 300.0
    source_uri_prefix: str = "llamaindex://node/"

    _client: Knowledge2 | Any = PrivateAttr()
    _corpus_id: str = PrivateAttr()
    # Maps LlamaIndex node ids and ref_doc_ids to the K2 document id created by add().
    _node_to_doc_id: dict[str, str] = PrivateAttr(default_factory=dict)

    model_config: ClassVar[ConfigDict] = ConfigDict(
        arbitrary_types_allowed=True,
        populate_by_name=True,
    )

    def model_post_init(self, __context: Any) -> None:
        """Resolve the K2 client and corpus id once the model fields are set."""
        self._client = resolve_client(
            client=self.k2_client, api_key=self.api_key, api_host=self.api_host
        )
        self._corpus_id = resolve_corpus_id(self.corpus_id)

    @property
    def client(self) -> Any:
        """Expose the underlying Knowledge2 client per BasePydanticVectorStore contract."""
        return self._client

    def get(self, text_id: str) -> list[float]:
        """K2 does not expose direct embedding lookup by id."""
        raise NotImplementedError("K2 does not support vector lookup by text_id")

    def _wait_for_ingest_job(self, job_id: str) -> None:
        """Poll ``get_job`` until the ingest job reaches a terminal status.

        Returns silently when the client has no ``get_job`` method.

        Raises:
            RuntimeError: If the job ends as ``failed`` or ``canceled``.
            TimeoutError: If ``ingest_timeout_s`` elapses before the job finishes.
        """
        if not hasattr(self._client, "get_job"):
            logger.debug(
                "Skipping ingest wait; no public get_job method on client for job=%s",
                job_id,
            )
            return

        start = time.monotonic()
        while True:
            job = self._client.get_job(job_id)
            status = job.get("status")
            if status in {"succeeded", "failed", "canceled"}:
                if status != "succeeded":
                    error_message = job.get("error_message")
                    if not error_message:
                        error_message = f"Job {job_id} ended with status={status}"
                    raise RuntimeError(error_message)
                return

            # A ``None`` timeout means wait indefinitely.
            if (
                self.ingest_timeout_s is not None
                and (time.monotonic() - start) > self.ingest_timeout_s
            ):
                raise TimeoutError(f"Timed out waiting for ingest job {job_id}")

            time.sleep(self.ingest_poll_s)

    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
        """Add nodes by ingesting documents into K2.

        Each node is uploaded as its own document. Recognized ``add_kwargs``:
        ``wait`` (overrides ``wait_for_ingest_on_add`` when not ``None``) and
        ``log_jobs`` (log an info line per ingest job that is waited on).

        Returns:
            The K2 document ids, in the same order as ``nodes``.
        """
        added_doc_ids: list[str] = []
        wait_for_ingest = add_kwargs.get("wait")
        if wait_for_ingest is None:
            wait_for_ingest = self.wait_for_ingest_on_add
        wait_for_ingest = bool(wait_for_ingest)
        log_jobs = bool(add_kwargs.get("log_jobs", False))

        for node in nodes:
            node_id = node.node_id
            ref_doc_id = getattr(node, "ref_doc_id", None)
            source_uri = _resolve_source_uri(
                node=node,
                ref_doc_id=ref_doc_id,
                source_uri_prefix=self.source_uri_prefix,
            )

            # Copy so the node's own metadata mapping is never mutated.
            metadata = dict(getattr(node, "metadata", {}) or {})
            metadata.setdefault("llama_node_id", node_id)
            if ref_doc_id:
                metadata.setdefault("llama_ref_doc_id", ref_doc_id)

            response = self._client.upload_document(
                self._corpus_id,
                raw_text=_node_text(node),
                source_uri=source_uri,
                metadata=metadata,
                auto_index=False,
            )
            doc_id = response["id"]
            added_doc_ids.append(doc_id)
            self._node_to_doc_id[node_id] = doc_id
            if ref_doc_id:
                self._node_to_doc_id[ref_doc_id] = doc_id

            job_id = response.get("job_id")
            if wait_for_ingest and job_id:
                if log_jobs:
                    # Avoid noisy polling here; the smoke runner already prints job transitions.
                    # This just makes job creation visible in logs when desired.
                    logger.info(
                        "[job] job_id=%s job_type=ingest_document status=created doc_id=%s",
                        job_id,
                        doc_id,
                    )
                self._wait_for_ingest_job(job_id)

        if self.auto_index_on_add and added_doc_ids:
            self._client.build_indexes(
                self._corpus_id, dense=True, sparse=True, mode="incremental", wait=True
            )

        return added_doc_ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document from K2 by mapped ref_doc_id or raw doc_id.

        ``delete_kwargs["reindex"]`` (default ``False``) is forwarded to the client.
        """
        doc_id = self._node_to_doc_id.get(ref_doc_id, ref_doc_id)
        reindex = bool(delete_kwargs.get("reindex", False))
        self._client.delete_document(self._corpus_id, doc_id, confirm=True, reindex=reindex)

        # Forget every local key (node id or ref_doc_id) that pointed at this document.
        drop_keys = [key for key, value in self._node_to_doc_id.items() if value == doc_id]
        for key in drop_keys:
            self._node_to_doc_id.pop(key, None)

    @staticmethod
    def _merge_result_metadata(result: Any) -> dict[str, Any]:
        """Merge a result's system and custom metadata; custom keys win on collision."""
        custom_meta = result.get("custom_metadata") or {}
        system_meta = result.get("system_metadata") or {}
        if not custom_meta and not system_meta:
            # Fall back to the single legacy ``metadata`` mapping.
            legacy = result.get("metadata")
            if isinstance(legacy, dict):
                custom_meta = legacy
        if not isinstance(custom_meta, dict):
            custom_meta = {}
        if not isinstance(system_meta, dict):
            system_meta = {}
        return {**system_meta, **custom_meta}

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query K2 and return LlamaIndex vector-store query results.

        The query text comes from ``query.query_str`` or ``kwargs["query_str"]``.
        Filters are taken from ``kwargs["filters"]``, else converted from
        ``query.filters``, else the store-level ``filters``.

        Raises:
            ValueError: If no text query is available.
        """
        query_str = query.query_str or kwargs.get("query_str")
        if not query_str:
            raise ValueError(
                "K2LlamaIndexVectorStore requires text queries; embedding-only VectorStoreQuery is unsupported"
            )

        query_top_k = query.similarity_top_k or kwargs.get("similarity_top_k") or self.top_k

        query_filters = kwargs.get("filters")
        if query_filters is None and query.filters is not None:
            query_filters = llama_filters_to_k2(query.filters)
        if query_filters is None:
            query_filters = self.filters

        response = self._client.search(
            self._corpus_id,
            query_str,
            top_k=int(query_top_k),
            filters=query_filters,
            hybrid=self.hybrid,
            rerank=self.rerank,
            return_config=merge_return_config(
                base=self.return_config,
                override=None,
                include_text=True,
                include_scores=True,
                include_provenance=True,
            ),
        )

        ids: list[str] = []
        nodes: list[BaseNode] = []
        similarities: list[float] = []

        # ``or []`` also covers an explicit ``"results": null`` in the payload.
        for result in response.get("results") or []:
            chunk_id = result.get("chunk_id")
            if not chunk_id:
                continue

            chunk_metadata = self._merge_result_metadata(result)
            doc_id = _resolve_result_doc_id(chunk_id=chunk_id, metadata=chunk_metadata)

            # Prefer ``score``, then ``raw_score``, then 0.0 so similarities stay floats.
            score = result.get("score")
            if score is None:
                score = result.get("raw_score")
            if score is None:
                score = 0.0

            node = TextNode(
                id_=chunk_id,
                text=result.get("text") or "",
                metadata={
                    **chunk_metadata,
                    "document_id": doc_id,
                    "chunk_id": chunk_id,
                    "corpus_id": self._corpus_id,
                    "raw_score": result.get("raw_score"),
                },
            )

            # Keep query IDs delete-compatible with doc-centric write/delete semantics.
            ids.append(doc_id)
            nodes.append(node)
            similarities.append(float(score))

        return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=similarities)

    def persist(self, persist_path: str, fs: Any = None) -> None:
        """No-op: K2 persists state remotely in the K2 backend."""
        return
sdk/models/__init__.py ADDED
@@ -0,0 +1,18 @@
1
+ """Pydantic response models for the Knowledge2 SDK.
2
+
3
+ Requires: pip install knowledge2[pydantic]
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
9
def __getattr__(name: str):
    """Provide a helpful error when importing model classes without pydantic.

    This module-level hook only runs for names the module does not define, so it
    always raises:

    Raises:
        AttributeError: For dunder names (e.g. ``__all__`` during a star-import or
            ``__wrapped__`` during introspection), and for any other unknown name
            when pydantic is installed.
        ImportError: For a non-dunder name when pydantic is not installed.
    """
    # Dunder lookups are protocol probes, not model imports. Answering them with
    # ImportError would break ``from ... import *`` and introspection helpers
    # whenever pydantic is absent.
    if name.startswith("__") and name.endswith("__"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    try:
        import pydantic  # noqa: F401 - imported only to detect availability
    except ImportError:
        raise ImportError(
            f"Cannot import '{name}' — Pydantic response models require the optional "
            "'pydantic' dependency. Install with: pip install 'knowledge2[pydantic]'"
        ) from None
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
sdk/models/_base.py ADDED
@@ -0,0 +1,24 @@
1
+ """Base Pydantic model for Knowledge2 API responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
+
8
class K2BaseModel(BaseModel):
    """Shared parent class for every Knowledge2 API response model.

    Settings applied through ``model_config``:
    - ``extra="allow"``: fields the model does not declare are kept, not rejected.
    - ``validate_assignment=True``: assigning to a field after construction
      re-runs validation.
    - ``validate_by_alias=True``: input may be keyed by a field's alias.
      NOTE(review): this key appears to exist only in newer Pydantic 2.x
      releases; confirm the minimum supported version.
    - ``use_enum_values=True``: enum fields hold the member's value rather than
      the enum member itself.
    - ``populate_by_name=True``: input may also be keyed by the Python field
      name, not only by the alias.
    """

    model_config = ConfigDict(
        populate_by_name=True,
        validate_by_alias=True,
        validate_assignment=True,
        use_enum_values=True,
        extra="allow",
    )