knowledge2 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge2-0.4.0.dist-info/METADATA +556 -0
- knowledge2-0.4.0.dist-info/RECORD +139 -0
- knowledge2-0.4.0.dist-info/WHEEL +5 -0
- knowledge2-0.4.0.dist-info/top_level.txt +1 -0
- sdk/__init__.py +70 -0
- sdk/_async_base.py +525 -0
- sdk/_async_paging.py +57 -0
- sdk/_base.py +541 -0
- sdk/_logging.py +41 -0
- sdk/_paging.py +73 -0
- sdk/_preview.py +70 -0
- sdk/_raw_response.py +25 -0
- sdk/_request_options.py +51 -0
- sdk/_transport.py +144 -0
- sdk/_validation.py +25 -0
- sdk/_validation_response.py +36 -0
- sdk/_version.py +3 -0
- sdk/async_client.py +320 -0
- sdk/async_resources/__init__.py +45 -0
- sdk/async_resources/_mixin_base.py +42 -0
- sdk/async_resources/a2a.py +230 -0
- sdk/async_resources/agents.py +489 -0
- sdk/async_resources/audit.py +145 -0
- sdk/async_resources/auth.py +133 -0
- sdk/async_resources/console.py +409 -0
- sdk/async_resources/corpora.py +276 -0
- sdk/async_resources/deployments.py +106 -0
- sdk/async_resources/documents.py +592 -0
- sdk/async_resources/feeds.py +248 -0
- sdk/async_resources/indexes.py +208 -0
- sdk/async_resources/jobs.py +165 -0
- sdk/async_resources/metadata.py +48 -0
- sdk/async_resources/models.py +102 -0
- sdk/async_resources/onboarding.py +538 -0
- sdk/async_resources/orgs.py +37 -0
- sdk/async_resources/pipelines.py +523 -0
- sdk/async_resources/projects.py +90 -0
- sdk/async_resources/search.py +262 -0
- sdk/async_resources/training.py +357 -0
- sdk/async_resources/usage.py +91 -0
- sdk/client.py +417 -0
- sdk/config.py +182 -0
- sdk/errors.py +178 -0
- sdk/examples/auth_factory.py +34 -0
- sdk/examples/batch_operations.py +57 -0
- sdk/examples/document_upload.py +56 -0
- sdk/examples/e2e_lifecycle.py +213 -0
- sdk/examples/error_handling.py +61 -0
- sdk/examples/pagination.py +64 -0
- sdk/examples/quickstart.py +36 -0
- sdk/examples/request_options.py +44 -0
- sdk/examples/search.py +64 -0
- sdk/integrations/__init__.py +57 -0
- sdk/integrations/_client.py +101 -0
- sdk/integrations/langchain/__init__.py +6 -0
- sdk/integrations/langchain/retriever.py +166 -0
- sdk/integrations/langchain/tools.py +108 -0
- sdk/integrations/llamaindex/__init__.py +11 -0
- sdk/integrations/llamaindex/filters.py +78 -0
- sdk/integrations/llamaindex/retriever.py +162 -0
- sdk/integrations/llamaindex/tools.py +109 -0
- sdk/integrations/llamaindex/vector_store.py +320 -0
- sdk/models/__init__.py +18 -0
- sdk/models/_base.py +24 -0
- sdk/models/_registry.py +457 -0
- sdk/models/a2a.py +92 -0
- sdk/models/agents.py +109 -0
- sdk/models/audit.py +28 -0
- sdk/models/auth.py +49 -0
- sdk/models/chunks.py +20 -0
- sdk/models/common.py +14 -0
- sdk/models/console.py +103 -0
- sdk/models/corpora.py +48 -0
- sdk/models/deployments.py +13 -0
- sdk/models/documents.py +126 -0
- sdk/models/embeddings.py +24 -0
- sdk/models/evaluation.py +17 -0
- sdk/models/feedback.py +9 -0
- sdk/models/feeds.py +57 -0
- sdk/models/indexes.py +36 -0
- sdk/models/jobs.py +52 -0
- sdk/models/models.py +26 -0
- sdk/models/onboarding.py +323 -0
- sdk/models/orgs.py +11 -0
- sdk/models/pipelines.py +147 -0
- sdk/models/projects.py +19 -0
- sdk/models/search.py +149 -0
- sdk/models/training.py +57 -0
- sdk/models/usage.py +39 -0
- sdk/namespaces.py +386 -0
- sdk/py.typed +0 -0
- sdk/resources/__init__.py +45 -0
- sdk/resources/_mixin_base.py +40 -0
- sdk/resources/a2a.py +230 -0
- sdk/resources/agents.py +487 -0
- sdk/resources/audit.py +144 -0
- sdk/resources/auth.py +138 -0
- sdk/resources/console.py +411 -0
- sdk/resources/corpora.py +269 -0
- sdk/resources/deployments.py +105 -0
- sdk/resources/documents.py +597 -0
- sdk/resources/feeds.py +246 -0
- sdk/resources/indexes.py +210 -0
- sdk/resources/jobs.py +164 -0
- sdk/resources/metadata.py +53 -0
- sdk/resources/models.py +99 -0
- sdk/resources/onboarding.py +542 -0
- sdk/resources/orgs.py +35 -0
- sdk/resources/pipeline_builder.py +257 -0
- sdk/resources/pipelines.py +520 -0
- sdk/resources/projects.py +87 -0
- sdk/resources/search.py +277 -0
- sdk/resources/training.py +358 -0
- sdk/resources/usage.py +92 -0
- sdk/types/__init__.py +366 -0
- sdk/types/a2a.py +88 -0
- sdk/types/agents.py +133 -0
- sdk/types/audit.py +26 -0
- sdk/types/auth.py +45 -0
- sdk/types/chunks.py +18 -0
- sdk/types/common.py +10 -0
- sdk/types/console.py +99 -0
- sdk/types/corpora.py +42 -0
- sdk/types/deployments.py +11 -0
- sdk/types/documents.py +104 -0
- sdk/types/embeddings.py +22 -0
- sdk/types/evaluation.py +15 -0
- sdk/types/feedback.py +7 -0
- sdk/types/feeds.py +61 -0
- sdk/types/indexes.py +30 -0
- sdk/types/jobs.py +50 -0
- sdk/types/models.py +22 -0
- sdk/types/onboarding.py +395 -0
- sdk/types/orgs.py +9 -0
- sdk/types/pipelines.py +177 -0
- sdk/types/projects.py +14 -0
- sdk/types/search.py +116 -0
- sdk/types/training.py +55 -0
- sdk/types/usage.py +37 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Minimal example: create client, project, corpus, upload document, build indexes, search."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
8
|
+
|
|
9
|
+
# Credentials are mandatory: stop before any API work when they are missing.
api_key = os.environ.get("K2_API_KEY")
if not api_key:
    raise SystemExit("K2_API_KEY is required")

api_host = os.environ.get("K2_BASE_URL", "https://api.knowledge2.ai")

try:
    client = Knowledge2(api_host=api_host, api_key=api_key)

    # Provision a project and a corpus to hold the demo document.
    project = client.create_project("quickstart-demo")
    corpus = client.create_corpus(project["id"], "quickstart-corpus")
    corpus_id = corpus["id"]

    # Ingest one inline text document.
    client.upload_document(
        corpus_id,
        raw_text="Knowledge2 enables semantic search over your documents.",
        source_uri="doc://intro",
    )

    # Build both index types; wait=True is passed so the search below runs after the build call returns.
    client.build_indexes(corpus_id, dense=True, sparse=True, wait=True)

    # Query the corpus and print the first 80 characters of every hit.
    results = client.search(corpus_id, "What does Knowledge2 do?", top_k=5)
    for hit in results.get("results", []):
        snippet = hit.get("text") or ""
        print(snippet[:80])

except Knowledge2Error as err:
    # Report the API failure, then re-raise so the traceback is preserved.
    print(f"API error: {err}")
    raise
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Per-call request options — override timeout and retry for specific calls.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
K2_API_KEY=... python -m sdk.examples.request_options
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from sdk import ClientTimeouts, Knowledge2, RequestOptions
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main() -> None:
    """Demonstrate per-call request options and raw-response access.

    Environment:
        K2_API_KEY: required API key.
        K2_API_HOST / K2_BASE_URL: optional host override. ``K2_API_HOST`` is
            checked first (the name this example originally used), then
            ``K2_BASE_URL`` (the name used by the other examples and the
            integration helpers); otherwise the public endpoint is used.

    Raises:
        SystemExit: if ``K2_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("K2_API_KEY")
    if not api_key:
        # Same friendly failure as the other examples instead of a bare KeyError.
        raise SystemExit("K2_API_KEY is required")

    api_host = (
        os.environ.get("K2_API_HOST")
        or os.environ.get("K2_BASE_URL")
        or "https://api.knowledge2.ai"
    )
    client = Knowledge2(api_host=api_host, api_key=api_key)

    # Longer timeout for a known-slow bulk operation
    slow_opts = RequestOptions(
        timeout=ClientTimeouts(read=300),
        max_retries=5,
    )

    # Passthrough tracing headers
    traced_opts = RequestOptions(
        passthrough_headers={
            "X-Request-ID": "example-123",
            "X-Correlation-ID": "trace-abc",
        },
    )

    # Use per-call options
    corpora = client.list_corpora(request_options=traced_opts)
    print(f"Found {corpora.total} corpora (with tracing headers)")

    # Apply the extended timeout/retry budget to a single call; previously
    # slow_opts was constructed but never passed to any request.
    patient = client.list_corpora(request_options=slow_opts)
    print(f"Found {patient.total} corpora (with extended timeout)")

    # Raw response access
    raw = client.with_raw_response.list_corpora()
    print(f"Status: {raw.status_code}")
    print(f"Headers: {dict(list(raw.headers.items())[:3])}...")
    print(f"Parsed: {type(raw.parsed).__name__} with {len(raw.parsed)} items")


if __name__ == "__main__":
    main()
|
sdk/examples/search.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Search examples: basic search, hybrid config, filters, search_generate (RAG)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from sdk import Knowledge2, Knowledge2Error
|
|
8
|
+
from sdk.types.search import SearchHybridConfig
|
|
9
|
+
|
|
10
|
+
# Credentials are mandatory: stop before any API work when they are missing.
api_key = os.environ.get("K2_API_KEY")
if not api_key:
    raise SystemExit("K2_API_KEY is required")

try:
    client = Knowledge2(api_key=api_key)
    corpus_id = os.environ.get("K2_CORPUS_ID", "corpus-123")

    # 1) Basic search: print the first 80 characters of each hit.
    basic = client.search(corpus_id, "machine learning basics", top_k=5)
    for hit in basic.get("results", []):
        text = hit.get("text") or ""
        print(text[:80])

    # 2) Hybrid search configured through the SearchHybridConfig TypedDict.
    hybrid_config: SearchHybridConfig = {
        "enabled": True,
        "fusion_mode": "rrf",
        "rrf_k": 60,
        "dense_weight": 0.6,
        "sparse_weight": 0.4,
    }
    return_fields = {"include_text": True, "include_scores": True}
    hybrid = client.search(
        corpus_id,
        "neural networks",
        top_k=5,
        hybrid=hybrid_config,
        return_config=return_fields,
    )
    for hit in hybrid.get("results", []):
        print(hit.get("score"), (hit.get("text") or "")[:60])

    # 3) Search restricted by metadata filters.
    metadata_filters = {"topic": "docs", "org": "acme"}
    filtered = client.search(
        corpus_id,
        "API documentation",
        top_k=5,
        filters=metadata_filters,
    )
    for hit in filtered.get("results", []):
        print(hit.get("chunk_id"), (hit.get("text") or "")[:60])

    # 4) RAG: search_generate returns an answer plus the retrieval results.
    generation_settings = {"temperature": 0.3, "max_tokens": 500}
    rag = client.search_generate(
        corpus_id,
        "Summarize best practices for hybrid search.",
        top_k=5,
        generation=generation_settings,
    )
    answer = rag.get("answer") or ""
    print("Answer:", answer[:200])
    for hit in rag.get("results", []):
        print("Source:", (hit.get("text") or "")[:80])

except Knowledge2Error as err:
    # Report the API failure, then re-raise so the traceback is preserved.
    print(f"API error: {err}")
    raise
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Framework integrations for the Knowledge2 SDK.
|
|
2
|
+
|
|
3
|
+
These imports are lazy so users can install only one framework extra.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"K2LangChainRetriever",
|
|
12
|
+
"K2LlamaIndexRetriever",
|
|
13
|
+
"K2LlamaIndexVectorStore",
|
|
14
|
+
"create_k2_langchain_tools",
|
|
15
|
+
"create_k2_llamaindex_tools",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
19
|
+
# These are resolved lazily at runtime via __getattr__ so optional dependencies
|
|
20
|
+
# (langchain-core / llama-index-core) are not required for base installs.
|
|
21
|
+
from sdk.integrations.langchain import K2LangChainRetriever, create_k2_langchain_tools
|
|
22
|
+
from sdk.integrations.llamaindex import (
|
|
23
|
+
K2LlamaIndexRetriever,
|
|
24
|
+
K2LlamaIndexVectorStore,
|
|
25
|
+
create_k2_llamaindex_tools,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def __getattr__(name: str):
    """Resolve the package's public integration names on first access.

    The framework sub-package is imported only when one of its exported
    names is requested, so a missing optional dependency surfaces only for
    the integration that is actually used.

    Raises:
        AttributeError: if *name* is not one of the exported integration names.
    """
    if name == "K2LangChainRetriever" or name == "create_k2_langchain_tools":
        from sdk.integrations.langchain import K2LangChainRetriever, create_k2_langchain_tools

        if name == "K2LangChainRetriever":
            return K2LangChainRetriever
        return create_k2_langchain_tools

    llamaindex_exports = (
        "K2LlamaIndexRetriever",
        "K2LlamaIndexVectorStore",
        "create_k2_llamaindex_tools",
    )
    if name in llamaindex_exports:
        from sdk.integrations.llamaindex import (
            K2LlamaIndexRetriever,
            K2LlamaIndexVectorStore,
            create_k2_llamaindex_tools,
        )

        if name == "K2LlamaIndexRetriever":
            return K2LlamaIndexRetriever
        if name == "K2LlamaIndexVectorStore":
            return K2LlamaIndexVectorStore
        return create_k2_llamaindex_tools

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from sdk import AsyncKnowledge2, Knowledge2
|
|
6
|
+
|
|
7
|
+
DEFAULT_K2_API_HOST = "https://api.knowledge2.ai"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def resolve_async_client(
    *,
    sync_client: Knowledge2 | None = None,
    api_key: str | None = None,
    api_host: str | None = None,
) -> AsyncKnowledge2:
    """Build an AsyncKnowledge2 client.

    Resolution order:
      1) mirror the settings of *sync_client* when one is given
      2) the explicit api_key/api_host arguments
      3) the K2_API_KEY/K2_BASE_URL environment variables

    Raises:
        ValueError: if no sync client is given and no API key can be resolved.
    """
    if sync_client is None:
        key = api_key or os.getenv("K2_API_KEY")
        if not key:
            raise ValueError("K2 API key required: pass api_key/client or set K2_API_KEY")

        host = api_host or os.getenv("K2_BASE_URL") or DEFAULT_K2_API_HOST
        return AsyncKnowledge2(api_key=key, api_host=host)

    # NOTE: validate_responses is intentionally NOT propagated here.
    # Framework integrations (LangChain, LlamaIndex) call .get() on
    # results expecting raw dicts; Pydantic models would break them.
    mirrored_settings = {
        "api_key": sync_client.api_key,
        "api_host": sync_client.base_url,
        "admin_token": sync_client.admin_token,
        "bearer_token": sync_client.bearer_token,
        "bearer_token_factory": sync_client._bearer_token_factory,
        "token_cache_ttl": sync_client._token_cache_ttl,
        "org_id": sync_client.org_id,
        # Copy the headers so the two clients never share one dict.
        "headers": dict(sync_client._default_headers) if sync_client._default_headers else None,
        "max_retries": sync_client._max_retries,
    }
    return AsyncKnowledge2(**mirrored_settings)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def resolve_client(
    *,
    client: Knowledge2 | None = None,
    api_key: str | None = None,
    api_host: str | None = None,
) -> Knowledge2:
    """Return a configured Knowledge2 client.

    All parameters are keyword-only and optional (defaulting to ``None``),
    matching ``resolve_async_client``.

    Preference order:
      1) explicit client (returned unchanged)
      2) api_key/api_host parameters
      3) environment variables K2_API_KEY/K2_BASE_URL

    Raises:
        ValueError: if no client is given and no API key can be resolved.
    """
    if client is not None:
        return client

    resolved_api_key = api_key or os.getenv("K2_API_KEY")
    if not resolved_api_key:
        raise ValueError("K2 API key required: pass api_key/client or set K2_API_KEY")

    # The trailing `or` also covers an empty K2_BASE_URL, which a getenv()
    # default would pass through unchanged.
    resolved_api_host = api_host or os.getenv("K2_BASE_URL") or DEFAULT_K2_API_HOST
    return Knowledge2(api_key=resolved_api_key, api_host=resolved_api_host)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def resolve_corpus_id(corpus_id: str | None) -> str:
    """Resolve the corpus id from the argument or the K2_CORPUS_ID environment variable.

    The argument wins over the environment. String candidates are stripped of
    surrounding whitespace, and a blank or whitespace-only value counts as
    missing, so a stray newline in an env file neither passes validation nor
    ends up in the returned id.

    Raises:
        ValueError: if neither source provides a non-blank corpus id.
    """
    for candidate in (corpus_id, os.getenv("K2_CORPUS_ID")):
        # Non-string values are passed through untouched, as before.
        cleaned = candidate.strip() if isinstance(candidate, str) else candidate
        if cleaned:
            return cleaned
    raise ValueError("K2 corpus_id required: pass corpus_id or set K2_CORPUS_ID")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def merge_return_config(
    *,
    base: dict[str, object] | None = None,
    override: dict[str, object] | None = None,
    include_text: bool = True,
    include_scores: bool = True,
    include_provenance: bool = True,
) -> dict[str, object]:
    """Merge return configs with safe defaults for framework adapters.

    Layers are applied in increasing priority: the three ``include_*``
    defaults, then *base*, then *override*. ``base`` and ``override`` default
    to ``None`` so callers that only want the defaults can call the function
    with no arguments.

    Returns:
        A new dict; neither *base* nor *override* is mutated.
    """
    merged: dict[str, object] = {
        "include_text": include_text,
        "include_scores": include_scores,
        "include_provenance": include_provenance,
    }
    for layer in (base, override):
        if layer:
            merged.update(layer)
    return merged
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import ConfigDict, Field, PrivateAttr
|
|
6
|
+
|
|
7
|
+
from sdk import AsyncKnowledge2, Knowledge2
|
|
8
|
+
from sdk.integrations._client import (
|
|
9
|
+
merge_return_config,
|
|
10
|
+
resolve_async_client,
|
|
11
|
+
resolve_client,
|
|
12
|
+
resolve_corpus_id,
|
|
13
|
+
)
|
|
14
|
+
from sdk.types import SearchResult
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from langchain_core.callbacks import (
|
|
18
|
+
AsyncCallbackManagerForRetrieverRun,
|
|
19
|
+
CallbackManagerForRetrieverRun,
|
|
20
|
+
)
|
|
21
|
+
from langchain_core.documents import Document
|
|
22
|
+
from langchain_core.retrievers import BaseRetriever
|
|
23
|
+
except ImportError as exc: # pragma: no cover - import-time dependency guard
|
|
24
|
+
raise ImportError(
|
|
25
|
+
"LangChain integration requires langchain-core. Install with `pip install .[langchain]`."
|
|
26
|
+
) from exc
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class K2LangChainRetriever(BaseRetriever):
    """LangChain retriever backed by Knowledge2 search APIs.

    The sync path uses the client resolved at construction time; the async
    path lazily creates an ``AsyncKnowledge2`` from that client's settings.
    Search parameters set on the instance act as defaults and can be
    overridden per call through keyword arguments (``top_k``, ``filters``,
    ``hybrid``, ``rerank``, ``return_config``).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)

    # Connection settings. `client` and `api_key` are declared with
    # exclude=True; `api_key` additionally has repr=False.
    client: Any | None = Field(default=None, exclude=True)
    api_key: str | None = Field(default=None, exclude=True, repr=False)
    api_host: str | None = None
    corpus_id: str | None = None

    # Instance-level search defaults.
    top_k: int = 10
    filters: dict[str, Any] | None = None
    hybrid: dict[str, Any] | None = None
    rerank: dict[str, Any] | None = None
    return_config: dict[str, Any] | None = None

    # Resolved runtime state, populated in model_post_init / on first async use.
    _client: Knowledge2 | Any = PrivateAttr()
    _async_client: AsyncKnowledge2 | None = PrivateAttr(default=None)
    _corpus_id: str = PrivateAttr()

    def model_post_init(self, __context: Any) -> None:
        """Resolve the sync client and corpus id from fields or environment."""
        self._client = resolve_client(
            client=self.client, api_key=self.api_key, api_host=self.api_host
        )
        self._corpus_id = resolve_corpus_id(self.corpus_id)

    def _ensure_async_client(self) -> AsyncKnowledge2:
        """Lazily create an AsyncKnowledge2 sharing the sync client's credentials."""
        if self._async_client is None:
            self._async_client = resolve_async_client(sync_client=self._client)
        return self._async_client

    async def aclose(self) -> None:
        """Close the lazily-created async client, releasing connections."""
        if self._async_client is not None:
            await self._async_client.close()
            self._async_client = None

    @staticmethod
    def _result_to_document(result: SearchResult, corpus_id: str) -> Document:
        """Convert one search result into a LangChain ``Document``.

        Metadata is built from fixed provenance keys and then updated with
        the result's system and custom metadata (custom wins over system, and
        both win over the fixed keys because they are applied last).
        """
        custom_meta = result.get("custom_metadata") or {}
        system_meta = result.get("system_metadata") or {}
        if not custom_meta and not system_meta:
            # Fall back to the single legacy "metadata" mapping when the
            # split custom/system fields are absent or empty.
            legacy = result.get("metadata")
            if isinstance(legacy, dict):
                custom_meta = legacy
        if not isinstance(custom_meta, dict):
            custom_meta = {}
        if not isinstance(system_meta, dict):
            system_meta = {}
        result_metadata = {**system_meta, **custom_meta}

        metadata: dict[str, Any] = {
            "source": "knowledge2",
            "corpus_id": corpus_id,
            "chunk_id": result.get("chunk_id"),
            "score": result.get("score"),
            "raw_score": result.get("raw_score"),
            "offset_start": result.get("offset_start"),
            "offset_end": result.get("offset_end"),
            "page_start": result.get("page_start"),
            "page_end": result.get("page_end"),
        }
        metadata.update(result_metadata)

        return Document(page_content=result.get("text") or "", metadata=metadata)

    def _search_params(
        self,
        *,
        top_k: int | None = None,
        filters: dict[str, Any] | None = None,
        hybrid: dict[str, Any] | None = None,
        rerank: dict[str, Any] | None = None,
        return_config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Build the keyword arguments shared by the sync and async search calls.

        Each per-call value falls back to the instance default when it is
        ``None``; the return config is merged (defaults < instance < call).
        """
        return {
            "top_k": top_k if top_k is not None else self.top_k,
            "filters": filters if filters is not None else self.filters,
            "hybrid": hybrid if hybrid is not None else self.hybrid,
            "rerank": rerank if rerank is not None else self.rerank,
            "return_config": merge_return_config(
                base=self.return_config,
                override=return_config,
                include_text=True,
                include_scores=True,
                include_provenance=True,
            ),
        }

    def _search(
        self,
        query: str,
        *,
        top_k: int | None = None,
        filters: dict[str, Any] | None = None,
        hybrid: dict[str, Any] | None = None,
        rerank: dict[str, Any] | None = None,
        return_config: dict[str, Any] | None = None,
    ) -> list[SearchResult]:
        """Run a synchronous search and return the raw result list."""
        params = self._search_params(
            top_k=top_k,
            filters=filters,
            hybrid=hybrid,
            rerank=rerank,
            return_config=return_config,
        )
        response = self._client.search(self._corpus_id, query, **params)
        # `or []` also covers a response whose "results" value is None.
        return response.get("results") or []

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> list[Document]:
        """Synchronous retrieval entry point called by ``BaseRetriever``."""
        results = self._search(
            query,
            top_k=kwargs.get("top_k"),
            filters=kwargs.get("filters"),
            hybrid=kwargs.get("hybrid"),
            rerank=kwargs.get("rerank"),
            return_config=kwargs.get("return_config"),
        )
        return [self._result_to_document(result, self._corpus_id) for result in results]

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> list[Document]:
        """Asynchronous retrieval entry point; mirrors the sync path's parameter handling."""
        async_client = self._ensure_async_client()
        params = self._search_params(
            top_k=kwargs.get("top_k"),
            filters=kwargs.get("filters"),
            hybrid=kwargs.get("hybrid"),
            rerank=kwargs.get("rerank"),
            return_config=kwargs.get("return_config"),
        )
        response = await async_client.search(self._corpus_id, query, **params)
        # `or []` also covers a response whose "results" value is None.
        results = response.get("results") or []
        return [self._result_to_document(result, self._corpus_id) for result in results]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
from sdk import Knowledge2
|
|
6
|
+
from sdk.integrations._client import merge_return_config, resolve_client, resolve_corpus_id
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from langchain_core.tools import BaseTool, tool
|
|
10
|
+
except ImportError as exc: # pragma: no cover - import-time dependency guard
|
|
11
|
+
raise ImportError(
|
|
12
|
+
"LangChain integration requires langchain-core. Install with `pip install .[langchain]`."
|
|
13
|
+
) from exc
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_k2_langchain_tools(
    *,
    corpus_id: str | None = None,
    client: Knowledge2 | None = None,
    api_key: str | None = None,
    api_host: str | None = None,
    default_top_k: int = 10,
    default_hybrid: dict[str, Any] | None = None,
    default_generation: dict[str, Any] | None = None,
) -> list[BaseTool]:
    """Build the LangChain tool set for a single K2 corpus.

    The client and corpus id are resolved once, here, and captured by the
    four returned tools: ``k2_search``, ``k2_ingest_text``,
    ``k2_build_indexes`` and ``k2_generate_answer`` (in that order).
    """
    k2 = resolve_client(client=client, api_key=api_key, api_host=api_host)
    target_corpus = resolve_corpus_id(corpus_id)

    @tool("k2_search")
    def k2_search(
        query: str,
        top_k: int = default_top_k,
        filters: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Search a K2 corpus and return scored chunks."""
        response = k2.search(
            target_corpus,
            query,
            top_k=top_k,
            filters=filters,
            hybrid=default_hybrid,
            return_config=merge_return_config(base=None, override=None),
        )
        return cast("dict[str, Any]", response)

    @tool("k2_ingest_text")
    def k2_ingest_text(
        raw_text: str,
        source_uri: str | None = None,
        metadata: dict[str, Any] | None = None,
        auto_index: bool = False,
    ) -> dict[str, Any]:
        """Ingest a text document into K2."""
        response = k2.upload_document(
            target_corpus,
            raw_text=raw_text,
            source_uri=source_uri,
            metadata=metadata,
            auto_index=auto_index,
        )
        return cast("dict[str, Any]", response)

    @tool("k2_build_indexes")
    def k2_build_indexes(
        dense: bool = True,
        sparse: bool = True,
        mode: str = "incremental",
        wait: bool = True,
    ) -> dict[str, Any]:
        """Trigger K2 index build for the current corpus."""
        response = k2.build_indexes(
            target_corpus,
            dense=dense,
            sparse=sparse,
            mode=mode,
            wait=wait,
        )
        return cast("dict[str, Any]", response)

    @tool("k2_generate_answer")
    def k2_generate_answer(
        query: str,
        top_k: int = default_top_k,
        filters: dict[str, Any] | None = None,
        generation: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Generate a grounded answer using K2 retrieval + server-side LLM generation."""
        response = k2.search_generate(
            target_corpus,
            query,
            top_k=top_k,
            filters=filters,
            hybrid=default_hybrid,
            # A per-call generation config wins over the factory-level default.
            generation=generation if generation is not None else default_generation,
            return_config=merge_return_config(base=None, override=None),
        )
        return cast("dict[str, Any]", response)

    return [k2_search, k2_ingest_text, k2_build_indexes, k2_generate_answer]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""LlamaIndex integrations for Knowledge2."""
|
|
2
|
+
|
|
3
|
+
from sdk.integrations.llamaindex.retriever import K2LlamaIndexRetriever
|
|
4
|
+
from sdk.integrations.llamaindex.tools import create_k2_llamaindex_tools
|
|
5
|
+
from sdk.integrations.llamaindex.vector_store import K2LlamaIndexVectorStore
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"K2LlamaIndexRetriever",
|
|
9
|
+
"K2LlamaIndexVectorStore",
|
|
10
|
+
"create_k2_llamaindex_tools",
|
|
11
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters
|
|
7
|
+
except ImportError as exc: # pragma: no cover - import-time dependency guard
|
|
8
|
+
raise ImportError(
|
|
9
|
+
"LlamaIndex integration requires llama-index-core. Install with `pip install .[llamaindex]`."
|
|
10
|
+
) from exc
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Normalized LlamaIndex operator names/symbols -> K2 operator tokens.
_OPERATOR_MAP = {
    "eq": "==",
    "==": "==",
    "ne": "!=",
    "!=": "!=",
    "gt": ">",
    ">": ">",
    "gte": ">=",
    ">=": ">=",
    "lt": "<",
    "<": "<",
    "lte": "<=",
    "<=": "<=",
    "in": "in",
    "nin": "not_in",
    "not_in": "not_in",
    "contains": "contains",
    "text_match": "text_match",
}

# Conditions the structured K2 filter format documents ("and|or").
_SUPPORTED_CONDITIONS = frozenset({"and", "or"})


def _normalize_enum_name(value: Any, *, default: str = "") -> str:
    """Return a lowercase token for an enum member or plain value.

    ``None`` yields *default*. Objects exposing ``.value`` contribute that
    value; anything else is stringified. Only the part after the last ``.``
    is kept, so ``"FilterOperator.EQ"`` becomes ``"eq"``.
    """
    if value is None:
        return default
    if hasattr(value, "value"):
        raw = str(value.value)
    else:
        raw = str(value)
    return raw.split(".")[-1].strip().lower()


def llama_filters_to_k2(filters: MetadataFilters | None) -> dict[str, Any] | None:
    """Convert LlamaIndex MetadataFilters to K2 structured filter format.

    Supports every operator listed in ``_OPERATOR_MAP`` and the AND/OR conditions.
    Returns the structured format: {"filters": [...], "condition": "and|or"},
    or ``None`` when *filters* is ``None`` or contains no filter entries.

    Raises:
        ValueError: for a nested/non-``MetadataFilter`` entry, a filter
            without a key, an unsupported operator, or a condition other
            than ``and``/``or``.
    """
    if filters is None:
        return None

    # A missing or empty condition defaults to "and".
    condition = _normalize_enum_name(getattr(filters, "condition", None), default="and") or "and"

    converted: list[dict[str, Any]] = []
    # `or []` covers a filters attribute that is present but None.
    for item in getattr(filters, "filters", None) or []:
        if not isinstance(item, MetadataFilter):
            raise ValueError(f"Unsupported metadata filter node type: {type(item).__name__}")

        key = getattr(item, "key", None)
        if not key:
            raise ValueError("MetadataFilter key must be set")

        operator_name = _normalize_enum_name(getattr(item, "operator", None), default="eq")
        k2_op = _OPERATOR_MAP.get(operator_name)
        if k2_op is None:
            raise ValueError(
                f"Unsupported LlamaIndex operator: {operator_name!r}. "
                f"Supported: {', '.join(sorted(_OPERATOR_MAP))}"
            )

        value = getattr(item, "value", None)
        converted.append({"key": key, "op": k2_op, "value": value})

    if not converted:
        return None

    # Reject conditions outside the documented set, in the same way unsupported
    # operators are rejected, rather than forwarding them to the API.
    if condition not in _SUPPORTED_CONDITIONS:
        raise ValueError(
            f"Unsupported LlamaIndex filter condition: {condition!r}. "
            f"Supported: {', '.join(sorted(_SUPPORTED_CONDITIONS))}"
        )

    return {"filters": converted, "condition": condition}
|