langflow-base-nightly 0.5.0.dev30__py3-none-any.whl → 0.5.0.dev31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langflow/api/router.py +2 -0
- langflow/api/v1/__init__.py +2 -0
- langflow/api/v1/knowledge_bases.py +437 -0
- langflow/base/data/kb_utils.py +104 -0
- langflow/components/data/__init__.py +4 -0
- langflow/components/data/kb_ingest.py +585 -0
- langflow/components/data/kb_retrieval.py +254 -0
- langflow/frontend/assets/{SlackIcon-D2PxMQjX.js → SlackIcon-Bikuxo8x.js} +1 -1
- langflow/frontend/assets/{Wikipedia-BNM0lBPs.js → Wikipedia-B6aCFf5-.js} +1 -1
- langflow/frontend/assets/{Wolfram-COQyGyeC.js → Wolfram-CekL_M-a.js} +1 -1
- langflow/frontend/assets/{index-CTpfN0Cy.js → index-09CVJwsY.js} +1 -1
- langflow/frontend/assets/{index-DWUG3nTC.js → index-1MEYR1La.js} +1 -1
- langflow/frontend/assets/{index-Ds9y6kEK.js → index-2vQdFIK_.js} +1 -1
- langflow/frontend/assets/{index-DRdKSzTn.js → index-4Tl3Nxdo.js} +1 -1
- langflow/frontend/assets/{index-O_vPh7iD.js → index-5G402gB8.js} +1 -1
- langflow/frontend/assets/{index-D15h4ir2.js → index-5hW8VleF.js} +1 -1
- langflow/frontend/assets/{index-BydnMWnM.js → index-6GWpsedd.js} +1 -1
- langflow/frontend/assets/{index-4vIU43o6.js → index-7x3wNZ-4.js} +1 -1
- langflow/frontend/assets/{index-DrFpyu9Z.js → index-9gkURvG2.js} +1 -1
- langflow/frontend/assets/{index-DRe5h2N_.js → index-AOX7bbjJ.js} +1 -1
- langflow/frontend/assets/{index-fJyq3ZWN.js → index-B20KmxhS.js} +1 -1
- langflow/frontend/assets/{index-D_sHnnuS.js → index-B2EmwqKj.js} +1 -1
- langflow/frontend/assets/{index-DEc_2ba8.js → index-B4AtFbkN.js} +1 -1
- langflow/frontend/assets/{index-D_zQiboE.js → index-B4xLpgbM.js} +1 -1
- langflow/frontend/assets/{index-Db8Xgs-K.js → index-B9KRIJFi.js} +1 -1
- langflow/frontend/assets/{index-BzCZNz2f.js → index-B9uOBe6Y.js} +1 -1
- langflow/frontend/assets/{index-pFTvwRsJ.js → index-BDmbsLY2.js} +1 -1
- langflow/frontend/assets/{index-CGef2axA.js → index-BIKbxmIh.js} +1 -1
- langflow/frontend/assets/{index-BTl_mLju.js → index-BIjUtp6d.js} +1 -1
- langflow/frontend/assets/{index-Jze67eTW.js → index-BJIsQS8D.js} +1 -1
- langflow/frontend/assets/{index-DV-gdr7l.js → index-BO4fl1uU.js} +1 -1
- langflow/frontend/assets/{index-BUVmswbg.js → index-BRE8A4Q_.js} +1 -1
- langflow/frontend/assets/{index-CTzWsu8S.js → index-BRNhftot.js} +1 -1
- langflow/frontend/assets/{index-DFYBo38q.js → index-BRizlHaN.js} +1 -1
- langflow/frontend/assets/{index-DbPP5vss.js → index-BRwkzs92.js} +1 -1
- langflow/frontend/assets/{index-BzE7oL1n.js → index-BZCt_UnJ.js} +1 -1
- langflow/frontend/assets/{index-BhRSkpxu.js → index-B_ytx_iA.js} +1 -1
- langflow/frontend/assets/{index-ByCunkn4.js → index-BcqeL_f4.js} +1 -1
- langflow/frontend/assets/{index-CAAZbdRp.js → index-Bgd7yLoW.js} +1 -1
- langflow/frontend/assets/{index-DpDbxNdQ.js → index-BlRTHXW5.js} +1 -1
- langflow/frontend/assets/{index-jXSPQ_JS.js → index-BllNr21U.js} +1 -1
- langflow/frontend/assets/{index-fpMcQS2L.js → index-Bm7a2vMS.js} +1 -1
- langflow/frontend/assets/{index-BFQzmLDT.js → index-Bn4HAVDG.js} +1 -1
- langflow/frontend/assets/{index-D8EpAMC3.js → index-BwlYjc56.js} +1 -1
- langflow/frontend/assets/{index-BcCN9mpu.js → index-BzCjyHto.js} +1 -1
- langflow/frontend/assets/{index-D6-jZ4sc.js → index-C3RZz8WE.js} +1 -1
- langflow/frontend/assets/{index-D66JmFlL.js → index-C69gdJqw.js} +1 -1
- langflow/frontend/assets/{index-pYD0BTGu.js → index-C6P0vvSP.js} +1 -1
- langflow/frontend/assets/{index-CIjw_ZkP.js → index-C7wDSVVH.js} +1 -1
- langflow/frontend/assets/{index-BCTEK38J.js → index-CAzSTGAM.js} +1 -1
- langflow/frontend/assets/{index-8FjgS_Vj.js → index-CEn_71Wk.js} +1 -1
- langflow/frontend/assets/{index-BFiCUM5l.js → index-CGVDXKtN.js} +1 -1
- langflow/frontend/assets/{index-BIH2K0v8.js → index-CIYzjH2y.js} +1 -1
- langflow/frontend/assets/{index-gM8j2Wvk.js → index-COqjpsdy.js} +1 -1
- langflow/frontend/assets/{index-2q8IFBNP.js → index-CP0tFKwN.js} +1 -1
- langflow/frontend/assets/{index-CXpZa4H9.js → index-CPIdMJkX.js} +1 -1
- langflow/frontend/assets/{index-B-YjnRWx.js → index-CSRizl2S.js} +1 -1
- langflow/frontend/assets/{index-DFo0yfS5.js → index-CUe1ivTn.js} +1 -1
- langflow/frontend/assets/{index-C2x5hzgY.js → index-CVphnxXi.js} +1 -1
- langflow/frontend/assets/{index-Bz3QnhLZ.js → index-CY6LUi4V.js} +1 -1
- langflow/frontend/assets/{index-Cq6gk34q.js → index-C_2G2ZqJ.js} +1 -1
- langflow/frontend/assets/{index-CSXUVElo.js → index-C_K6Tof7.js} +1 -1
- langflow/frontend/assets/{index-1D7jZ8vz.js → index-C_UkF-RJ.js} +1 -1
- langflow/frontend/assets/{index-BVGZcHHC.js → index-Cbwk3f-p.js} +1 -1
- langflow/frontend/assets/{index-kiqvo0Zi.js → index-CdwjD4IX.js} +1 -1
- langflow/frontend/assets/{index-BNy3Al2s.js → index-CgbINWS8.js} +1 -1
- langflow/frontend/assets/{index-BXJpd9hg.js → index-CglSqvB5.js} +1 -1
- langflow/frontend/assets/{index-D9CF_54p.js → index-CmiRgF_-.js} +1 -1
- langflow/frontend/assets/{index-ez1EW657.js → index-Cp7Pmn03.js} +1 -1
- langflow/frontend/assets/{index-aypzjPzG.js → index-Cq30cQcP.js} +1 -1
- langflow/frontend/assets/index-CqS7zir1.css +1 -0
- langflow/frontend/assets/{index-DKv0y9Dp.js → index-Cr2oy5K2.js} +1 -1
- langflow/frontend/assets/{index-DrfwVxtD.js → index-Crq_yhkG.js} +1 -1
- langflow/frontend/assets/{index-CzJzRS6i.js → index-Cs_jt3dj.js} +1 -1
- langflow/frontend/assets/{index-DO0mS8FQ.js → index-Cy-ZEfWh.js} +1 -1
- langflow/frontend/assets/{index-Q0bwuTZY.js → index-Cyk3aCmP.js} +1 -1
- langflow/frontend/assets/{index-DToZROdu.js → index-D-HTZ68O.js} +1 -1
- langflow/frontend/assets/{index-C0AEZF1v.js → index-D1RgjMON.js} +1 -1
- langflow/frontend/assets/{index-DilRRF2S.js → index-D29n5mus.js} +1 -1
- langflow/frontend/assets/{index-CKLOrtrx.js → index-D2nHdRne.js} +1 -1
- langflow/frontend/assets/{index-sfFDGjjd.js → index-D7Vx6mgS.js} +1 -1
- langflow/frontend/assets/{index-BAHhLqW9.js → index-D7nFs6oq.js} +1 -1
- langflow/frontend/assets/{index-C7jY4x98.js → index-DAJafn16.js} +1 -1
- langflow/frontend/assets/{index-BefwTGbP.js → index-DDcpxWU4.js} +1 -1
- langflow/frontend/assets/{index-CTZ9iXFr.js → index-DEuXrfXH.js} +1 -1
- langflow/frontend/assets/{index-DFfr0xSt.js → index-DF0oWRdd.js} +1 -1
- langflow/frontend/assets/{index-Bh5pQAZC.js → index-DI0zAExi.js} +1 -1
- langflow/frontend/assets/{index-CG-Suo0F.js → index-DJs6FoYC.js} +1 -1
- langflow/frontend/assets/{index-dvTTQhKz.js → index-DNS4La1f.js} +1 -1
- langflow/frontend/assets/{index-nLDaeeZg.js → index-DOI0ceS-.js} +1 -1
- langflow/frontend/assets/{index-DakdEtbq.js → index-DOb9c2bf.js} +1 -1
- langflow/frontend/assets/{index-CEVnRp4_.js → index-DS4F_Phe.js} +1 -1
- langflow/frontend/assets/{index-DGRg2M1l.js → index-DTJX3yQa.js} +1 -1
- langflow/frontend/assets/{index-BjAsd-Vo.js → index-DVV_etfW.js} +1 -1
- langflow/frontend/assets/{index-BrIuZD2A.js → index-DX_InNVT.js} +1 -1
- langflow/frontend/assets/{index-jG-zLXRN.js → index-DbmqjLy6.js} +1 -1
- langflow/frontend/assets/{index-DSvOFGJR.js → index-Dc0p1Oxl.js} +1 -1
- langflow/frontend/assets/{index-87GFtXu5.js → index-DkJCCraf.js} +1 -1
- langflow/frontend/assets/{index-BXidWkLM.js → index-DlMAYATX.js} +1 -1
- langflow/frontend/assets/{index-sbTxhltT.js → index-DmaQAn3K.js} +1 -1
- langflow/frontend/assets/{index-DkC5vMvx.js → index-DmvjdU1N.js} +1 -1
- langflow/frontend/assets/{index-CSUglByd.js → index-DnusMCK1.js} +1 -1
- langflow/frontend/assets/{index-DZOTHXs0.js → index-DoFlaGDx.js} +1 -1
- langflow/frontend/assets/{index-CZkMjaa8.js → index-DqDQk0Cu.js} +1 -1
- langflow/frontend/assets/{index-lc10GnwG.js → index-DrvRK4_i.js} +1 -1
- langflow/frontend/assets/{index-BNm-yAYc.js → index-DtCsjX48.js} +1 -1
- langflow/frontend/assets/{index-BeLnhfG-.js → index-Dy7ehgeV.js} +1 -1
- langflow/frontend/assets/{index-RGG9hk9J.js → index-Dz0r9Idb.js} +1 -1
- langflow/frontend/assets/{index-Bcq2yA-p.js → index-DzDNhMMW.js} +1 -1
- langflow/frontend/assets/{index-P3f-GeAm.js → index-FYcoJPMP.js} +1 -1
- langflow/frontend/assets/{index-DQwvl_Rp.js → index-Iamzh9ZT.js} +1 -1
- langflow/frontend/assets/{index-Cy6n8tA9.js → index-J0pvFqLk.js} +1 -1
- langflow/frontend/assets/{index-D1XTMye3.js → index-J98sU-1p.js} +1 -1
- langflow/frontend/assets/{index-BZ0rL0tK.js → index-JHCxbvlW.js} +1 -1
- langflow/frontend/assets/{index-DmSH63k1.js → index-KnS52ylc.js} +1 -1
- langflow/frontend/assets/{index-WGZ88ShH.js → index-L7FKc9QN.js} +1 -1
- langflow/frontend/assets/{index-BIoFnUtx.js → index-RveG4dl9.js} +1 -1
- langflow/frontend/assets/{index-BDdkPrzu.js → index-T2jJOG85.js} +1 -1
- langflow/frontend/assets/{index-2839k6WO.js → index-TRyDa01A.js} +1 -1
- langflow/frontend/assets/{index-DvOdMz35.js → index-U7J1YiWE.js} +1 -1
- langflow/frontend/assets/{index-DzUx1-Bl.js → index-UI2ws3qp.js} +1984 -1984
- langflow/frontend/assets/{index-8Fx5I2fx.js → index-VO-pk-Hg.js} +1 -1
- langflow/frontend/assets/{index-e-RKmhti.js → index-_3qag0I4.js} +1 -1
- langflow/frontend/assets/{index-X67tRPXo.js → index-dfaj9-hY.js} +1 -1
- langflow/frontend/assets/{index-CHexGuNQ.js → index-eJwu5YEi.js} +1 -1
- langflow/frontend/assets/{index-Dz5YIK1W.js → index-in188l0A.js} +1 -1
- langflow/frontend/assets/{index-CTwkLLMr.js → index-pkOi9P45.js} +1 -1
- langflow/frontend/assets/{index-D6BaTmee.js → index-qXcoVIRo.js} +1 -1
- langflow/frontend/assets/{index-euS8RcNY.js → index-xVx59Op-.js} +1 -1
- langflow/frontend/assets/{index-C4WueQ4k.js → index-yIh6-LZT.js} +1 -1
- langflow/frontend/assets/lazyIconImports-kvf_Kak2.js +2 -0
- langflow/frontend/assets/{use-post-add-user-CA-_peAV.js → use-post-add-user-Bt6vZvvT.js} +1 -1
- langflow/frontend/index.html +2 -2
- langflow/initial_setup/starter_projects/Knowledge Ingestion.json +1052 -0
- langflow/initial_setup/starter_projects/Knowledge Retrieval.json +707 -0
- langflow/services/settings/base.py +3 -0
- {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev31.dist-info}/METADATA +2 -1
- {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev31.dist-info}/RECORD +140 -134
- langflow/frontend/assets/index-DIcdzk44.css +0 -1
- langflow/frontend/assets/lazyIconImports-lnczjBhY.js +0 -2
- {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev31.dist-info}/WHEEL +0 -0
- {langflow_base_nightly-0.5.0.dev30.dist-info → langflow_base_nightly-0.5.0.dev31.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from cryptography.fernet import InvalidToken
|
|
6
|
+
from langchain_chroma import Chroma
|
|
7
|
+
from loguru import logger
|
|
8
|
+
|
|
9
|
+
from langflow.custom import Component
|
|
10
|
+
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
|
|
11
|
+
from langflow.schema.data import Data
|
|
12
|
+
from langflow.schema.dataframe import DataFrame
|
|
13
|
+
from langflow.services.auth.utils import decrypt_api_key
|
|
14
|
+
from langflow.services.deps import get_settings_service
|
|
15
|
+
|
|
16
|
+
# Resolve the knowledge-base root directory once, at import time, from the
# application settings. Importing this module fails fast with ValueError when
# the setting is empty so the component never runs against an undefined root.
settings = get_settings_service().settings
knowledge_directory = settings.knowledge_bases_dir
if knowledge_directory:
    # "~" in the configured path is expanded to the current user's home.
    KNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()
else:
    msg = "Knowledge bases directory is not set in the settings."
    raise ValueError(msg)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class KBRetrievalComponent(Component):
    """Component that searches a Chroma-backed knowledge base.

    A knowledge base is a sub-directory of ``KNOWLEDGE_BASES_ROOT_PATH`` that
    holds a persisted Chroma collection (named after the directory) and an
    ``embedding_metadata.json`` file describing the embedding provider/model
    used to build it. The component rebuilds the matching embedding model,
    opens the collection and returns the top-k documents as a ``DataFrame``.
    """

    display_name = "Knowledge Retrieval"
    description = "Search and retrieve data from knowledge."
    icon = "database"
    name = "KBRetrieval"

    inputs = [
        DropdownInput(
            name="knowledge_base",
            display_name="Knowledge",
            info="Select the knowledge to load data from.",
            required=True,
            # Initial options are computed once at class-definition time;
            # update_build_config() refreshes them on demand.
            options=[
                str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()
            ]
            if KNOWLEDGE_BASES_ROOT_PATH.exists()
            else [],
            refresh_button=True,
            real_time_refresh=True,
        ),
        SecretStrInput(
            name="api_key",
            display_name="Embedding Provider API Key",
            info="API key for the embedding provider to generate embeddings.",
            advanced=True,
            required=False,
        ),
        MessageTextInput(
            name="search_query",
            display_name="Search Query",
            info="Optional search query to filter knowledge base data.",
        ),
        IntInput(
            name="top_k",
            display_name="Top K Results",
            info="Number of top results to return from the knowledge base.",
            value=5,
            advanced=True,
            required=False,
        ),
        BoolInput(
            name="include_metadata",
            display_name="Include Metadata",
            info="Whether to include all metadata and embeddings in the output. If false, only content is returned.",
            value=True,
            advanced=True,
        ),
    ]

    outputs = [
        Output(
            name="chroma_kb_data",
            display_name="Results",
            method="get_chroma_kb_data",
            info="Returns the data from the selected knowledge base.",
        ),
    ]

    def _get_knowledge_bases(self) -> list[str]:
        """Retrieve a list of available knowledge bases.

        Returns:
            The names of the non-hidden sub-directories of
            ``KNOWLEDGE_BASES_ROOT_PATH``, or an empty list when the root
            directory does not exist.
        """
        if not KNOWLEDGE_BASES_ROOT_PATH.exists():
            return []

        return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()]

    def update_build_config(self, build_config, field_value, field_name=None):  # noqa: ARG002
        """Refresh the knowledge-base dropdown when that field is updated.

        Args:
            build_config: Mutable build configuration mapping for the component.
            field_value: Unused; part of the expected signature.
            field_name: Name of the field that triggered the update.

        Returns:
            The (possibly modified) ``build_config``.
        """
        if field_name == "knowledge_base":
            # Update the knowledge base options dynamically
            build_config["knowledge_base"]["options"] = self._get_knowledge_bases()

            # If the selected knowledge base is not available, reset it
            if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]:
                build_config["knowledge_base"]["value"] = None

        return build_config

    def _get_kb_metadata(self, kb_path: Path) -> dict:
        """Load and process knowledge base metadata.

        Reads ``embedding_metadata.json`` from ``kb_path`` and, when an
        ``api_key`` entry is present, replaces it with its decrypted value
        (or ``None`` if decryption fails).

        Args:
            kb_path: Directory of the knowledge base.

        Returns:
            The metadata dictionary, or an empty dict when the file is
            missing, is not valid JSON, or does not contain a JSON object.
        """
        metadata_file = kb_path / "embedding_metadata.json"
        if not metadata_file.exists():
            logger.warning(f"Embedding metadata file not found at {metadata_file}")
            return {}

        try:
            with metadata_file.open("r", encoding="utf-8") as f:
                metadata: dict[str, Any] = json.load(f)
        except json.JSONDecodeError:
            logger.error(f"Error decoding JSON from {metadata_file}")
            return {}

        # Valid JSON that is not an object (e.g. a list) cannot be used as
        # metadata; treat it like an unreadable file instead of failing later.
        if not isinstance(metadata, dict):
            logger.error(f"Unexpected metadata format in {metadata_file}; expected a JSON object")
            return {}

        # Decrypt API key if it exists
        if metadata.get("api_key"):
            settings_service = get_settings_service()
            try:
                metadata["api_key"] = decrypt_api_key(metadata["api_key"], settings_service)
            except (InvalidToken, TypeError, ValueError) as e:
                logger.error(f"Could not decrypt API key. Please provide it manually. Error: {e}")
                metadata["api_key"] = None
        return metadata

    def _build_embeddings(self, metadata: dict):
        """Build embedding model from metadata.

        Args:
            metadata: Knowledge-base metadata; reads ``embedding_provider``,
                ``embedding_model``, ``api_key`` and ``chunk_size``.

        Returns:
            An embeddings object for the stored provider/model.

        Raises:
            ValueError: If the provider needs an API key and none is available.
            NotImplementedError: If the provider is "Custom" or unknown.
        """
        provider = metadata.get("embedding_provider")
        model = metadata.get("embedding_model")
        api_key = metadata.get("api_key")
        chunk_size = metadata.get("chunk_size")

        # If user provided a key in the input, it overrides the stored one.
        # The input value may be a plain string or a SecretStr-like object, so
        # unwrap it only when it actually exposes get_secret_value().
        user_key = self.api_key
        reveal = getattr(user_key, "get_secret_value", None)
        if callable(reveal):
            user_key = reveal()
        if user_key:
            api_key = user_key

        # Handle various providers
        if provider == "OpenAI":
            from langchain_openai import OpenAIEmbeddings

            if not api_key:
                msg = "OpenAI API key is required. Provide it in the component's advanced settings."
                raise ValueError(msg)
            openai_kwargs: dict[str, Any] = {"model": model, "api_key": api_key}
            # Only forward chunk_size when the metadata recorded one, so a
            # missing value falls back to the library default instead of None.
            if chunk_size is not None:
                openai_kwargs["chunk_size"] = chunk_size
            return OpenAIEmbeddings(**openai_kwargs)
        if provider == "HuggingFace":
            from langchain_huggingface import HuggingFaceEmbeddings

            return HuggingFaceEmbeddings(
                model=model,
            )
        if provider == "Cohere":
            from langchain_cohere import CohereEmbeddings

            if not api_key:
                msg = "Cohere API key is required when using Cohere provider"
                raise ValueError(msg)
            return CohereEmbeddings(
                model=model,
                cohere_api_key=api_key,
            )
        if provider == "Custom":
            # For custom embedding models, we would need additional configuration
            msg = "Custom embedding models not yet supported"
            raise NotImplementedError(msg)
        # Add other providers here if they become supported in ingest
        msg = f"Embedding provider '{provider}' is not supported for retrieval."
        raise NotImplementedError(msg)

    def get_chroma_kb_data(self) -> DataFrame:
        """Retrieve data from the selected knowledge base by reading the Chroma collection.

        With a search query, runs a scored similarity search; without one,
        runs a similarity search on the empty string and assigns each hit a
        dummy score of 0.

        Returns:
            A DataFrame containing the data rows from the knowledge base. Each
            row has ``content``; with ``include_metadata`` it also carries the
            document metadata, ``_embeddings`` and (for queries) ``_score``.

        Raises:
            ValueError: If no knowledge base is selected or its metadata
                cannot be loaded.
        """
        # update_build_config() may reset the selection to None; fail with a
        # clear message instead of a TypeError from the path join below.
        if not self.knowledge_base:
            msg = "No knowledge base selected. Select one before running retrieval."
            raise ValueError(msg)

        kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base

        kb_metadata = self._get_kb_metadata(kb_path)
        if not kb_metadata:
            msg = f"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed."
            raise ValueError(msg)

        # Build the embedder for the knowledge base
        embedding_function = self._build_embeddings(kb_metadata)

        # Load vector store
        chroma = Chroma(
            persist_directory=str(kb_path),
            embedding_function=embedding_function,
            collection_name=self.knowledge_base,
        )

        # If a search query is provided, perform a similarity search
        if self.search_query:
            logger.info(f"Performing similarity search with query: {self.search_query}")
            results = chroma.similarity_search_with_score(
                query=self.search_query,
                k=self.top_k,
            )
        else:
            unscored = chroma.similarity_search(
                query="",
                k=self.top_k,
            )
            # Pair each document with a dummy score of 0 so both branches
            # yield (document, score) tuples.
            results = [(doc, 0) for doc in unscored]

        # If metadata is enabled, get embeddings for the results
        id_to_embedding = {}
        if self.include_metadata and results:
            doc_ids = [doc.metadata.get("_id") for doc, _ in results if doc.metadata.get("_id")]

            # Only proceed if we have valid document IDs
            if doc_ids:
                # Access underlying client to get embeddings (private attribute
                # of the Chroma wrapper).
                collection = chroma._client.get_collection(name=self.knowledge_base)
                stored = collection.get(where={"_id": {"$in": doc_ids}}, include=["embeddings", "metadatas"])

                stored_metadatas = stored.get("metadatas") or []
                # Compare against None rather than truthiness: the embeddings
                # container may not support bool() (e.g. an array).
                stored_embeddings = stored.get("embeddings")
                if stored_embeddings is not None:
                    # Create a mapping from document ID to embedding
                    for stored_meta, embedding in zip(stored_metadatas, stored_embeddings):
                        if stored_meta and "_id" in stored_meta:
                            id_to_embedding[stored_meta["_id"]] = embedding

        # Build output data based on include_metadata setting
        data_list = []
        for doc, score in results:
            if self.include_metadata:
                # Include all metadata, embeddings, and content
                kwargs = {
                    "content": doc.page_content,
                    **doc.metadata,
                }
                if self.search_query:
                    kwargs["_score"] = -1 * score
                kwargs["_embeddings"] = id_to_embedding.get(doc.metadata.get("_id"))
            else:
                # Only include content
                kwargs = {
                    "content": doc.page_content,
                }

            data_list.append(Data(**kwargs))

        # Return the DataFrame containing the data
        return DataFrame(data=data_list)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{j as a}from"./index-
|
|
1
|
+
// Minified bundle chunk. Default export `s` is a function that renders an <svg>
// (viewBox 0 0 54 54, width/height 1em) containing four filled <path> elements;
// the argument `l` is spread onto the <svg> as extra props.
// `a` is imported from the main index chunk and is presumably the JSX runtime — TODO confirm.
import{j as a}from"./index-UI2ws3qp.js";const s=l=>a.jsx("svg",{xmlns:"http://www.w3.org/2000/svg",viewBox:"0 0 54 54",width:"1em",height:"1em",...l,children:a.jsxs("g",{fill:"none",fillRule:"evenodd",children:[a.jsx("path",{fill:"#36C5F0",d:"M19.712.133a5.381 5.381 0 0 0-5.376 5.387 5.381 5.381 0 0 0 5.376 5.386h5.376V5.52A5.381 5.381 0 0 0 19.712.133m0 14.365H5.376A5.381 5.381 0 0 0 0 19.884a5.381 5.381 0 0 0 5.376 5.387h14.336a5.381 5.381 0 0 0 5.376-5.387 5.381 5.381 0 0 0-5.376-5.386"}),a.jsx("path",{fill:"#2EB67D",d:"M53.76 19.884a5.381 5.381 0 0 0-5.376-5.386 5.381 5.381 0 0 0-5.376 5.386v5.387h5.376a5.381 5.381 0 0 0 5.376-5.387m-14.336 0V5.52A5.381 5.381 0 0 0 34.048.133a5.381 5.381 0 0 0-5.376 5.387v14.364a5.381 5.381 0 0 0 5.376 5.387 5.381 5.381 0 0 0 5.376-5.387"}),a.jsx("path",{fill:"#ECB22E",d:"M34.048 54a5.381 5.381 0 0 0 5.376-5.387 5.381 5.381 0 0 0-5.376-5.386h-5.376v5.386A5.381 5.381 0 0 0 34.048 54m0-14.365h14.336a5.381 5.381 0 0 0 5.376-5.386 5.381 5.381 0 0 0-5.376-5.387H34.048a5.381 5.381 0 0 0-5.376 5.387 5.381 5.381 0 0 0 5.376 5.386"}),a.jsx("path",{fill:"#E01E5A",d:"M0 34.249a5.381 5.381 0 0 0 5.376 5.386 5.381 5.381 0 0 0 5.376-5.386v-5.387H5.376A5.381 5.381 0 0 0 0 34.25m14.336-.001v14.364A5.381 5.381 0 0 0 19.712 54a5.381 5.381 0 0 0 5.376-5.387V34.25a5.381 5.381 0 0 0-5.376-5.387 5.381 5.381 0 0 0-5.376 5.387"})]})});export{s as default};
|