langflow-base-nightly 0.5.1.dev1__py3-none-any.whl → 0.5.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langflow/api/v1/endpoints.py +10 -7
- langflow/api/v1/schemas.py +5 -2
- langflow/base/knowledge_bases/__init__.py +0 -0
- langflow/components/agents/agent.py +1 -0
- langflow/components/data/__init__.py +0 -4
- langflow/components/datastax/__init__.py +2 -2
- langflow/components/knowledge_bases/__init__.py +34 -0
- langflow/components/{data/kb_ingest.py → knowledge_bases/ingestion.py} +23 -15
- langflow/components/{data/kb_retrieval.py → knowledge_bases/retrieval.py} +26 -22
- langflow/components/processing/dataframe_operations.py +12 -1
- langflow/frontend/assets/{SlackIcon-Cr3Q15Px.js → SlackIcon-Cc7Qnzki.js} +1 -1
- langflow/frontend/assets/{Wikipedia-GxM5sPdM.js → Wikipedia-7ulMZY46.js} +1 -1
- langflow/frontend/assets/{Wolfram-BN3-VOCA.js → Wolfram-By9PGsHS.js} +1 -1
- langflow/frontend/assets/{index-Kwdl-e29.js → index--e0oQqZh.js} +1 -1
- langflow/frontend/assets/{index-CD-PqGCY.js → index-3jlSQi5Y.js} +1 -1
- langflow/frontend/assets/{index-DQ7VYqQc.js → index-4JIEdyIM.js} +1 -1
- langflow/frontend/assets/{index-C9Cxnkl8.js → index-5-CSw2-z.js} +1 -1
- langflow/frontend/assets/{index-DUpri6zF.js → index-7yAHPRxv.js} +1 -1
- langflow/frontend/assets/{index-Dl5amdBz.js → index-9FL5xjkL.js} +1 -1
- langflow/frontend/assets/{index-X0guhYF8.js → index-AALDfCyt.js} +1 -1
- langflow/frontend/assets/{index-BLTxEeTi.js → index-AKVkmT4S.js} +1 -1
- langflow/frontend/assets/{index-CRcMqCIj.js → index-B3GvPjhD.js} +1 -1
- langflow/frontend/assets/{index-D14EWPyZ.js → index-B5LHnuQR.js} +1 -1
- langflow/frontend/assets/{index-C3yvArUT.js → index-BAn-AzCS.js} +1 -1
- langflow/frontend/assets/{index-CRPKJZw9.js → index-BCXhKCOK.js} +1 -1
- langflow/frontend/assets/{index-CuFXdTx4.js → index-BGt6jQ4x.js} +1 -1
- langflow/frontend/assets/{index-AWCSdofD.js → index-BH7AyHxp.js} +1 -1
- langflow/frontend/assets/{index-CBc8fEAE.js → index-BISPW-f6.js} +1 -1
- langflow/frontend/assets/{index-Bf0IYKLd.js → index-BIqEYjNT.js} +1 -1
- langflow/frontend/assets/{index-DiGWASY5.js → index-BLEWsL1U.js} +1 -1
- langflow/frontend/assets/{index-D-KY3kkq.js → index-BLXN681C.js} +1 -1
- langflow/frontend/assets/{index-C-Xfg4cD.js → index-BMpKFGhI.js} +1 -1
- langflow/frontend/assets/{index-BVwJDmw-.js → index-BMvp94tO.js} +1 -1
- langflow/frontend/assets/{index-3wW7BClE.js → index-BSwBVwyF.js} +1 -1
- langflow/frontend/assets/{index-Cewy7JZE.js → index-BWFIrwW1.js} +1 -1
- langflow/frontend/assets/{index-CiixOzDG.js → index-BWnKMRFJ.js} +1 -1
- langflow/frontend/assets/{index-BZcw4827.js → index-BX_asvRB.js} +1 -1
- langflow/frontend/assets/{index-CiR1dxI4.js → index-BZ-A4K98.js} +1 -1
- langflow/frontend/assets/{index-CpzXS6md.js → index-BZSa2qz7.js} +1 -1
- langflow/frontend/assets/{index-CdIf07Rw.js → index-B_kBTgxV.js} +1 -1
- langflow/frontend/assets/{index-ClsuDmR6.js → index-BdjfHsrf.js} +1 -1
- langflow/frontend/assets/{index-hbndqB9B.js → index-Bhcv5M0n.js} +1 -1
- langflow/frontend/assets/{index-dJWNxIRH.js → index-BhqVw9WQ.js} +1 -1
- langflow/frontend/assets/{index-Tw3Os-DN.js → index-Bl7RpmrB.js} +1 -1
- langflow/frontend/assets/{index-C-EdnFdA.js → index-BlDsBQ_1.js} +1 -1
- langflow/frontend/assets/{index-z3SRY-mX.js → index-Bm9i8F4W.js} +1 -1
- langflow/frontend/assets/{index-CMZ79X-Y.js → index-BnCnYnao.js} +1 -1
- langflow/frontend/assets/{index-DXRfN4HV.js → index-BrDz-PxE.js} +1 -1
- langflow/frontend/assets/{index-CVWQfRYZ.js → index-BsdLyYMY.js} +1 -1
- langflow/frontend/assets/{index-BTKOU4xC.js → index-BusCv3bR.js} +1 -1
- langflow/frontend/assets/{index-D2N3l-cw.js → index-BvRIG6P5.js} +1 -1
- langflow/frontend/assets/{index-DpVWih90.js → index-Bw-TIIC6.js} +1 -1
- langflow/frontend/assets/{index-D-1tA8Dt.js → index-ByxGmq5p.js} +1 -1
- langflow/frontend/assets/{index-BWYuQ2Sj.js → index-C-2MRYoJ.js} +1 -1
- langflow/frontend/assets/{index-CZqRL9DE.js → index-C-bjC2sz.js} +1 -1
- langflow/frontend/assets/{index-o0D2S7xW.js → index-C-wnbBBY.js} +1 -1
- langflow/frontend/assets/{index-D-_B1a8v.js → index-C51yNvIL.js} +1 -1
- langflow/frontend/assets/{index-DJP-ss47.js → index-C676MS3I.js} +1 -1
- langflow/frontend/assets/{index-lZX9AvZW.js → index-C6nzdeYx.js} +1 -1
- langflow/frontend/assets/{index-6pyH3ZJB.js → index-C8pI0lzi.js} +1 -1
- langflow/frontend/assets/{index-ovFJ_0J6.js → index-CDphUsa3.js} +1 -1
- langflow/frontend/assets/{index-J38wh62w.js → index-CF4_Og1m.js} +1 -1
- langflow/frontend/assets/{index-C1f2wMat.js → index-CJ5A6STv.js} +1 -1
- langflow/frontend/assets/{index-C3KequvP.js → index-CKPZpkQk.js} +1 -1
- langflow/frontend/assets/{index-BiKKN6FR.js → index-CLcaktde.js} +1 -1
- langflow/frontend/assets/{index-28oOcafk.js → index-CNh0rwur.js} +1 -1
- langflow/frontend/assets/{index-CGO1CiUr.js → index-COoTCxvs.js} +1 -1
- langflow/frontend/assets/{index-BC65VuWx.js → index-CPiM2oyj.js} +1 -1
- langflow/frontend/assets/{index-BWdLILDG.js → index-CQQ-4XMS.js} +1 -1
- langflow/frontend/assets/{index-pYJJOcma.js → index-CU16NJD7.js} +1 -1
- langflow/frontend/assets/{index-h_aSZHf3.js → index-CUzlcce2.js} +1 -1
- langflow/frontend/assets/{index-BrJV8psX.js → index-CVkIdc6y.js} +1 -1
- langflow/frontend/assets/{index-lTpteg8t.js → index-C_157Mb-.js} +1 -1
- langflow/frontend/assets/{index-Cyd2HtHK.js → index-C_MhBX6R.js} +1 -1
- langflow/frontend/assets/{index-DrDrcajG.js → index-C_NwzK6j.js} +1 -1
- langflow/frontend/assets/{index-DlD4dXlZ.js → index-Ca1b7Iag.js} +1 -1
- langflow/frontend/assets/{index-BS8Vo8nc.js → index-Cb5G9Ifd.js} +1 -1
- langflow/frontend/assets/{index-CH5UVA9b.js → index-CeswGUz3.js} +1 -1
- langflow/frontend/assets/{index-BCDSei1q.js → index-ChsGhZn3.js} +1 -1
- langflow/frontend/assets/{index-DiB3CTo8.js → index-CiklyQU3.js} +1 -1
- langflow/frontend/assets/{index-dcJ8-agu.js → index-Co__gFM1.js} +1 -1
- langflow/frontend/assets/{index-eo2mAtL-.js → index-Coi86oqP.js} +1 -1
- langflow/frontend/assets/{index-CBvrGgID.js → index-Cu2Xr6_j.js} +1 -1
- langflow/frontend/assets/{index-2wSXqBtB.js → index-Cu7vC48Y.js} +1 -1
- langflow/frontend/assets/{index-Bbi87Ve4.js → index-CvSoff-8.js} +1 -1
- langflow/frontend/assets/{index-DA6-bvgN.js → index-Cw0UComa.js} +1 -1
- langflow/frontend/assets/{index-CWPzZtSx.js → index-D-SnFlhU.js} +1 -1
- langflow/frontend/assets/{index-DHgomBdh.js → index-D-WStJI6.js} +1 -1
- langflow/frontend/assets/{index-BpxbUiZD.js → index-D234yKNJ.js} +186 -186
- langflow/frontend/assets/{index-BkXec1Yf.js → index-D5c2nNvp.js} +1 -1
- langflow/frontend/assets/{index-DwQEZe3C.js → index-DFY8YFbC.js} +1 -1
- langflow/frontend/assets/{index-Bief6eyJ.js → index-DJ6HD14g.js} +1 -1
- langflow/frontend/assets/{index-Bx7dBY26.js → index-DMCerPJM.js} +1 -1
- langflow/frontend/assets/{index-DDWBeudF.js → index-DOj_QWqG.js} +1 -1
- langflow/frontend/assets/{index-CmEYYRN1.js → index-DP1oE6QB.js} +1 -1
- langflow/frontend/assets/{index-C3ZjKdCD.js → index-DTCrijba.js} +1 -1
- langflow/frontend/assets/{index-CtVIONP2.js → index-DVLIDc2_.js} +1 -1
- langflow/frontend/assets/{index-DyJFTK24.js → index-DX7JcSMz.js} +1 -1
- langflow/frontend/assets/{index-DmeiHnfl.js → index-DZVgPCio.js} +1 -1
- langflow/frontend/assets/{index-BwLWcUXL.js → index-DbfS_UH-.js} +1 -1
- langflow/frontend/assets/{index-DV3utZDZ.js → index-DcApTyZ7.js} +1 -1
- langflow/frontend/assets/{index-DDcMAaG4.js → index-Deu8rlaZ.js} +1 -1
- langflow/frontend/assets/{index-iJngutFo.js → index-Df6psZEj.js} +1 -1
- langflow/frontend/assets/{index-CRPyCfYy.js → index-DiblXWmk.js} +1 -1
- langflow/frontend/assets/{index-CMzfJKiW.js → index-DjQElpEg.js} +1 -1
- langflow/frontend/assets/{index-Dmu-X5-4.js → index-DmVt5Jlx.js} +1 -1
- langflow/frontend/assets/{index-CPHEscq9.js → index-DmYLDQag.js} +1 -1
- langflow/frontend/assets/{index-BKseQQ2I.js → index-DnlVWWU8.js} +1 -1
- langflow/frontend/assets/{index-D5ETnvJa.js → index-Dp7ZQyL3.js} +1 -1
- langflow/frontend/assets/{index-Co20d-eQ.js → index-DpWrk8mA.js} +1 -1
- langflow/frontend/assets/{index-CVl6MbaM.js → index-DrXXKzpD.js} +1 -1
- langflow/frontend/assets/{index-OwPvCmpW.js → index-Drg8me2a.js} +1 -1
- langflow/frontend/assets/{index-CVwWoX99.js → index-DsEZjOcp.js} +1 -1
- langflow/frontend/assets/{index-DwPkMTaY.js → index-DznH7Jbq.js} +1 -1
- langflow/frontend/assets/{index-CNw1H-Wc.js → index-GzOGB_fo.js} +1 -1
- langflow/frontend/assets/{index-C3l0zYn0.js → index-MVW4HTEk.js} +1 -1
- langflow/frontend/assets/{index-DhtZ5hx8.js → index-OsUvqIUr.js} +1 -1
- langflow/frontend/assets/{index-B2ptVQGM.js → index-RH_I78z_.js} +1 -1
- langflow/frontend/assets/{index-DdtMEn6I.js → index-RjeC0kaX.js} +1 -1
- langflow/frontend/assets/{index-hG24k5xJ.js → index-S-sc0Cm9.js} +1 -1
- langflow/frontend/assets/{index-Bg5nrMRh.js → index-S8uJXTOq.js} +1 -1
- langflow/frontend/assets/{index-m8QA6VNM.js → index-SB4rw8D5.js} +1 -1
- langflow/frontend/assets/{index-Du-pc0KE.js → index-YJsAl7vm.js} +1 -1
- langflow/frontend/assets/{index-DfDhMHgQ.js → index-ZjeocHyu.js} +1 -1
- langflow/frontend/assets/{index-Bnl6QHtP.js → index-_szO7sta.js} +1 -1
- langflow/frontend/assets/{index-xvFOmxx4.js → index-aAgSKWb3.js} +1 -1
- langflow/frontend/assets/{index-Db9dYSzy.js → index-aWnZIwHd.js} +1 -1
- langflow/frontend/assets/{index-BJy50PvP.js → index-bMhyLtgS.js} +1 -1
- langflow/frontend/assets/{index-Cqpzl1J4.js → index-cYFKmtmg.js} +1 -1
- langflow/frontend/assets/{index-CLJeJYjH.js → index-hg2y9OAt.js} +1 -1
- langflow/frontend/assets/{index-D7kquVv2.js → index-jwzN3Jd_.js} +1 -1
- langflow/frontend/assets/{index-BiC280Nx.js → index-k9jP5chN.js} +1 -1
- langflow/frontend/assets/{index-B3TANVes.js → index-lnF9Eqr2.js} +1 -1
- langflow/frontend/assets/{index-B4yCvZKV.js → index-mjwtJmkP.js} +1 -1
- langflow/frontend/assets/{index-CfwLpbMM.js → index-nw3WF9lY.js} +1 -1
- langflow/frontend/assets/{index-CUVDws8F.js → index-qiVTWUuf.js} +1 -1
- langflow/frontend/assets/{index-DTqbvGC0.js → index-uybez8MR.js} +1 -1
- langflow/frontend/assets/{index-Dfe7qfvf.js → index-v8eXbWlM.js} +1 -1
- langflow/frontend/assets/{index-B2Zgv_xv.js → index-xN8ogFdo.js} +1 -1
- langflow/frontend/assets/{index-BRg1f4Mu.js → index-xV6ystWy.js} +1 -1
- langflow/frontend/assets/{index-sI75DsdM.js → index-yyAaYjLR.js} +1 -1
- langflow/frontend/assets/lazyIconImports-Ci-S9xBA.js +2 -0
- langflow/frontend/assets/{use-post-add-user-C0MdTpQ5.js → use-post-add-user-JUeLDErC.js} +1 -1
- langflow/frontend/index.html +1 -1
- langflow/initial_setup/starter_projects/Hybrid Search RAG.json +1280 -1276
- langflow/initial_setup/starter_projects/Instagram Copywriter.json +1 -1
- langflow/initial_setup/starter_projects/Invoice Summarizer.json +1 -1
- langflow/initial_setup/starter_projects/Knowledge Ingestion.json +46 -47
- langflow/initial_setup/starter_projects/Knowledge Retrieval.json +73 -56
- langflow/initial_setup/starter_projects/Market Research.json +1 -1
- langflow/initial_setup/starter_projects/News Aggregator.json +1 -1
- langflow/initial_setup/starter_projects/Nvidia Remix.json +1 -1
- langflow/initial_setup/starter_projects/Pokédex Agent.json +1 -1
- langflow/initial_setup/starter_projects/Price Deal Finder.json +1 -1
- langflow/initial_setup/starter_projects/Research Agent.json +1 -1
- langflow/initial_setup/starter_projects/SaaS Pricing.json +1 -1
- langflow/initial_setup/starter_projects/Search agent.json +1 -1
- langflow/initial_setup/starter_projects/Sequential Tasks Agents.json +3 -3
- langflow/initial_setup/starter_projects/Simple Agent.json +1 -1
- langflow/initial_setup/starter_projects/Social Media Agent.json +1 -1
- langflow/initial_setup/starter_projects/Travel Planning Agents.json +3 -3
- langflow/initial_setup/starter_projects/Vector Store RAG.json +1093 -1108
- langflow/initial_setup/starter_projects/Youtube Analysis.json +1 -1
- langflow/services/auth/utils.py +78 -1
- langflow/services/settings/auth.py +4 -0
- {langflow_base_nightly-0.5.1.dev1.dist-info → langflow_base_nightly-0.5.1.dev3.dist-info}/METADATA +1 -1
- {langflow_base_nightly-0.5.1.dev1.dist-info → langflow_base_nightly-0.5.1.dev3.dist-info}/RECORD +171 -169
- langflow/frontend/assets/lazyIconImports-D97HEZkE.js +0 -2
- /langflow/base/{data/kb_utils.py → knowledge_bases/knowledge_base_utils.py} +0 -0
- /langflow/components/datastax/{astradb.py → astradb_vectorstore.py} +0 -0
- {langflow_base_nightly-0.5.1.dev1.dist-info → langflow_base_nightly-0.5.1.dev3.dist-info}/WHEEL +0 -0
- {langflow_base_nightly-0.5.1.dev1.dist-info → langflow_base_nightly-0.5.1.dev3.dist-info}/entry_points.txt +0 -0
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
"data": {
|
|
8
8
|
"sourceHandle": {
|
|
9
9
|
"dataType": "ChatInput",
|
|
10
|
-
"id": "ChatInput-
|
|
10
|
+
"id": "ChatInput-insg9",
|
|
11
11
|
"name": "message",
|
|
12
12
|
"output_types": [
|
|
13
13
|
"Message"
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
},
|
|
16
16
|
"targetHandle": {
|
|
17
17
|
"fieldName": "question",
|
|
18
|
-
"id": "Prompt-
|
|
18
|
+
"id": "Prompt-wgAUs",
|
|
19
19
|
"inputTypes": [
|
|
20
20
|
"Message",
|
|
21
21
|
"Text"
|
|
@@ -23,12 +23,12 @@
|
|
|
23
23
|
"type": "str"
|
|
24
24
|
}
|
|
25
25
|
},
|
|
26
|
-
"id": "reactflow__edge-ChatInput-
|
|
26
|
+
"id": "reactflow__edge-ChatInput-insg9{œdataTypeœ:œChatInputœ,œidœ:œChatInput-insg9œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-wgAUs{œfieldNameœ:œquestionœ,œidœ:œPrompt-wgAUsœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
|
|
27
27
|
"selected": false,
|
|
28
|
-
"source": "ChatInput-
|
|
29
|
-
"sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-
|
|
30
|
-
"target": "Prompt-
|
|
31
|
-
"targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-
|
|
28
|
+
"source": "ChatInput-insg9",
|
|
29
|
+
"sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-insg9œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
|
|
30
|
+
"target": "Prompt-wgAUs",
|
|
31
|
+
"targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-wgAUsœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
|
|
32
32
|
},
|
|
33
33
|
{
|
|
34
34
|
"animated": false,
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"data": {
|
|
37
37
|
"sourceHandle": {
|
|
38
38
|
"dataType": "parser",
|
|
39
|
-
"id": "parser-
|
|
39
|
+
"id": "parser-gcmKF",
|
|
40
40
|
"name": "parsed_text",
|
|
41
41
|
"output_types": [
|
|
42
42
|
"Message"
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
},
|
|
45
45
|
"targetHandle": {
|
|
46
46
|
"fieldName": "context",
|
|
47
|
-
"id": "Prompt-
|
|
47
|
+
"id": "Prompt-wgAUs",
|
|
48
48
|
"inputTypes": [
|
|
49
49
|
"Message",
|
|
50
50
|
"Text"
|
|
@@ -52,125 +52,100 @@
|
|
|
52
52
|
"type": "str"
|
|
53
53
|
}
|
|
54
54
|
},
|
|
55
|
-
"id": "reactflow__edge-parser-
|
|
55
|
+
"id": "reactflow__edge-parser-gcmKF{œdataTypeœ:œparserœ,œidœ:œparser-gcmKFœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-wgAUs{œfieldNameœ:œcontextœ,œidœ:œPrompt-wgAUsœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
|
|
56
56
|
"selected": false,
|
|
57
|
-
"source": "parser-
|
|
58
|
-
"sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-
|
|
59
|
-
"target": "Prompt-
|
|
60
|
-
"targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-
|
|
57
|
+
"source": "parser-gcmKF",
|
|
58
|
+
"sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-gcmKFœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
|
|
59
|
+
"target": "Prompt-wgAUs",
|
|
60
|
+
"targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-wgAUsœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
|
|
61
61
|
},
|
|
62
62
|
{
|
|
63
63
|
"animated": false,
|
|
64
64
|
"className": "",
|
|
65
65
|
"data": {
|
|
66
66
|
"sourceHandle": {
|
|
67
|
-
"dataType": "
|
|
68
|
-
"id": "
|
|
69
|
-
"name": "
|
|
70
|
-
"output_types": [
|
|
71
|
-
"Embeddings"
|
|
72
|
-
]
|
|
73
|
-
},
|
|
74
|
-
"targetHandle": {
|
|
75
|
-
"fieldName": "embedding_model",
|
|
76
|
-
"id": "AstraDB-W6NB4",
|
|
77
|
-
"inputTypes": [
|
|
78
|
-
"Embeddings"
|
|
79
|
-
],
|
|
80
|
-
"type": "other"
|
|
81
|
-
}
|
|
82
|
-
},
|
|
83
|
-
"id": "reactflow__edge-OpenAIEmbeddings-oFtHy{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-oFtHyœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-W6NB4{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-W6NB4œ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}",
|
|
84
|
-
"selected": false,
|
|
85
|
-
"source": "OpenAIEmbeddings-oFtHy",
|
|
86
|
-
"sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-oFtHyœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}",
|
|
87
|
-
"target": "AstraDB-W6NB4",
|
|
88
|
-
"targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-W6NB4œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}"
|
|
89
|
-
},
|
|
90
|
-
{
|
|
91
|
-
"animated": false,
|
|
92
|
-
"className": "",
|
|
93
|
-
"data": {
|
|
94
|
-
"sourceHandle": {
|
|
95
|
-
"dataType": "OpenAIEmbeddings",
|
|
96
|
-
"id": "OpenAIEmbeddings-v0rcw",
|
|
97
|
-
"name": "embeddings",
|
|
67
|
+
"dataType": "File",
|
|
68
|
+
"id": "File-wqFzl",
|
|
69
|
+
"name": "message",
|
|
98
70
|
"output_types": [
|
|
99
|
-
"
|
|
71
|
+
"Message"
|
|
100
72
|
]
|
|
101
73
|
},
|
|
102
74
|
"targetHandle": {
|
|
103
|
-
"fieldName": "
|
|
104
|
-
"id": "
|
|
75
|
+
"fieldName": "data_inputs",
|
|
76
|
+
"id": "SplitText-50nDI",
|
|
105
77
|
"inputTypes": [
|
|
106
|
-
"
|
|
78
|
+
"Data",
|
|
79
|
+
"DataFrame",
|
|
80
|
+
"Message"
|
|
107
81
|
],
|
|
108
82
|
"type": "other"
|
|
109
83
|
}
|
|
110
84
|
},
|
|
111
|
-
"id": "reactflow__edge-
|
|
85
|
+
"id": "reactflow__edge-File-wqFzl{œdataTypeœ:œFileœ,œidœ:œFile-wqFzlœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-50nDI{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-50nDIœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
|
|
112
86
|
"selected": false,
|
|
113
|
-
"source": "
|
|
114
|
-
"sourceHandle": "{œdataTypeœ:
|
|
115
|
-
"target": "
|
|
116
|
-
"targetHandle": "{œfieldNameœ: œ
|
|
87
|
+
"source": "File-wqFzl",
|
|
88
|
+
"sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-wqFzlœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
|
|
89
|
+
"target": "SplitText-50nDI",
|
|
90
|
+
"targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-50nDIœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
|
|
117
91
|
},
|
|
118
92
|
{
|
|
119
93
|
"animated": false,
|
|
120
94
|
"className": "",
|
|
121
95
|
"data": {
|
|
122
96
|
"sourceHandle": {
|
|
123
|
-
"dataType": "
|
|
124
|
-
"id": "
|
|
125
|
-
"name": "
|
|
97
|
+
"dataType": "Prompt",
|
|
98
|
+
"id": "Prompt-wgAUs",
|
|
99
|
+
"name": "prompt",
|
|
126
100
|
"output_types": [
|
|
127
101
|
"Message"
|
|
128
102
|
]
|
|
129
103
|
},
|
|
130
104
|
"targetHandle": {
|
|
131
|
-
"fieldName": "
|
|
132
|
-
"id": "
|
|
105
|
+
"fieldName": "input_value",
|
|
106
|
+
"id": "LanguageModelComponent-nQYc0",
|
|
133
107
|
"inputTypes": [
|
|
134
108
|
"Message"
|
|
135
109
|
],
|
|
136
|
-
"type": "
|
|
110
|
+
"type": "str"
|
|
137
111
|
}
|
|
138
112
|
},
|
|
139
|
-
"id": "reactflow__edge-
|
|
113
|
+
"id": "reactflow__edge-Prompt-wgAUs{œdataTypeœ:œPromptœ,œidœ:œPrompt-wgAUsœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-nQYc0{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-nQYc0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
|
|
140
114
|
"selected": false,
|
|
141
|
-
"source": "
|
|
142
|
-
"sourceHandle": "{œdataTypeœ:
|
|
143
|
-
"target": "
|
|
144
|
-
"targetHandle": "{œfieldNameœ: œ
|
|
115
|
+
"source": "Prompt-wgAUs",
|
|
116
|
+
"sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-wgAUsœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}",
|
|
117
|
+
"target": "LanguageModelComponent-nQYc0",
|
|
118
|
+
"targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-nQYc0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
|
|
145
119
|
},
|
|
146
120
|
{
|
|
147
121
|
"animated": false,
|
|
148
122
|
"className": "",
|
|
149
123
|
"data": {
|
|
150
124
|
"sourceHandle": {
|
|
151
|
-
"dataType": "
|
|
152
|
-
"id": "
|
|
153
|
-
"name": "
|
|
125
|
+
"dataType": "LanguageModelComponent",
|
|
126
|
+
"id": "LanguageModelComponent-nQYc0",
|
|
127
|
+
"name": "text_output",
|
|
154
128
|
"output_types": [
|
|
155
|
-
"
|
|
129
|
+
"Message"
|
|
156
130
|
]
|
|
157
131
|
},
|
|
158
132
|
"targetHandle": {
|
|
159
|
-
"fieldName": "
|
|
160
|
-
"id": "
|
|
133
|
+
"fieldName": "input_value",
|
|
134
|
+
"id": "ChatOutput-VG394",
|
|
161
135
|
"inputTypes": [
|
|
136
|
+
"Data",
|
|
162
137
|
"DataFrame",
|
|
163
|
-
"
|
|
138
|
+
"Message"
|
|
164
139
|
],
|
|
165
|
-
"type": "
|
|
140
|
+
"type": "str"
|
|
166
141
|
}
|
|
167
142
|
},
|
|
168
|
-
"id": "reactflow__edge-
|
|
143
|
+
"id": "reactflow__edge-LanguageModelComponent-nQYc0{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-nQYc0œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-VG394{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-VG394œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}",
|
|
169
144
|
"selected": false,
|
|
170
|
-
"source": "
|
|
171
|
-
"sourceHandle": "{œdataTypeœ:
|
|
172
|
-
"target": "
|
|
173
|
-
"targetHandle": "{œfieldNameœ: œ
|
|
145
|
+
"source": "LanguageModelComponent-nQYc0",
|
|
146
|
+
"sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-nQYc0œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
|
|
147
|
+
"target": "ChatOutput-VG394",
|
|
148
|
+
"targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-VG394œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}"
|
|
174
149
|
},
|
|
175
150
|
{
|
|
176
151
|
"animated": false,
|
|
@@ -178,7 +153,7 @@
|
|
|
178
153
|
"data": {
|
|
179
154
|
"sourceHandle": {
|
|
180
155
|
"dataType": "SplitText",
|
|
181
|
-
"id": "SplitText-
|
|
156
|
+
"id": "SplitText-50nDI",
|
|
182
157
|
"name": "dataframe",
|
|
183
158
|
"output_types": [
|
|
184
159
|
"DataFrame"
|
|
@@ -186,7 +161,7 @@
|
|
|
186
161
|
},
|
|
187
162
|
"targetHandle": {
|
|
188
163
|
"fieldName": "ingest_data",
|
|
189
|
-
"id": "AstraDB-
|
|
164
|
+
"id": "AstraDB-t8lcj",
|
|
190
165
|
"inputTypes": [
|
|
191
166
|
"Data",
|
|
192
167
|
"DataFrame"
|
|
@@ -194,100 +169,65 @@
|
|
|
194
169
|
"type": "other"
|
|
195
170
|
}
|
|
196
171
|
},
|
|
197
|
-
"id": "reactflow__edge-SplitText-
|
|
172
|
+
"id": "reactflow__edge-SplitText-50nDI{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-50nDIœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-t8lcj{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-t8lcjœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
|
|
198
173
|
"selected": false,
|
|
199
|
-
"source": "SplitText-
|
|
200
|
-
"sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-
|
|
201
|
-
"target": "AstraDB-
|
|
202
|
-
"targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-
|
|
174
|
+
"source": "SplitText-50nDI",
|
|
175
|
+
"sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-50nDIœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
|
|
176
|
+
"target": "AstraDB-t8lcj",
|
|
177
|
+
"targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-t8lcjœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}"
|
|
203
178
|
},
|
|
204
179
|
{
|
|
205
|
-
"animated": false,
|
|
206
180
|
"className": "",
|
|
207
181
|
"data": {
|
|
208
182
|
"sourceHandle": {
|
|
209
|
-
"dataType": "
|
|
210
|
-
"id": "
|
|
183
|
+
"dataType": "ChatInput",
|
|
184
|
+
"id": "ChatInput-insg9",
|
|
211
185
|
"name": "message",
|
|
212
186
|
"output_types": [
|
|
213
187
|
"Message"
|
|
214
188
|
]
|
|
215
189
|
},
|
|
216
190
|
"targetHandle": {
|
|
217
|
-
"fieldName": "
|
|
218
|
-
"id": "
|
|
219
|
-
"inputTypes": [
|
|
220
|
-
"Data",
|
|
221
|
-
"DataFrame",
|
|
222
|
-
"Message"
|
|
223
|
-
],
|
|
224
|
-
"type": "other"
|
|
225
|
-
}
|
|
226
|
-
},
|
|
227
|
-
"id": "reactflow__edge-File-vusZ2{œdataTypeœ:œFileœ,œidœ:œFile-vusZ2œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-6H5cD{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-6H5cDœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
|
|
228
|
-
"selected": false,
|
|
229
|
-
"source": "File-vusZ2",
|
|
230
|
-
"sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-vusZ2œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
|
|
231
|
-
"target": "SplitText-6H5cD",
|
|
232
|
-
"targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-6H5cDœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
|
|
233
|
-
},
|
|
234
|
-
{
|
|
235
|
-
"animated": false,
|
|
236
|
-
"className": "",
|
|
237
|
-
"data": {
|
|
238
|
-
"sourceHandle": {
|
|
239
|
-
"dataType": "Prompt",
|
|
240
|
-
"id": "Prompt-V3tlJ",
|
|
241
|
-
"name": "prompt",
|
|
242
|
-
"output_types": [
|
|
243
|
-
"Message"
|
|
244
|
-
]
|
|
245
|
-
},
|
|
246
|
-
"targetHandle": {
|
|
247
|
-
"fieldName": "input_value",
|
|
248
|
-
"id": "LanguageModelComponent-1uhUK",
|
|
191
|
+
"fieldName": "search_query",
|
|
192
|
+
"id": "AstraDB-CLCyc",
|
|
249
193
|
"inputTypes": [
|
|
250
194
|
"Message"
|
|
251
195
|
],
|
|
252
|
-
"type": "
|
|
196
|
+
"type": "query"
|
|
253
197
|
}
|
|
254
198
|
},
|
|
255
|
-
"id": "
|
|
256
|
-
"
|
|
257
|
-
"
|
|
258
|
-
"
|
|
259
|
-
"
|
|
260
|
-
"targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-1uhUKœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
|
|
199
|
+
"id": "xy-edge__ChatInput-insg9{œdataTypeœ:œChatInputœ,œidœ:œChatInput-insg9œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-CLCyc{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-CLCycœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
|
|
200
|
+
"source": "ChatInput-insg9",
|
|
201
|
+
"sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-insg9œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
|
|
202
|
+
"target": "AstraDB-CLCyc",
|
|
203
|
+
"targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-CLCycœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
|
|
261
204
|
},
|
|
262
205
|
{
|
|
263
|
-
"animated": false,
|
|
264
206
|
"className": "",
|
|
265
207
|
"data": {
|
|
266
208
|
"sourceHandle": {
|
|
267
|
-
"dataType": "
|
|
268
|
-
"id": "
|
|
269
|
-
"name": "
|
|
209
|
+
"dataType": "AstraDB",
|
|
210
|
+
"id": "AstraDB-CLCyc",
|
|
211
|
+
"name": "dataframe",
|
|
270
212
|
"output_types": [
|
|
271
|
-
"
|
|
213
|
+
"DataFrame"
|
|
272
214
|
]
|
|
273
215
|
},
|
|
274
216
|
"targetHandle": {
|
|
275
|
-
"fieldName": "
|
|
276
|
-
"id": "
|
|
217
|
+
"fieldName": "input_data",
|
|
218
|
+
"id": "parser-gcmKF",
|
|
277
219
|
"inputTypes": [
|
|
278
|
-
"Data",
|
|
279
220
|
"DataFrame",
|
|
280
|
-
"
|
|
221
|
+
"Data"
|
|
281
222
|
],
|
|
282
|
-
"type": "
|
|
223
|
+
"type": "other"
|
|
283
224
|
}
|
|
284
225
|
},
|
|
285
|
-
"id": "
|
|
286
|
-
"
|
|
287
|
-
"
|
|
288
|
-
"
|
|
289
|
-
"
|
|
290
|
-
"targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-ZaYDWœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}"
|
|
226
|
+
"id": "xy-edge__AstraDB-CLCyc{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-CLCycœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-gcmKF{œfieldNameœ:œinput_dataœ,œidœ:œparser-gcmKFœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
|
|
227
|
+
"source": "AstraDB-CLCyc",
|
|
228
|
+
"sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-CLCycœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
|
|
229
|
+
"target": "parser-gcmKF",
|
|
230
|
+
"targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-gcmKFœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
|
|
291
231
|
}
|
|
292
232
|
],
|
|
293
233
|
"nodes": [
|
|
@@ -295,7 +235,7 @@
|
|
|
295
235
|
"data": {
|
|
296
236
|
"description": "Get chat inputs from the Playground.",
|
|
297
237
|
"display_name": "Chat Input",
|
|
298
|
-
"id": "ChatInput-
|
|
238
|
+
"id": "ChatInput-insg9",
|
|
299
239
|
"node": {
|
|
300
240
|
"base_classes": [
|
|
301
241
|
"Message"
|
|
@@ -574,7 +514,7 @@
|
|
|
574
514
|
},
|
|
575
515
|
"dragging": false,
|
|
576
516
|
"height": 234,
|
|
577
|
-
"id": "ChatInput-
|
|
517
|
+
"id": "ChatInput-insg9",
|
|
578
518
|
"measured": {
|
|
579
519
|
"height": 234,
|
|
580
520
|
"width": 320
|
|
@@ -595,7 +535,7 @@
|
|
|
595
535
|
"data": {
|
|
596
536
|
"description": "Create a prompt template with dynamic variables.",
|
|
597
537
|
"display_name": "Prompt",
|
|
598
|
-
"id": "Prompt-
|
|
538
|
+
"id": "Prompt-wgAUs",
|
|
599
539
|
"node": {
|
|
600
540
|
"base_classes": [
|
|
601
541
|
"Message"
|
|
@@ -759,7 +699,7 @@
|
|
|
759
699
|
},
|
|
760
700
|
"dragging": false,
|
|
761
701
|
"height": 433,
|
|
762
|
-
"id": "Prompt-
|
|
702
|
+
"id": "Prompt-wgAUs",
|
|
763
703
|
"measured": {
|
|
764
704
|
"height": 433,
|
|
765
705
|
"width": 320
|
|
@@ -780,7 +720,7 @@
|
|
|
780
720
|
"data": {
|
|
781
721
|
"description": "Split text into chunks based on specified criteria.",
|
|
782
722
|
"display_name": "Split Text",
|
|
783
|
-
"id": "SplitText-
|
|
723
|
+
"id": "SplitText-50nDI",
|
|
784
724
|
"node": {
|
|
785
725
|
"base_classes": [
|
|
786
726
|
"Data"
|
|
@@ -982,7 +922,7 @@
|
|
|
982
922
|
},
|
|
983
923
|
"dragging": false,
|
|
984
924
|
"height": 475,
|
|
985
|
-
"id": "SplitText-
|
|
925
|
+
"id": "SplitText-50nDI",
|
|
986
926
|
"measured": {
|
|
987
927
|
"height": 475,
|
|
988
928
|
"width": 320
|
|
@@ -1001,7 +941,7 @@
|
|
|
1001
941
|
},
|
|
1002
942
|
{
|
|
1003
943
|
"data": {
|
|
1004
|
-
"id": "note-
|
|
944
|
+
"id": "note-0MDGm",
|
|
1005
945
|
"node": {
|
|
1006
946
|
"description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n",
|
|
1007
947
|
"display_name": "",
|
|
@@ -1014,7 +954,7 @@
|
|
|
1014
954
|
},
|
|
1015
955
|
"dragging": false,
|
|
1016
956
|
"height": 324,
|
|
1017
|
-
"id": "note-
|
|
957
|
+
"id": "note-0MDGm",
|
|
1018
958
|
"measured": {
|
|
1019
959
|
"height": 324,
|
|
1020
960
|
"width": 324
|
|
@@ -1038,7 +978,7 @@
|
|
|
1038
978
|
},
|
|
1039
979
|
{
|
|
1040
980
|
"data": {
|
|
1041
|
-
"id": "note-
|
|
981
|
+
"id": "note-mSH4A",
|
|
1042
982
|
"node": {
|
|
1043
983
|
"description": "Retrieval Augmented Generation (RAG) is a way of providing additional context to a Large Language Model (LLM) by preloading a vector database with embeddings for relevant content. When a user chats with the LLM, a _similarity search_ retrieves relevant content by comparing an embedding for the user's query against the embeddings in the vector database.\nFor example, a RAG chatbot could be pre-loaded with product data, and then it can help customers find specific products based on their queries.\nThis template has two sub-flows. One flow loads data into your vector store, and the other is the user-driven chat flow that compares a new query against the existing content in your vector database.\n\n## Quickstart\n1. Add your OpenAI API key to the **Language Model** component and the two **Embeddings** components.\n2. Add an Astra application token to the **Astra DB** vector store components, or replace these components with other vector store components available in the **Components** menu.\n**💡 Store your credentials as Langflow global variables 🌐 to simplify token management and reuse in your flows.**\n\n## Run the flows\n1. Load your data into a vector database with the 📚 **Load Data** flow. Select a file to upload in the **File** component, and then click **Play** ▶️ on the **Astra DB** component to run the **Load Data** flow.\n2. Open the **Playground** to start a chat with the 🐕 **Retriever** flow.\n\nOnly the run the **Load Data** flow when you need to populate your vector database with baseline content, such as product data.\nThe **Retriever** flow is the user-facing chat flow. This flow generates an embedding from chat input, runs a similarity search against the vector database to retrieve relevant content, and then passes the original query and the retrieved content to the LLM, which produces the chat response sent to the user.\n\n## Next steps\nExperiment by changing the prompt and the loaded data to see how the LLM's responses change.",
|
|
1044
984
|
"display_name": "Read Me",
|
|
@@ -1051,7 +991,7 @@
|
|
|
1051
991
|
},
|
|
1052
992
|
"dragging": false,
|
|
1053
993
|
"height": 556,
|
|
1054
|
-
"id": "note-
|
|
994
|
+
"id": "note-mSH4A",
|
|
1055
995
|
"measured": {
|
|
1056
996
|
"height": 556,
|
|
1057
997
|
"width": 389
|
|
@@ -1077,7 +1017,7 @@
|
|
|
1077
1017
|
"data": {
|
|
1078
1018
|
"description": "Display a chat message in the Playground.",
|
|
1079
1019
|
"display_name": "Chat Output",
|
|
1080
|
-
"id": "ChatOutput-
|
|
1020
|
+
"id": "ChatOutput-VG394",
|
|
1081
1021
|
"node": {
|
|
1082
1022
|
"base_classes": [
|
|
1083
1023
|
"Message"
|
|
@@ -1380,7 +1320,7 @@
|
|
|
1380
1320
|
},
|
|
1381
1321
|
"dragging": false,
|
|
1382
1322
|
"height": 234,
|
|
1383
|
-
"id": "ChatOutput-
|
|
1323
|
+
"id": "ChatOutput-VG394",
|
|
1384
1324
|
"measured": {
|
|
1385
1325
|
"height": 234,
|
|
1386
1326
|
"width": 320
|
|
@@ -1399,7 +1339,7 @@
|
|
|
1399
1339
|
},
|
|
1400
1340
|
{
|
|
1401
1341
|
"data": {
|
|
1402
|
-
"id": "OpenAIEmbeddings-
|
|
1342
|
+
"id": "OpenAIEmbeddings-y2ymc",
|
|
1403
1343
|
"node": {
|
|
1404
1344
|
"base_classes": [
|
|
1405
1345
|
"Embeddings"
|
|
@@ -1892,7 +1832,7 @@
|
|
|
1892
1832
|
},
|
|
1893
1833
|
"dragging": false,
|
|
1894
1834
|
"height": 320,
|
|
1895
|
-
"id": "OpenAIEmbeddings-
|
|
1835
|
+
"id": "OpenAIEmbeddings-y2ymc",
|
|
1896
1836
|
"measured": {
|
|
1897
1837
|
"height": 320,
|
|
1898
1838
|
"width": 320
|
|
@@ -1911,7 +1851,7 @@
|
|
|
1911
1851
|
},
|
|
1912
1852
|
{
|
|
1913
1853
|
"data": {
|
|
1914
|
-
"id": "note-
|
|
1854
|
+
"id": "note-8ieVo",
|
|
1915
1855
|
"node": {
|
|
1916
1856
|
"description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick **Run component** on the **Astra DB** component to load your data.\n\n\n### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.",
|
|
1917
1857
|
"display_name": "",
|
|
@@ -1924,7 +1864,7 @@
|
|
|
1924
1864
|
},
|
|
1925
1865
|
"dragging": false,
|
|
1926
1866
|
"height": 460,
|
|
1927
|
-
"id": "note-
|
|
1867
|
+
"id": "note-8ieVo",
|
|
1928
1868
|
"measured": {
|
|
1929
1869
|
"height": 460,
|
|
1930
1870
|
"width": 340
|
|
@@ -1948,7 +1888,7 @@
|
|
|
1948
1888
|
},
|
|
1949
1889
|
{
|
|
1950
1890
|
"data": {
|
|
1951
|
-
"id": "OpenAIEmbeddings-
|
|
1891
|
+
"id": "OpenAIEmbeddings-cusQX",
|
|
1952
1892
|
"node": {
|
|
1953
1893
|
"base_classes": [
|
|
1954
1894
|
"Embeddings"
|
|
@@ -2441,7 +2381,7 @@
|
|
|
2441
2381
|
},
|
|
2442
2382
|
"dragging": false,
|
|
2443
2383
|
"height": 320,
|
|
2444
|
-
"id": "OpenAIEmbeddings-
|
|
2384
|
+
"id": "OpenAIEmbeddings-cusQX",
|
|
2445
2385
|
"measured": {
|
|
2446
2386
|
"height": 320,
|
|
2447
2387
|
"width": 320
|
|
@@ -2460,7 +2400,7 @@
|
|
|
2460
2400
|
},
|
|
2461
2401
|
{
|
|
2462
2402
|
"data": {
|
|
2463
|
-
"id": "note-
|
|
2403
|
+
"id": "note-Sk1It",
|
|
2464
2404
|
"node": {
|
|
2465
2405
|
"description": "### 💡 Add your OpenAI API key here 👇",
|
|
2466
2406
|
"display_name": "",
|
|
@@ -2473,7 +2413,7 @@
|
|
|
2473
2413
|
},
|
|
2474
2414
|
"dragging": false,
|
|
2475
2415
|
"height": 324,
|
|
2476
|
-
"id": "note-
|
|
2416
|
+
"id": "note-Sk1It",
|
|
2477
2417
|
"measured": {
|
|
2478
2418
|
"height": 324,
|
|
2479
2419
|
"width": 324
|
|
@@ -2492,7 +2432,7 @@
|
|
|
2492
2432
|
},
|
|
2493
2433
|
{
|
|
2494
2434
|
"data": {
|
|
2495
|
-
"id": "note-
|
|
2435
|
+
"id": "note-qmLAt",
|
|
2496
2436
|
"node": {
|
|
2497
2437
|
"description": "### 💡 Add your OpenAI API key here 👇",
|
|
2498
2438
|
"display_name": "",
|
|
@@ -2505,7 +2445,7 @@
|
|
|
2505
2445
|
},
|
|
2506
2446
|
"dragging": false,
|
|
2507
2447
|
"height": 324,
|
|
2508
|
-
"id": "note-
|
|
2448
|
+
"id": "note-qmLAt",
|
|
2509
2449
|
"measured": {
|
|
2510
2450
|
"height": 324,
|
|
2511
2451
|
"width": 324
|
|
@@ -2524,7 +2464,7 @@
|
|
|
2524
2464
|
},
|
|
2525
2465
|
{
|
|
2526
2466
|
"data": {
|
|
2527
|
-
"id": "note-
|
|
2467
|
+
"id": "note-UCSuM",
|
|
2528
2468
|
"node": {
|
|
2529
2469
|
"description": "### 💡 Add your OpenAI API key here 👇",
|
|
2530
2470
|
"display_name": "",
|
|
@@ -2537,7 +2477,7 @@
|
|
|
2537
2477
|
},
|
|
2538
2478
|
"dragging": false,
|
|
2539
2479
|
"height": 324,
|
|
2540
|
-
"id": "note-
|
|
2480
|
+
"id": "note-UCSuM",
|
|
2541
2481
|
"measured": {
|
|
2542
2482
|
"height": 324,
|
|
2543
2483
|
"width": 324
|
|
@@ -2556,7 +2496,7 @@
|
|
|
2556
2496
|
},
|
|
2557
2497
|
{
|
|
2558
2498
|
"data": {
|
|
2559
|
-
"id": "parser-
|
|
2499
|
+
"id": "parser-gcmKF",
|
|
2560
2500
|
"node": {
|
|
2561
2501
|
"base_classes": [
|
|
2562
2502
|
"Message"
|
|
@@ -2718,7 +2658,7 @@
|
|
|
2718
2658
|
"type": "parser"
|
|
2719
2659
|
},
|
|
2720
2660
|
"dragging": false,
|
|
2721
|
-
"id": "parser-
|
|
2661
|
+
"id": "parser-gcmKF",
|
|
2722
2662
|
"measured": {
|
|
2723
2663
|
"height": 361,
|
|
2724
2664
|
"width": 320
|
|
@@ -2732,46 +2672,32 @@
|
|
|
2732
2672
|
},
|
|
2733
2673
|
{
|
|
2734
2674
|
"data": {
|
|
2735
|
-
"id": "
|
|
2675
|
+
"id": "File-wqFzl",
|
|
2736
2676
|
"node": {
|
|
2737
2677
|
"base_classes": [
|
|
2738
|
-
"
|
|
2739
|
-
"DataFrame",
|
|
2740
|
-
"VectorStore"
|
|
2678
|
+
"Message"
|
|
2741
2679
|
],
|
|
2742
2680
|
"beta": false,
|
|
2743
2681
|
"conditional_paths": [],
|
|
2744
2682
|
"custom_fields": {},
|
|
2745
|
-
"description": "
|
|
2746
|
-
"display_name": "
|
|
2747
|
-
"documentation": "
|
|
2683
|
+
"description": "Loads content from one or more files as a DataFrame.",
|
|
2684
|
+
"display_name": "File",
|
|
2685
|
+
"documentation": "",
|
|
2748
2686
|
"edited": false,
|
|
2749
2687
|
"field_order": [
|
|
2750
|
-
"
|
|
2751
|
-
"
|
|
2752
|
-
"
|
|
2753
|
-
"
|
|
2754
|
-
"
|
|
2755
|
-
"
|
|
2756
|
-
"
|
|
2757
|
-
"
|
|
2758
|
-
"
|
|
2759
|
-
"should_cache_vector_store",
|
|
2760
|
-
"search_method",
|
|
2761
|
-
"reranker",
|
|
2762
|
-
"lexical_terms",
|
|
2763
|
-
"number_of_results",
|
|
2764
|
-
"search_type",
|
|
2765
|
-
"search_score_threshold",
|
|
2766
|
-
"advanced_search_filter",
|
|
2767
|
-
"autodetect_collection",
|
|
2768
|
-
"content_field",
|
|
2769
|
-
"deletion_field",
|
|
2770
|
-
"ignore_invalid_documents",
|
|
2771
|
-
"astradb_vectorstore_kwargs"
|
|
2688
|
+
"path",
|
|
2689
|
+
"file_path",
|
|
2690
|
+
"separator",
|
|
2691
|
+
"silent_errors",
|
|
2692
|
+
"delete_server_file_after_processing",
|
|
2693
|
+
"ignore_unsupported_extensions",
|
|
2694
|
+
"ignore_unspecified_files",
|
|
2695
|
+
"use_multithreading",
|
|
2696
|
+
"concurrency_multithreading"
|
|
2772
2697
|
],
|
|
2773
2698
|
"frozen": false,
|
|
2774
|
-
"icon": "
|
|
2699
|
+
"icon": "file-text",
|
|
2700
|
+
"last_updated": "2025-08-27T14:19:16.203Z",
|
|
2775
2701
|
"legacy": false,
|
|
2776
2702
|
"metadata": {
|
|
2777
2703
|
"code_hash": "23fbe9daca09",
|
|
@@ -2804,471 +2730,125 @@
|
|
|
2804
2730
|
{
|
|
2805
2731
|
"allows_loop": false,
|
|
2806
2732
|
"cache": true,
|
|
2807
|
-
"display_name": "
|
|
2808
|
-
"group_outputs": false,
|
|
2809
|
-
"method": "search_documents",
|
|
2810
|
-
"name": "search_results",
|
|
2811
|
-
"selected": "Data",
|
|
2812
|
-
"tool_mode": true,
|
|
2813
|
-
"types": [
|
|
2814
|
-
"Data"
|
|
2815
|
-
],
|
|
2816
|
-
"value": "__UNDEFINED__"
|
|
2817
|
-
},
|
|
2818
|
-
{
|
|
2819
|
-
"allows_loop": false,
|
|
2820
|
-
"cache": true,
|
|
2821
|
-
"display_name": "DataFrame",
|
|
2822
|
-
"group_outputs": false,
|
|
2823
|
-
"method": "as_dataframe",
|
|
2824
|
-
"name": "dataframe",
|
|
2825
|
-
"selected": "DataFrame",
|
|
2826
|
-
"tool_mode": true,
|
|
2827
|
-
"types": [
|
|
2828
|
-
"DataFrame"
|
|
2829
|
-
],
|
|
2830
|
-
"value": "__UNDEFINED__"
|
|
2831
|
-
},
|
|
2832
|
-
{
|
|
2833
|
-
"allows_loop": false,
|
|
2834
|
-
"cache": true,
|
|
2835
|
-
"display_name": "Vector Store Connection",
|
|
2733
|
+
"display_name": "Raw Content",
|
|
2836
2734
|
"group_outputs": false,
|
|
2837
|
-
"
|
|
2838
|
-
"
|
|
2839
|
-
"
|
|
2840
|
-
"
|
|
2735
|
+
"method": "load_files_message",
|
|
2736
|
+
"name": "message",
|
|
2737
|
+
"options": null,
|
|
2738
|
+
"required_inputs": null,
|
|
2739
|
+
"selected": "Message",
|
|
2841
2740
|
"tool_mode": true,
|
|
2842
2741
|
"types": [
|
|
2843
|
-
"
|
|
2742
|
+
"Message"
|
|
2844
2743
|
],
|
|
2845
2744
|
"value": "__UNDEFINED__"
|
|
2846
2745
|
}
|
|
2847
2746
|
],
|
|
2848
2747
|
"pinned": false,
|
|
2849
|
-
"selected_output": "dataframe",
|
|
2850
2748
|
"template": {
|
|
2851
2749
|
"_type": "Component",
|
|
2852
|
-
"
|
|
2853
|
-
"_input_type": "NestedDictInput",
|
|
2750
|
+
"code": {
|
|
2854
2751
|
"advanced": true,
|
|
2855
|
-
"
|
|
2856
|
-
"
|
|
2857
|
-
"
|
|
2752
|
+
"dynamic": true,
|
|
2753
|
+
"fileTypes": [],
|
|
2754
|
+
"file_path": "",
|
|
2755
|
+
"info": "",
|
|
2858
2756
|
"list": false,
|
|
2859
|
-
"
|
|
2860
|
-
"
|
|
2757
|
+
"load_from_db": false,
|
|
2758
|
+
"multiline": true,
|
|
2759
|
+
"name": "code",
|
|
2760
|
+
"password": false,
|
|
2861
2761
|
"placeholder": "",
|
|
2862
|
-
"required":
|
|
2762
|
+
"required": true,
|
|
2863
2763
|
"show": true,
|
|
2864
2764
|
"title_case": false,
|
|
2865
|
-
"
|
|
2866
|
-
"trace_as_input": true,
|
|
2867
|
-
"trace_as_metadata": true,
|
|
2868
|
-
"type": "NestedDict",
|
|
2869
|
-
"value": {}
|
|
2765
|
+
"type": "code",
|
|
2766
|
+
"value": "\"\"\"Enhanced file component with clearer structure and Docling isolation.\n\nNotes:\n-----\n- Functionality is preserved with minimal behavioral changes.\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom typing import TYPE_CHECKING, Any\n\nfrom langflow.base.data.base_file import BaseFileComponent\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n FileInput,\n IntInput,\n MessageTextInput,\n Output,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\nif TYPE_CHECKING:\n from langflow.schema import DataFrame\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"File\"\n description = \"Loads content from files with optional advanced document processing and export using Docling.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n *TEXT_FILE_TYPES,\n ]\n\n # Fixed export settings used when markdown 
export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n # ---- Inputs / Outputs (kept as close to original as possible) -------------------\n _base_inputs = deepcopy(BaseFileComponent._base_inputs)\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Available only for single file processing.\"\n ),\n show=False,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"\", \"easyocr\"],\n value=\"\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"<!-- image -->\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable 
multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n file_path = paths[0] if paths else \"\"\n file_count = len(field_value) if field_value else 0\n\n # Advanced mode only for single (non-tabular) file\n allow_advanced = file_count == 1 and not file_path.endswith((\".csv\", \".xlsx\", \".parquet\"))\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n\n return build_config\n\n def update_outputs(self, 
frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced\", method=\"load_files_advanced\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"))\n\n return frontend_node\n\n # ------------------------------ Core processing 
----------------------------------\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"<script>\"` and\n passing JSON config via stdin. The child prints a JSON result to stdout.\n \"\"\"\n if not file_path:\n return None\n\n args: dict[str, Any] = {\n \"file_path\": file_path,\n \"markdown\": bool(self.markdown),\n \"image_mode\": str(self.IMAGE_MODE),\n \"md_image_placeholder\": str(self.md_image_placeholder),\n \"md_page_break_placeholder\": str(self.md_page_break_placeholder),\n \"pipeline\": str(self.pipeline),\n \"ocr_engine\": str(self.ocr_engine) if getattr(self, \"ocr_engine\", \"\") else None,\n }\n\n # The child is a tiny, self-contained script to keep memory/state isolated.\n child_script = textwrap.dedent(\n r\"\"\"\n import json, sys\n\n def try_imports():\n # Strategy 1: latest layout\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore\n from docling.document_converter import DocumentConverter # type: ignore\n from docling_core.types.doc import ImageRefMode # type: ignore\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"latest\"\n except Exception:\n pass\n # Strategy 2: alternative layout\n try:\n from docling.document_converter import 
DocumentConverter # type: ignore\n try:\n from docling_core.types import ConversionStatus, InputFormat # type: ignore\n except Exception:\n try:\n from docling.datamodel import ConversionStatus, InputFormat # type: ignore\n except Exception:\n class ConversionStatus: SUCCESS = \"success\"\n class InputFormat:\n PDF=\"pdf\"; IMAGE=\"image\"\n try:\n from docling_core.types.doc import ImageRefMode # type: ignore\n except Exception:\n class ImageRefMode:\n PLACEHOLDER=\"placeholder\"; EMBEDDED=\"embedded\"\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"alternative\"\n except Exception:\n pass\n # Strategy 3: basic converter only\n try:\n from docling.document_converter import DocumentConverter # type: ignore\n class ConversionStatus: SUCCESS = \"success\"\n class InputFormat:\n PDF=\"pdf\"; IMAGE=\"image\"\n class ImageRefMode:\n PLACEHOLDER=\"placeholder\"; EMBEDDED=\"embedded\"\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"basic\"\n except Exception as e:\n raise ImportError(f\"Docling imports failed: {e}\") from e\n\n def create_converter(strategy, input_format, DocumentConverter, pipeline, ocr_engine):\n if strategy == \"latest\" and pipeline == \"standard\":\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore\n from docling.document_converter import PdfFormatOption # type: ignore\n pipe = PdfPipelineOptions()\n if ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore\n pipe.do_ocr = True\n fac = get_ocr_factory(allow_external_plugins=False)\n pipe.ocr_options = fac.create_options(kind=ocr_engine)\n except Exception:\n pipe.do_ocr = False\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(pipeline_options=pipe)\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(pipeline_options=pipe)\n return DocumentConverter(format_options=fmt)\n except 
Exception:\n return DocumentConverter()\n return DocumentConverter()\n\n def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):\n try:\n mode = getattr(ImageRefMode, image_mode.upper(), image_mode)\n return document.export_to_markdown(\n image_mode=mode,\n image_placeholder=img_ph,\n page_break_placeholder=pg_ph,\n )\n except Exception:\n try:\n return document.export_to_text()\n except Exception:\n return str(document)\n\n def to_rows(doc_dict):\n rows = []\n for t in doc_dict.get(\"texts\", []):\n prov = t.get(\"prov\") or []\n page_no = None\n if prov and isinstance(prov, list) and isinstance(prov[0], dict):\n page_no = prov[0].get(\"page_no\")\n rows.append({\n \"page_no\": page_no,\n \"label\": t.get(\"label\"),\n \"text\": t.get(\"text\"),\n \"level\": t.get(\"level\"),\n })\n return rows\n\n def main():\n cfg = json.loads(sys.stdin.read())\n file_path = cfg[\"file_path\"]\n markdown = cfg[\"markdown\"]\n image_mode = cfg[\"image_mode\"]\n img_ph = cfg[\"md_image_placeholder\"]\n pg_ph = cfg[\"md_page_break_placeholder\"]\n pipeline = cfg[\"pipeline\"]\n ocr_engine = cfg.get(\"ocr_engine\")\n meta = {\"file_path\": file_path}\n\n try:\n ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, strategy = try_imports()\n converter = create_converter(strategy, InputFormat, DocumentConverter, pipeline, ocr_engine)\n try:\n res = converter.convert(file_path)\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling conversion error: {e}\", \"meta\": meta}))\n return\n\n ok = False\n if hasattr(res, \"status\"):\n try:\n ok = (res.status == ConversionStatus.SUCCESS) or (str(res.status).lower() == \"success\")\n except Exception:\n ok = (str(res.status).lower() == \"success\")\n if not ok and hasattr(res, \"document\"):\n ok = getattr(res, \"document\", None) is not None\n if not ok:\n print(json.dumps({\"ok\": False, \"error\": \"Docling conversion failed\", \"meta\": meta}))\n return\n\n doc = getattr(res, 
\"document\", None)\n if doc is None:\n print(json.dumps({\"ok\": False, \"error\": \"Docling produced no document\", \"meta\": meta}))\n return\n\n if markdown:\n text = export_markdown(doc, ImageRefMode, image_mode, img_ph, pg_ph)\n print(json.dumps({\"ok\": True, \"mode\": \"markdown\", \"text\": text, \"meta\": meta}))\n return\n\n # structured\n try:\n doc_dict = doc.export_to_dict()\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling export_to_dict failed: {e}\", \"meta\": meta}))\n return\n\n rows = to_rows(doc_dict)\n print(json.dumps({\"ok\": True, \"mode\": \"structured\", \"doc\": rows, \"meta\": meta}))\n except Exception as e:\n print(\n json.dumps({\n \"ok\": False,\n \"error\": f\"Docling processing error: {e}\",\n \"meta\": {\"file_path\": file_path},\n })\n )\n\n if __name__ == \"__main__\":\n main()\n \"\"\"\n )\n\n # Validate file_path to avoid command injection or unsafe input\n if not isinstance(args[\"file_path\"], str) or any(c in args[\"file_path\"] for c in [\";\", \"|\", \"&\", \"$\", \"`\"]):\n return Data(data={\"error\": \"Unsafe file path detected.\", \"file_path\": args[\"file_path\"]})\n\n proc = subprocess.run( # noqa: S603\n [sys.executable, \"-u\", \"-c\", child_script],\n input=json.dumps(args).encode(\"utf-8\"),\n capture_output=True,\n check=False,\n )\n\n if not proc.stdout:\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\") or \"no output from child process\"\n return Data(data={\"error\": f\"Docling subprocess error: {err_msg}\", \"file_path\": file_path})\n\n try:\n result = json.loads(proc.stdout.decode(\"utf-8\"))\n except Exception as e: # noqa: BLE001\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\")\n return Data(\n data={\"error\": f\"Invalid JSON from Docling subprocess: {e}. 
stderr={err_msg}\", \"file_path\": file_path},\n )\n\n if not result.get(\"ok\"):\n return Data(data={\"error\": result.get(\"error\", \"Unknown Docling error\"), **result.get(\"meta\", {})})\n\n meta = result.get(\"meta\", {})\n if result.get(\"mode\") == \"markdown\":\n exported_content = str(result.get(\"text\", \"\"))\n return Data(\n text=exported_content,\n data={\"exported_content\": exported_content, \"export_format\": self.EXPORT_FORMAT, **meta},\n )\n\n rows = list(result.get(\"doc\", []))\n return Data(data={\"doc\": rows, \"export_format\": self.EXPORT_FORMAT, **meta})\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process input files.\n\n - Single file + advanced_mode => Docling in a separate process.\n - Otherwise => standard parsing in current process (optionally threaded).\n \"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n self.log(f\"File not found: {file_path}. 
Error: {e}\")\n if not silent_errors:\n raise\n return None\n except Exception as e:\n self.log(f\"Unexpected error processing {file_path}: {e}\")\n if not silent_errors:\n raise\n return None\n\n # Advanced path: only for a single Docling-compatible file\n if len(file_list) == 1:\n file_path = str(file_list[0].path)\n if self.advanced_mode and self._is_docling_compatible(file_path):\n advanced_data: Data | None = self._process_docling_in_subprocess(file_path)\n\n # --- UNNEST: expand each element in `doc` to its own Data row\n payload = getattr(advanced_data, \"data\", {}) or {}\n doc_rows = payload.get(\"doc\")\n if isinstance(doc_rows, list):\n rows: list[Data | None] = [\n Data(\n data={\n \"file_path\": file_path,\n **(item if isinstance(item, dict) else {\"value\": item}),\n },\n )\n for item in doc_rows\n ]\n return self.rollup_data(file_list, rows)\n\n # If not structured, keep as-is (e.g., markdown export or error dict)\n return self.rollup_data(file_list, [advanced_data])\n\n # Standard multi-file (or single non-advanced) path\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n file_paths = [str(f.path) for f in file_list]\n self.log(f\"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.\")\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n return self.rollup_data(file_list, my_data)\n\n # ------------------------------ Output helpers -----------------------------------\n\n def load_files_advanced(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to an advanced format.\"\"\"\n self.markdown = False\n return self.load_files()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files()\n return 
Message(text=str(result.text[0]))\n"
|
|
2870
2767
|
},
|
|
2871
|
-
"
|
|
2872
|
-
"_input_type": "
|
|
2873
|
-
"advanced":
|
|
2874
|
-
"display_name": "
|
|
2768
|
+
"concurrency_multithreading": {
|
|
2769
|
+
"_input_type": "IntInput",
|
|
2770
|
+
"advanced": true,
|
|
2771
|
+
"display_name": "Processing Concurrency",
|
|
2875
2772
|
"dynamic": false,
|
|
2876
|
-
"info": "
|
|
2773
|
+
"info": "When multiple files are being processed, the number of files to process concurrently.",
|
|
2877
2774
|
"list": false,
|
|
2878
2775
|
"list_add_label": "Add More",
|
|
2879
|
-
"
|
|
2880
|
-
"name": "api_endpoint",
|
|
2776
|
+
"name": "concurrency_multithreading",
|
|
2881
2777
|
"placeholder": "",
|
|
2882
2778
|
"required": false,
|
|
2883
|
-
"show":
|
|
2779
|
+
"show": true,
|
|
2884
2780
|
"title_case": false,
|
|
2885
2781
|
"tool_mode": false,
|
|
2886
2782
|
"trace_as_metadata": true,
|
|
2887
|
-
"type": "
|
|
2888
|
-
"value":
|
|
2783
|
+
"type": "int",
|
|
2784
|
+
"value": 1
|
|
2889
2785
|
},
|
|
2890
|
-
"
|
|
2891
|
-
"_input_type": "
|
|
2786
|
+
"delete_server_file_after_processing": {
|
|
2787
|
+
"_input_type": "BoolInput",
|
|
2892
2788
|
"advanced": true,
|
|
2893
|
-
"display_name": "
|
|
2789
|
+
"display_name": "Delete Server File After Processing",
|
|
2894
2790
|
"dynamic": false,
|
|
2895
|
-
"info": "
|
|
2791
|
+
"info": "If true, the Server File Path will be deleted after processing.",
|
|
2896
2792
|
"list": false,
|
|
2897
2793
|
"list_add_label": "Add More",
|
|
2898
|
-
"name": "
|
|
2794
|
+
"name": "delete_server_file_after_processing",
|
|
2899
2795
|
"placeholder": "",
|
|
2900
2796
|
"required": false,
|
|
2901
2797
|
"show": true,
|
|
2902
2798
|
"title_case": false,
|
|
2903
2799
|
"tool_mode": false,
|
|
2904
|
-
"trace_as_input": true,
|
|
2905
2800
|
"trace_as_metadata": true,
|
|
2906
|
-
"type": "
|
|
2907
|
-
"value":
|
|
2801
|
+
"type": "bool",
|
|
2802
|
+
"value": true
|
|
2908
2803
|
},
|
|
2909
|
-
"
|
|
2910
|
-
"_input_type": "
|
|
2804
|
+
"file_path": {
|
|
2805
|
+
"_input_type": "HandleInput",
|
|
2911
2806
|
"advanced": true,
|
|
2912
|
-
"display_name": "
|
|
2807
|
+
"display_name": "Server File Path",
|
|
2913
2808
|
"dynamic": false,
|
|
2914
|
-
"info": "
|
|
2915
|
-
"
|
|
2809
|
+
"info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supersedes 'Path' but supports same file types.",
|
|
2810
|
+
"input_types": [
|
|
2811
|
+
"Data",
|
|
2812
|
+
"Message"
|
|
2813
|
+
],
|
|
2814
|
+
"list": true,
|
|
2916
2815
|
"list_add_label": "Add More",
|
|
2917
|
-
"name": "
|
|
2816
|
+
"name": "file_path",
|
|
2918
2817
|
"placeholder": "",
|
|
2919
2818
|
"required": false,
|
|
2920
2819
|
"show": true,
|
|
2921
2820
|
"title_case": false,
|
|
2922
|
-
"tool_mode": false,
|
|
2923
2821
|
"trace_as_metadata": true,
|
|
2924
|
-
"type": "
|
|
2925
|
-
"value": true
|
|
2926
|
-
},
|
|
2927
|
-
"code": {
|
|
2928
|
-
"advanced": true,
|
|
2929
|
-
"dynamic": true,
|
|
2930
|
-
"fileTypes": [],
|
|
2931
|
-
"file_path": "",
|
|
2932
|
-
"info": "",
|
|
2933
|
-
"list": false,
|
|
2934
|
-
"load_from_db": false,
|
|
2935
|
-
"multiline": true,
|
|
2936
|
-
"name": "code",
|
|
2937
|
-
"password": false,
|
|
2938
|
-
"placeholder": "",
|
|
2939
|
-
"required": true,
|
|
2940
|
-
"show": true,
|
|
2941
|
-
"title_case": false,
|
|
2942
|
-
"type": "code",
|
|
2943
|
-
"value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n 
display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n '<a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" '\n 'rel=\"noopener noreferrer\">your database in Astra DB</a>'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n 
info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": 
\"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n 
@classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for 
each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, 
api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the 
provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and 
related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for 
cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = 
build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the 
reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection 
creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and 
score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: 
dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in 
build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f'<a class=\"underline\" target=\"_blank\" rel=\"noopener noreferrer\" '\n f'href=\"https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}\">'\n \"your database in Astra DB</a>.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n 
build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n 
build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n 
hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': 
{e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or 
filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
|
|
2944
|
-
},
|
|
2945
|
-
"collection_name": {
|
|
2946
|
-
"_input_type": "DropdownInput",
|
|
2947
|
-
"advanced": false,
|
|
2948
|
-
"combobox": true,
|
|
2949
|
-
"dialog_inputs": {
|
|
2950
|
-
"fields": {
|
|
2951
|
-
"data": {
|
|
2952
|
-
"node": {
|
|
2953
|
-
"description": "Please allow several seconds for creation to complete.",
|
|
2954
|
-
"display_name": "Create new collection",
|
|
2955
|
-
"field_order": [
|
|
2956
|
-
"01_new_collection_name",
|
|
2957
|
-
"02_embedding_generation_provider",
|
|
2958
|
-
"03_embedding_generation_model",
|
|
2959
|
-
"04_dimension"
|
|
2960
|
-
],
|
|
2961
|
-
"name": "create_collection",
|
|
2962
|
-
"template": {
|
|
2963
|
-
"01_new_collection_name": {
|
|
2964
|
-
"_input_type": "StrInput",
|
|
2965
|
-
"advanced": false,
|
|
2966
|
-
"display_name": "Name",
|
|
2967
|
-
"dynamic": false,
|
|
2968
|
-
"info": "Name of the new collection to create in Astra DB.",
|
|
2969
|
-
"list": false,
|
|
2970
|
-
"list_add_label": "Add More",
|
|
2971
|
-
"load_from_db": false,
|
|
2972
|
-
"name": "new_collection_name",
|
|
2973
|
-
"placeholder": "",
|
|
2974
|
-
"required": true,
|
|
2975
|
-
"show": true,
|
|
2976
|
-
"title_case": false,
|
|
2977
|
-
"tool_mode": false,
|
|
2978
|
-
"trace_as_metadata": true,
|
|
2979
|
-
"type": "str",
|
|
2980
|
-
"value": ""
|
|
2981
|
-
},
|
|
2982
|
-
"02_embedding_generation_provider": {
|
|
2983
|
-
"_input_type": "DropdownInput",
|
|
2984
|
-
"advanced": false,
|
|
2985
|
-
"combobox": false,
|
|
2986
|
-
"dialog_inputs": {},
|
|
2987
|
-
"display_name": "Embedding generation method",
|
|
2988
|
-
"dynamic": false,
|
|
2989
|
-
"helper_text": "To create collections with more embedding provider options, go to <a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" rel=\"noopener noreferrer\">your database in Astra DB</a>",
|
|
2990
|
-
"info": "Provider to use for generating embeddings.",
|
|
2991
|
-
"name": "embedding_generation_provider",
|
|
2992
|
-
"options": [],
|
|
2993
|
-
"options_metadata": [],
|
|
2994
|
-
"placeholder": "",
|
|
2995
|
-
"real_time_refresh": true,
|
|
2996
|
-
"required": true,
|
|
2997
|
-
"show": true,
|
|
2998
|
-
"title_case": false,
|
|
2999
|
-
"tool_mode": false,
|
|
3000
|
-
"trace_as_metadata": true,
|
|
3001
|
-
"type": "str",
|
|
3002
|
-
"value": ""
|
|
3003
|
-
},
|
|
3004
|
-
"03_embedding_generation_model": {
|
|
3005
|
-
"_input_type": "DropdownInput",
|
|
3006
|
-
"advanced": false,
|
|
3007
|
-
"combobox": false,
|
|
3008
|
-
"dialog_inputs": {},
|
|
3009
|
-
"display_name": "Embedding model",
|
|
3010
|
-
"dynamic": false,
|
|
3011
|
-
"info": "Model to use for generating embeddings.",
|
|
3012
|
-
"name": "embedding_generation_model",
|
|
3013
|
-
"options": [],
|
|
3014
|
-
"options_metadata": [],
|
|
3015
|
-
"placeholder": "",
|
|
3016
|
-
"real_time_refresh": true,
|
|
3017
|
-
"required": false,
|
|
3018
|
-
"show": true,
|
|
3019
|
-
"title_case": false,
|
|
3020
|
-
"tool_mode": false,
|
|
3021
|
-
"trace_as_metadata": true,
|
|
3022
|
-
"type": "str",
|
|
3023
|
-
"value": ""
|
|
3024
|
-
},
|
|
3025
|
-
"04_dimension": {
|
|
3026
|
-
"_input_type": "IntInput",
|
|
3027
|
-
"advanced": false,
|
|
3028
|
-
"display_name": "Dimensions",
|
|
3029
|
-
"dynamic": false,
|
|
3030
|
-
"info": "Dimensions of the embeddings to generate.",
|
|
3031
|
-
"list": false,
|
|
3032
|
-
"list_add_label": "Add More",
|
|
3033
|
-
"name": "dimension",
|
|
3034
|
-
"placeholder": "",
|
|
3035
|
-
"required": false,
|
|
3036
|
-
"show": true,
|
|
3037
|
-
"title_case": false,
|
|
3038
|
-
"tool_mode": false,
|
|
3039
|
-
"trace_as_metadata": true,
|
|
3040
|
-
"type": "int"
|
|
3041
|
-
}
|
|
3042
|
-
}
|
|
3043
|
-
}
|
|
3044
|
-
}
|
|
3045
|
-
},
|
|
3046
|
-
"functionality": "create"
|
|
3047
|
-
},
|
|
3048
|
-
"display_name": "Collection",
|
|
3049
|
-
"dynamic": false,
|
|
3050
|
-
"info": "The name of the collection within Astra DB where the vectors will be stored.",
|
|
3051
|
-
"name": "collection_name",
|
|
3052
|
-
"options": [],
|
|
3053
|
-
"options_metadata": [],
|
|
3054
|
-
"placeholder": "",
|
|
3055
|
-
"real_time_refresh": true,
|
|
3056
|
-
"refresh_button": true,
|
|
3057
|
-
"required": true,
|
|
3058
|
-
"show": false,
|
|
3059
|
-
"title_case": false,
|
|
3060
|
-
"tool_mode": false,
|
|
3061
|
-
"trace_as_metadata": true,
|
|
3062
|
-
"type": "str",
|
|
3063
|
-
"value": ""
|
|
3064
|
-
},
|
|
3065
|
-
"content_field": {
|
|
3066
|
-
"_input_type": "StrInput",
|
|
3067
|
-
"advanced": true,
|
|
3068
|
-
"display_name": "Content Field",
|
|
3069
|
-
"dynamic": false,
|
|
3070
|
-
"info": "Field to use as the text content field for the vector store.",
|
|
3071
|
-
"list": false,
|
|
3072
|
-
"list_add_label": "Add More",
|
|
3073
|
-
"load_from_db": false,
|
|
3074
|
-
"name": "content_field",
|
|
3075
|
-
"placeholder": "",
|
|
3076
|
-
"required": false,
|
|
3077
|
-
"show": true,
|
|
3078
|
-
"title_case": false,
|
|
3079
|
-
"tool_mode": false,
|
|
3080
|
-
"trace_as_metadata": true,
|
|
3081
|
-
"type": "str",
|
|
3082
|
-
"value": ""
|
|
3083
|
-
},
|
|
3084
|
-
"database_name": {
|
|
3085
|
-
"_input_type": "DropdownInput",
|
|
3086
|
-
"advanced": false,
|
|
3087
|
-
"combobox": true,
|
|
3088
|
-
"dialog_inputs": {
|
|
3089
|
-
"fields": {
|
|
3090
|
-
"data": {
|
|
3091
|
-
"node": {
|
|
3092
|
-
"description": "Please allow several minutes for creation to complete.",
|
|
3093
|
-
"display_name": "Create new database",
|
|
3094
|
-
"field_order": [
|
|
3095
|
-
"01_new_database_name",
|
|
3096
|
-
"02_cloud_provider",
|
|
3097
|
-
"03_region"
|
|
3098
|
-
],
|
|
3099
|
-
"name": "create_database",
|
|
3100
|
-
"template": {
|
|
3101
|
-
"01_new_database_name": {
|
|
3102
|
-
"_input_type": "StrInput",
|
|
3103
|
-
"advanced": false,
|
|
3104
|
-
"display_name": "Name",
|
|
3105
|
-
"dynamic": false,
|
|
3106
|
-
"info": "Name of the new database to create in Astra DB.",
|
|
3107
|
-
"list": false,
|
|
3108
|
-
"list_add_label": "Add More",
|
|
3109
|
-
"load_from_db": false,
|
|
3110
|
-
"name": "new_database_name",
|
|
3111
|
-
"placeholder": "",
|
|
3112
|
-
"required": true,
|
|
3113
|
-
"show": true,
|
|
3114
|
-
"title_case": false,
|
|
3115
|
-
"tool_mode": false,
|
|
3116
|
-
"trace_as_metadata": true,
|
|
3117
|
-
"type": "str",
|
|
3118
|
-
"value": ""
|
|
3119
|
-
},
|
|
3120
|
-
"02_cloud_provider": {
|
|
3121
|
-
"_input_type": "DropdownInput",
|
|
3122
|
-
"advanced": false,
|
|
3123
|
-
"combobox": false,
|
|
3124
|
-
"dialog_inputs": {},
|
|
3125
|
-
"display_name": "Cloud provider",
|
|
3126
|
-
"dynamic": false,
|
|
3127
|
-
"info": "Cloud provider for the new database.",
|
|
3128
|
-
"name": "cloud_provider",
|
|
3129
|
-
"options": [
|
|
3130
|
-
"Amazon Web Services",
|
|
3131
|
-
"Google Cloud Platform",
|
|
3132
|
-
"Microsoft Azure"
|
|
3133
|
-
],
|
|
3134
|
-
"options_metadata": [],
|
|
3135
|
-
"placeholder": "",
|
|
3136
|
-
"real_time_refresh": true,
|
|
3137
|
-
"required": true,
|
|
3138
|
-
"show": true,
|
|
3139
|
-
"title_case": false,
|
|
3140
|
-
"tool_mode": false,
|
|
3141
|
-
"trace_as_metadata": true,
|
|
3142
|
-
"type": "str",
|
|
3143
|
-
"value": ""
|
|
3144
|
-
},
|
|
3145
|
-
"03_region": {
|
|
3146
|
-
"_input_type": "DropdownInput",
|
|
3147
|
-
"advanced": false,
|
|
3148
|
-
"combobox": false,
|
|
3149
|
-
"dialog_inputs": {},
|
|
3150
|
-
"display_name": "Region",
|
|
3151
|
-
"dynamic": false,
|
|
3152
|
-
"info": "Region for the new database.",
|
|
3153
|
-
"name": "region",
|
|
3154
|
-
"options": [],
|
|
3155
|
-
"options_metadata": [],
|
|
3156
|
-
"placeholder": "",
|
|
3157
|
-
"required": true,
|
|
3158
|
-
"show": true,
|
|
3159
|
-
"title_case": false,
|
|
3160
|
-
"tool_mode": false,
|
|
3161
|
-
"trace_as_metadata": true,
|
|
3162
|
-
"type": "str",
|
|
3163
|
-
"value": ""
|
|
3164
|
-
}
|
|
3165
|
-
}
|
|
3166
|
-
}
|
|
3167
|
-
}
|
|
3168
|
-
},
|
|
3169
|
-
"functionality": "create"
|
|
3170
|
-
},
|
|
3171
|
-
"display_name": "Database",
|
|
3172
|
-
"dynamic": false,
|
|
3173
|
-
"info": "The Database name for the Astra DB instance.",
|
|
3174
|
-
"name": "database_name",
|
|
3175
|
-
"options": [],
|
|
3176
|
-
"options_metadata": [
|
|
3177
|
-
{
|
|
3178
|
-
"api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
|
|
3179
|
-
"collections": 5,
|
|
3180
|
-
"keyspaces": [
|
|
3181
|
-
"default_keyspace",
|
|
3182
|
-
"samples_dataflow"
|
|
3183
|
-
],
|
|
3184
|
-
"org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
|
|
3185
|
-
"status": null
|
|
3186
|
-
}
|
|
3187
|
-
],
|
|
3188
|
-
"placeholder": "",
|
|
3189
|
-
"real_time_refresh": true,
|
|
3190
|
-
"refresh_button": true,
|
|
3191
|
-
"required": true,
|
|
3192
|
-
"show": true,
|
|
3193
|
-
"title_case": false,
|
|
3194
|
-
"tool_mode": false,
|
|
3195
|
-
"trace_as_metadata": true,
|
|
3196
|
-
"type": "str",
|
|
2822
|
+
"type": "other",
|
|
3197
2823
|
"value": ""
|
|
3198
2824
|
},
|
|
3199
|
-
"
|
|
3200
|
-
"_input_type": "
|
|
2825
|
+
"ignore_unspecified_files": {
|
|
2826
|
+
"_input_type": "BoolInput",
|
|
3201
2827
|
"advanced": true,
|
|
3202
|
-
"display_name": "
|
|
3203
|
-
"dynamic": false,
|
|
3204
|
-
"info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
|
|
3205
|
-
"list": false,
|
|
3206
|
-
"list_add_label": "Add More",
|
|
3207
|
-
"load_from_db": false,
|
|
3208
|
-
"name": "deletion_field",
|
|
3209
|
-
"placeholder": "",
|
|
3210
|
-
"required": false,
|
|
3211
|
-
"show": true,
|
|
3212
|
-
"title_case": false,
|
|
3213
|
-
"tool_mode": false,
|
|
3214
|
-
"trace_as_metadata": true,
|
|
3215
|
-
"type": "str",
|
|
3216
|
-
"value": ""
|
|
3217
|
-
},
|
|
3218
|
-
"embedding_model": {
|
|
3219
|
-
"_input_type": "HandleInput",
|
|
3220
|
-
"advanced": false,
|
|
3221
|
-
"display_name": "Embedding Model",
|
|
2828
|
+
"display_name": "Ignore Unspecified Files",
|
|
3222
2829
|
"dynamic": false,
|
|
3223
|
-
"info": "
|
|
3224
|
-
"input_types": [
|
|
3225
|
-
"Embeddings"
|
|
3226
|
-
],
|
|
2830
|
+
"info": "If true, Data with no 'file_path' property will be ignored.",
|
|
3227
2831
|
"list": false,
|
|
3228
2832
|
"list_add_label": "Add More",
|
|
3229
|
-
"name": "
|
|
3230
|
-
"placeholder": "",
|
|
3231
|
-
"required": false,
|
|
3232
|
-
"show": true,
|
|
3233
|
-
"title_case": false,
|
|
3234
|
-
"trace_as_metadata": true,
|
|
3235
|
-
"type": "other",
|
|
3236
|
-
"value": ""
|
|
3237
|
-
},
|
|
3238
|
-
"environment": {
|
|
3239
|
-
"_input_type": "DropdownInput",
|
|
3240
|
-
"advanced": true,
|
|
3241
|
-
"combobox": true,
|
|
3242
|
-
"dialog_inputs": {},
|
|
3243
|
-
"display_name": "Environment",
|
|
3244
|
-
"dynamic": false,
|
|
3245
|
-
"info": "The environment for the Astra DB API Endpoint.",
|
|
3246
|
-
"name": "environment",
|
|
3247
|
-
"options": [
|
|
3248
|
-
"prod",
|
|
3249
|
-
"test",
|
|
3250
|
-
"dev"
|
|
3251
|
-
],
|
|
3252
|
-
"options_metadata": [],
|
|
2833
|
+
"name": "ignore_unspecified_files",
|
|
3253
2834
|
"placeholder": "",
|
|
3254
|
-
"real_time_refresh": true,
|
|
3255
2835
|
"required": false,
|
|
3256
2836
|
"show": true,
|
|
3257
2837
|
"title_case": false,
|
|
3258
2838
|
"tool_mode": false,
|
|
3259
2839
|
"trace_as_metadata": true,
|
|
3260
|
-
"type": "
|
|
3261
|
-
"value":
|
|
2840
|
+
"type": "bool",
|
|
2841
|
+
"value": false
|
|
3262
2842
|
},
|
|
3263
|
-
"
|
|
2843
|
+
"ignore_unsupported_extensions": {
|
|
3264
2844
|
"_input_type": "BoolInput",
|
|
3265
2845
|
"advanced": true,
|
|
3266
|
-
"display_name": "Ignore
|
|
2846
|
+
"display_name": "Ignore Unsupported Extensions",
|
|
3267
2847
|
"dynamic": false,
|
|
3268
|
-
"info": "
|
|
2848
|
+
"info": "If true, files with unsupported extensions will not be processed.",
|
|
3269
2849
|
"list": false,
|
|
3270
2850
|
"list_add_label": "Add More",
|
|
3271
|
-
"name": "
|
|
2851
|
+
"name": "ignore_unsupported_extensions",
|
|
3272
2852
|
"placeholder": "",
|
|
3273
2853
|
"required": false,
|
|
3274
2854
|
"show": true,
|
|
@@ -3276,210 +2856,339 @@
|
|
|
3276
2856
|
"tool_mode": false,
|
|
3277
2857
|
"trace_as_metadata": true,
|
|
3278
2858
|
"type": "bool",
|
|
3279
|
-
"value":
|
|
2859
|
+
"value": true
|
|
3280
2860
|
},
|
|
3281
|
-
"
|
|
3282
|
-
"_input_type": "
|
|
2861
|
+
"path": {
|
|
2862
|
+
"_input_type": "FileInput",
|
|
3283
2863
|
"advanced": false,
|
|
3284
|
-
"display_name": "
|
|
2864
|
+
"display_name": "Files",
|
|
3285
2865
|
"dynamic": false,
|
|
3286
|
-
"
|
|
3287
|
-
|
|
3288
|
-
"
|
|
3289
|
-
"
|
|
2866
|
+
"fileTypes": [
|
|
2867
|
+
"txt",
|
|
2868
|
+
"md",
|
|
2869
|
+
"mdx",
|
|
2870
|
+
"csv",
|
|
2871
|
+
"json",
|
|
2872
|
+
"yaml",
|
|
2873
|
+
"yml",
|
|
2874
|
+
"xml",
|
|
2875
|
+
"html",
|
|
2876
|
+
"htm",
|
|
2877
|
+
"pdf",
|
|
2878
|
+
"docx",
|
|
2879
|
+
"py",
|
|
2880
|
+
"sh",
|
|
2881
|
+
"sql",
|
|
2882
|
+
"js",
|
|
2883
|
+
"ts",
|
|
2884
|
+
"tsx",
|
|
2885
|
+
"zip",
|
|
2886
|
+
"tar",
|
|
2887
|
+
"tgz",
|
|
2888
|
+
"bz2",
|
|
2889
|
+
"gz"
|
|
3290
2890
|
],
|
|
2891
|
+
"file_path": [],
|
|
2892
|
+
"info": "Supported file extensions: txt, md, mdx, csv, json, yaml, yml, xml, html, htm, pdf, docx, py, sh, sql, js, ts, tsx; optionally bundled in file extensions: zip, tar, tgz, bz2, gz",
|
|
3291
2893
|
"list": true,
|
|
3292
2894
|
"list_add_label": "Add More",
|
|
3293
|
-
"name": "
|
|
2895
|
+
"name": "path",
|
|
3294
2896
|
"placeholder": "",
|
|
2897
|
+
"real_time_refresh": true,
|
|
3295
2898
|
"required": false,
|
|
3296
2899
|
"show": true,
|
|
2900
|
+
"temp_file": false,
|
|
3297
2901
|
"title_case": false,
|
|
3298
2902
|
"trace_as_metadata": true,
|
|
3299
|
-
"type": "
|
|
2903
|
+
"type": "file",
|
|
3300
2904
|
"value": ""
|
|
3301
2905
|
},
|
|
3302
|
-
"
|
|
3303
|
-
"_input_type": "
|
|
2906
|
+
"separator": {
|
|
2907
|
+
"_input_type": "StrInput",
|
|
3304
2908
|
"advanced": true,
|
|
3305
|
-
"
|
|
3306
|
-
"dialog_inputs": {},
|
|
3307
|
-
"display_name": "Keyspace",
|
|
2909
|
+
"display_name": "Separator",
|
|
3308
2910
|
"dynamic": false,
|
|
3309
|
-
"info": "
|
|
3310
|
-
"
|
|
3311
|
-
"
|
|
3312
|
-
"
|
|
2911
|
+
"info": "Specify the separator to use between multiple outputs in Message format.",
|
|
2912
|
+
"list": false,
|
|
2913
|
+
"list_add_label": "Add More",
|
|
2914
|
+
"load_from_db": false,
|
|
2915
|
+
"name": "separator",
|
|
3313
2916
|
"placeholder": "",
|
|
3314
|
-
"real_time_refresh": true,
|
|
3315
2917
|
"required": false,
|
|
3316
2918
|
"show": true,
|
|
3317
2919
|
"title_case": false,
|
|
3318
2920
|
"tool_mode": false,
|
|
3319
2921
|
"trace_as_metadata": true,
|
|
3320
2922
|
"type": "str",
|
|
3321
|
-
"value": ""
|
|
2923
|
+
"value": "\n\n"
|
|
3322
2924
|
},
|
|
3323
|
-
"
|
|
3324
|
-
"_input_type": "
|
|
2925
|
+
"silent_errors": {
|
|
2926
|
+
"_input_type": "BoolInput",
|
|
3325
2927
|
"advanced": true,
|
|
3326
|
-
"display_name": "
|
|
3327
|
-
"dynamic": false,
|
|
3328
|
-
"info": "
|
|
3329
|
-
"input_types": [
|
|
3330
|
-
"Message"
|
|
3331
|
-
],
|
|
2928
|
+
"display_name": "Silent Errors",
|
|
2929
|
+
"dynamic": false,
|
|
2930
|
+
"info": "If true, errors will not raise an exception.",
|
|
3332
2931
|
"list": false,
|
|
3333
2932
|
"list_add_label": "Add More",
|
|
3334
|
-
"
|
|
3335
|
-
"
|
|
3336
|
-
"placeholder": "Enter terms to search...",
|
|
2933
|
+
"name": "silent_errors",
|
|
2934
|
+
"placeholder": "",
|
|
3337
2935
|
"required": false,
|
|
3338
|
-
"
|
|
3339
|
-
"show": false,
|
|
2936
|
+
"show": true,
|
|
3340
2937
|
"title_case": false,
|
|
3341
2938
|
"tool_mode": false,
|
|
3342
|
-
"trace_as_input": true,
|
|
3343
2939
|
"trace_as_metadata": true,
|
|
3344
|
-
"type": "
|
|
3345
|
-
"value":
|
|
2940
|
+
"type": "bool",
|
|
2941
|
+
"value": false
|
|
3346
2942
|
},
|
|
3347
|
-
"
|
|
3348
|
-
"_input_type": "
|
|
2943
|
+
"use_multithreading": {
|
|
2944
|
+
"_input_type": "BoolInput",
|
|
3349
2945
|
"advanced": true,
|
|
3350
|
-
"display_name": "
|
|
2946
|
+
"display_name": "[Deprecated] Use Multithreading",
|
|
3351
2947
|
"dynamic": false,
|
|
3352
|
-
"info": "
|
|
2948
|
+
"info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.",
|
|
3353
2949
|
"list": false,
|
|
3354
2950
|
"list_add_label": "Add More",
|
|
3355
|
-
"name": "
|
|
2951
|
+
"name": "use_multithreading",
|
|
3356
2952
|
"placeholder": "",
|
|
3357
2953
|
"required": false,
|
|
3358
2954
|
"show": true,
|
|
3359
2955
|
"title_case": false,
|
|
3360
2956
|
"tool_mode": false,
|
|
3361
2957
|
"trace_as_metadata": true,
|
|
3362
|
-
"type": "
|
|
3363
|
-
"value":
|
|
2958
|
+
"type": "bool",
|
|
2959
|
+
"value": true
|
|
2960
|
+
}
|
|
2961
|
+
},
|
|
2962
|
+
"tool_mode": false
|
|
2963
|
+
},
|
|
2964
|
+
"showNode": true,
|
|
2965
|
+
"type": "File"
|
|
2966
|
+
},
|
|
2967
|
+
"dragging": false,
|
|
2968
|
+
"id": "File-wqFzl",
|
|
2969
|
+
"measured": {
|
|
2970
|
+
"height": 230,
|
|
2971
|
+
"width": 320
|
|
2972
|
+
},
|
|
2973
|
+
"position": {
|
|
2974
|
+
"x": 1330.7650978046952,
|
|
2975
|
+
"y": 1431.5905495627503
|
|
2976
|
+
},
|
|
2977
|
+
"selected": false,
|
|
2978
|
+
"type": "genericNode"
|
|
2979
|
+
},
|
|
2980
|
+
{
|
|
2981
|
+
"data": {
|
|
2982
|
+
"id": "LanguageModelComponent-nQYc0",
|
|
2983
|
+
"node": {
|
|
2984
|
+
"base_classes": [
|
|
2985
|
+
"LanguageModel",
|
|
2986
|
+
"Message"
|
|
2987
|
+
],
|
|
2988
|
+
"beta": false,
|
|
2989
|
+
"conditional_paths": [],
|
|
2990
|
+
"custom_fields": {},
|
|
2991
|
+
"description": "Runs a language model given a specified provider. ",
|
|
2992
|
+
"display_name": "Language Model",
|
|
2993
|
+
"documentation": "",
|
|
2994
|
+
"edited": false,
|
|
2995
|
+
"field_order": [
|
|
2996
|
+
"provider",
|
|
2997
|
+
"model_name",
|
|
2998
|
+
"api_key",
|
|
2999
|
+
"input_value",
|
|
3000
|
+
"system_message",
|
|
3001
|
+
"stream",
|
|
3002
|
+
"temperature"
|
|
3003
|
+
],
|
|
3004
|
+
"frozen": false,
|
|
3005
|
+
"icon": "brain-circuit",
|
|
3006
|
+
"last_updated": "2025-08-27T14:19:16.085Z",
|
|
3007
|
+
"legacy": false,
|
|
3008
|
+
"metadata": {
|
|
3009
|
+
"keywords": [
|
|
3010
|
+
"model",
|
|
3011
|
+
"llm",
|
|
3012
|
+
"language model",
|
|
3013
|
+
"large language model"
|
|
3014
|
+
]
|
|
3015
|
+
},
|
|
3016
|
+
"minimized": false,
|
|
3017
|
+
"output_types": [],
|
|
3018
|
+
"outputs": [
|
|
3019
|
+
{
|
|
3020
|
+
"allows_loop": false,
|
|
3021
|
+
"cache": true,
|
|
3022
|
+
"display_name": "Model Response",
|
|
3023
|
+
"group_outputs": false,
|
|
3024
|
+
"method": "text_response",
|
|
3025
|
+
"name": "text_output",
|
|
3026
|
+
"options": null,
|
|
3027
|
+
"required_inputs": null,
|
|
3028
|
+
"selected": "Message",
|
|
3029
|
+
"tool_mode": true,
|
|
3030
|
+
"types": [
|
|
3031
|
+
"Message"
|
|
3032
|
+
],
|
|
3033
|
+
"value": "__UNDEFINED__"
|
|
3364
3034
|
},
|
|
3365
|
-
|
|
3366
|
-
"
|
|
3035
|
+
{
|
|
3036
|
+
"allows_loop": false,
|
|
3037
|
+
"cache": true,
|
|
3038
|
+
"display_name": "Language Model",
|
|
3039
|
+
"group_outputs": false,
|
|
3040
|
+
"method": "build_model",
|
|
3041
|
+
"name": "model_output",
|
|
3042
|
+
"options": null,
|
|
3043
|
+
"required_inputs": null,
|
|
3044
|
+
"selected": "LanguageModel",
|
|
3045
|
+
"tool_mode": true,
|
|
3046
|
+
"types": [
|
|
3047
|
+
"LanguageModel"
|
|
3048
|
+
],
|
|
3049
|
+
"value": "__UNDEFINED__"
|
|
3050
|
+
}
|
|
3051
|
+
],
|
|
3052
|
+
"pinned": false,
|
|
3053
|
+
"priority": 0,
|
|
3054
|
+
"template": {
|
|
3055
|
+
"_type": "Component",
|
|
3056
|
+
"api_key": {
|
|
3057
|
+
"_input_type": "SecretStrInput",
|
|
3367
3058
|
"advanced": false,
|
|
3368
|
-
"
|
|
3369
|
-
"dialog_inputs": {},
|
|
3370
|
-
"display_name": "Reranker",
|
|
3059
|
+
"display_name": "OpenAI API Key",
|
|
3371
3060
|
"dynamic": false,
|
|
3372
|
-
"info": "
|
|
3373
|
-
"
|
|
3374
|
-
"
|
|
3375
|
-
"
|
|
3061
|
+
"info": "Model Provider API key",
|
|
3062
|
+
"input_types": [],
|
|
3063
|
+
"load_from_db": true,
|
|
3064
|
+
"name": "api_key",
|
|
3065
|
+
"password": true,
|
|
3376
3066
|
"placeholder": "",
|
|
3067
|
+
"real_time_refresh": true,
|
|
3377
3068
|
"required": false,
|
|
3378
|
-
"show":
|
|
3069
|
+
"show": true,
|
|
3379
3070
|
"title_case": false,
|
|
3380
|
-
"tool_mode": false,
|
|
3381
|
-
"trace_as_metadata": true,
|
|
3382
3071
|
"type": "str",
|
|
3383
|
-
"value": ""
|
|
3072
|
+
"value": "OPENAI_API_KEY"
|
|
3384
3073
|
},
|
|
3385
|
-
"
|
|
3386
|
-
"_input_type": "DropdownInput",
|
|
3074
|
+
"code": {
|
|
3387
3075
|
"advanced": true,
|
|
3388
|
-
"
|
|
3389
|
-
"
|
|
3390
|
-
"
|
|
3391
|
-
"
|
|
3392
|
-
"
|
|
3393
|
-
"
|
|
3394
|
-
"
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
],
|
|
3398
|
-
"options_metadata": [],
|
|
3076
|
+
"dynamic": true,
|
|
3077
|
+
"fileTypes": [],
|
|
3078
|
+
"file_path": "",
|
|
3079
|
+
"info": "",
|
|
3080
|
+
"list": false,
|
|
3081
|
+
"load_from_db": false,
|
|
3082
|
+
"multiline": true,
|
|
3083
|
+
"name": "code",
|
|
3084
|
+
"password": false,
|
|
3399
3085
|
"placeholder": "",
|
|
3400
|
-
"
|
|
3401
|
-
"required": false,
|
|
3086
|
+
"required": true,
|
|
3402
3087
|
"show": true,
|
|
3403
3088
|
"title_case": false,
|
|
3404
|
-
"
|
|
3405
|
-
"
|
|
3406
|
-
"type": "str",
|
|
3407
|
-
"value": "Vector Search"
|
|
3089
|
+
"type": "code",
|
|
3090
|
+
"value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the 
model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + 
OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n"
|
|
3408
3091
|
},
|
|
3409
|
-
"
|
|
3410
|
-
"_input_type": "
|
|
3092
|
+
"input_value": {
|
|
3093
|
+
"_input_type": "MessageInput",
|
|
3411
3094
|
"advanced": false,
|
|
3412
|
-
"display_name": "
|
|
3095
|
+
"display_name": "Input",
|
|
3413
3096
|
"dynamic": false,
|
|
3414
|
-
"info": "
|
|
3097
|
+
"info": "The input text to send to the model",
|
|
3415
3098
|
"input_types": [
|
|
3416
3099
|
"Message"
|
|
3417
3100
|
],
|
|
3418
3101
|
"list": false,
|
|
3419
3102
|
"list_add_label": "Add More",
|
|
3420
3103
|
"load_from_db": false,
|
|
3421
|
-
"name": "
|
|
3422
|
-
"placeholder": "
|
|
3104
|
+
"name": "input_value",
|
|
3105
|
+
"placeholder": "",
|
|
3423
3106
|
"required": false,
|
|
3424
3107
|
"show": true,
|
|
3425
3108
|
"title_case": false,
|
|
3426
|
-
"tool_mode":
|
|
3109
|
+
"tool_mode": false,
|
|
3427
3110
|
"trace_as_input": true,
|
|
3428
3111
|
"trace_as_metadata": true,
|
|
3429
|
-
"type": "
|
|
3112
|
+
"type": "str",
|
|
3430
3113
|
"value": ""
|
|
3431
3114
|
},
|
|
3432
|
-
"
|
|
3433
|
-
"_input_type": "
|
|
3434
|
-
"advanced":
|
|
3435
|
-
"
|
|
3115
|
+
"model_name": {
|
|
3116
|
+
"_input_type": "DropdownInput",
|
|
3117
|
+
"advanced": false,
|
|
3118
|
+
"combobox": false,
|
|
3119
|
+
"dialog_inputs": {},
|
|
3120
|
+
"display_name": "Model Name",
|
|
3436
3121
|
"dynamic": false,
|
|
3437
|
-
"info": "
|
|
3438
|
-
"
|
|
3439
|
-
"
|
|
3440
|
-
|
|
3122
|
+
"info": "Select the model to use",
|
|
3123
|
+
"name": "model_name",
|
|
3124
|
+
"options": [
|
|
3125
|
+
"gpt-4o-mini",
|
|
3126
|
+
"gpt-4o",
|
|
3127
|
+
"gpt-4.1",
|
|
3128
|
+
"gpt-4.1-mini",
|
|
3129
|
+
"gpt-4.1-nano",
|
|
3130
|
+
"gpt-4.5-preview",
|
|
3131
|
+
"gpt-4-turbo",
|
|
3132
|
+
"gpt-4-turbo-preview",
|
|
3133
|
+
"gpt-4",
|
|
3134
|
+
"gpt-3.5-turbo"
|
|
3135
|
+
],
|
|
3136
|
+
"options_metadata": [],
|
|
3441
3137
|
"placeholder": "",
|
|
3442
3138
|
"required": false,
|
|
3443
3139
|
"show": true,
|
|
3444
3140
|
"title_case": false,
|
|
3141
|
+
"toggle": false,
|
|
3445
3142
|
"tool_mode": false,
|
|
3446
3143
|
"trace_as_metadata": true,
|
|
3447
|
-
"type": "
|
|
3448
|
-
"value":
|
|
3144
|
+
"type": "str",
|
|
3145
|
+
"value": "gpt-4o-mini"
|
|
3449
3146
|
},
|
|
3450
|
-
"
|
|
3147
|
+
"provider": {
|
|
3451
3148
|
"_input_type": "DropdownInput",
|
|
3452
|
-
"advanced":
|
|
3149
|
+
"advanced": false,
|
|
3453
3150
|
"combobox": false,
|
|
3454
3151
|
"dialog_inputs": {},
|
|
3455
|
-
"display_name": "
|
|
3152
|
+
"display_name": "Model Provider",
|
|
3456
3153
|
"dynamic": false,
|
|
3457
|
-
"info": "
|
|
3458
|
-
"name": "
|
|
3154
|
+
"info": "Select the model provider",
|
|
3155
|
+
"name": "provider",
|
|
3459
3156
|
"options": [
|
|
3460
|
-
"
|
|
3461
|
-
"
|
|
3462
|
-
"
|
|
3157
|
+
"OpenAI",
|
|
3158
|
+
"Anthropic",
|
|
3159
|
+
"Google"
|
|
3160
|
+
],
|
|
3161
|
+
"options_metadata": [
|
|
3162
|
+
{
|
|
3163
|
+
"icon": "OpenAI"
|
|
3164
|
+
},
|
|
3165
|
+
{
|
|
3166
|
+
"icon": "Anthropic"
|
|
3167
|
+
},
|
|
3168
|
+
{
|
|
3169
|
+
"icon": "GoogleGenerativeAI"
|
|
3170
|
+
}
|
|
3463
3171
|
],
|
|
3464
|
-
"options_metadata": [],
|
|
3465
3172
|
"placeholder": "",
|
|
3173
|
+
"real_time_refresh": true,
|
|
3466
3174
|
"required": false,
|
|
3467
3175
|
"show": true,
|
|
3468
3176
|
"title_case": false,
|
|
3177
|
+
"toggle": false,
|
|
3469
3178
|
"tool_mode": false,
|
|
3470
3179
|
"trace_as_metadata": true,
|
|
3471
3180
|
"type": "str",
|
|
3472
|
-
"value": "
|
|
3181
|
+
"value": "OpenAI"
|
|
3473
3182
|
},
|
|
3474
|
-
"
|
|
3183
|
+
"stream": {
|
|
3475
3184
|
"_input_type": "BoolInput",
|
|
3476
3185
|
"advanced": true,
|
|
3477
|
-
"display_name": "
|
|
3186
|
+
"display_name": "Stream",
|
|
3478
3187
|
"dynamic": false,
|
|
3479
|
-
"info": "
|
|
3188
|
+
"info": "Whether to stream the response",
|
|
3480
3189
|
"list": false,
|
|
3481
3190
|
"list_add_label": "Add More",
|
|
3482
|
-
"name": "
|
|
3191
|
+
"name": "stream",
|
|
3483
3192
|
"placeholder": "",
|
|
3484
3193
|
"required": false,
|
|
3485
3194
|
"show": true,
|
|
@@ -3487,49 +3196,84 @@
|
|
|
3487
3196
|
"tool_mode": false,
|
|
3488
3197
|
"trace_as_metadata": true,
|
|
3489
3198
|
"type": "bool",
|
|
3490
|
-
"value":
|
|
3199
|
+
"value": false
|
|
3491
3200
|
},
|
|
3492
|
-
"
|
|
3493
|
-
"_input_type": "
|
|
3494
|
-
"advanced":
|
|
3495
|
-
"
|
|
3201
|
+
"system_message": {
|
|
3202
|
+
"_input_type": "MultilineInput",
|
|
3203
|
+
"advanced": true,
|
|
3204
|
+
"copy_field": false,
|
|
3205
|
+
"display_name": "System Message",
|
|
3496
3206
|
"dynamic": false,
|
|
3497
|
-
"info": "
|
|
3498
|
-
"input_types": [
|
|
3499
|
-
|
|
3500
|
-
|
|
3501
|
-
"
|
|
3207
|
+
"info": "A system message that helps set the behavior of the assistant",
|
|
3208
|
+
"input_types": [
|
|
3209
|
+
"Message"
|
|
3210
|
+
],
|
|
3211
|
+
"list": false,
|
|
3212
|
+
"list_add_label": "Add More",
|
|
3213
|
+
"load_from_db": false,
|
|
3214
|
+
"multiline": true,
|
|
3215
|
+
"name": "system_message",
|
|
3502
3216
|
"placeholder": "",
|
|
3503
|
-
"
|
|
3504
|
-
"required": true,
|
|
3217
|
+
"required": false,
|
|
3505
3218
|
"show": true,
|
|
3506
3219
|
"title_case": false,
|
|
3220
|
+
"tool_mode": false,
|
|
3221
|
+
"trace_as_input": true,
|
|
3222
|
+
"trace_as_metadata": true,
|
|
3507
3223
|
"type": "str",
|
|
3508
|
-
"value": "
|
|
3224
|
+
"value": ""
|
|
3225
|
+
},
|
|
3226
|
+
"temperature": {
|
|
3227
|
+
"_input_type": "SliderInput",
|
|
3228
|
+
"advanced": true,
|
|
3229
|
+
"display_name": "Temperature",
|
|
3230
|
+
"dynamic": false,
|
|
3231
|
+
"info": "Controls randomness in responses",
|
|
3232
|
+
"max_label": "",
|
|
3233
|
+
"max_label_icon": "",
|
|
3234
|
+
"min_label": "",
|
|
3235
|
+
"min_label_icon": "",
|
|
3236
|
+
"name": "temperature",
|
|
3237
|
+
"placeholder": "",
|
|
3238
|
+
"range_spec": {
|
|
3239
|
+
"max": 1,
|
|
3240
|
+
"min": 0,
|
|
3241
|
+
"step": 0.01,
|
|
3242
|
+
"step_type": "float"
|
|
3243
|
+
},
|
|
3244
|
+
"required": false,
|
|
3245
|
+
"show": true,
|
|
3246
|
+
"slider_buttons": false,
|
|
3247
|
+
"slider_buttons_options": [],
|
|
3248
|
+
"slider_input": false,
|
|
3249
|
+
"title_case": false,
|
|
3250
|
+
"tool_mode": false,
|
|
3251
|
+
"type": "slider",
|
|
3252
|
+
"value": 0.1
|
|
3509
3253
|
}
|
|
3510
3254
|
},
|
|
3511
3255
|
"tool_mode": false
|
|
3512
3256
|
},
|
|
3513
|
-
"selected_output": "
|
|
3257
|
+
"selected_output": "text_output",
|
|
3514
3258
|
"showNode": true,
|
|
3515
|
-
"type": "
|
|
3259
|
+
"type": "LanguageModelComponent"
|
|
3516
3260
|
},
|
|
3517
3261
|
"dragging": false,
|
|
3518
|
-
"id": "
|
|
3262
|
+
"id": "LanguageModelComponent-nQYc0",
|
|
3519
3263
|
"measured": {
|
|
3520
|
-
"height":
|
|
3264
|
+
"height": 451,
|
|
3521
3265
|
"width": 320
|
|
3522
3266
|
},
|
|
3523
3267
|
"position": {
|
|
3524
|
-
"x":
|
|
3525
|
-
"y":
|
|
3268
|
+
"x": 2354.7612483129965,
|
|
3269
|
+
"y": 633.8261067248878
|
|
3526
3270
|
},
|
|
3527
3271
|
"selected": false,
|
|
3528
3272
|
"type": "genericNode"
|
|
3529
3273
|
},
|
|
3530
3274
|
{
|
|
3531
3275
|
"data": {
|
|
3532
|
-
"id": "AstraDB-
|
|
3276
|
+
"id": "AstraDB-t8lcj",
|
|
3533
3277
|
"node": {
|
|
3534
3278
|
"base_classes": [
|
|
3535
3279
|
"Data",
|
|
@@ -3569,6 +3313,7 @@
|
|
|
3569
3313
|
],
|
|
3570
3314
|
"frozen": false,
|
|
3571
3315
|
"icon": "AstraDB",
|
|
3316
|
+
"last_updated": "2025-08-27T14:19:16.085Z",
|
|
3572
3317
|
"legacy": false,
|
|
3573
3318
|
"metadata": {
|
|
3574
3319
|
"code_hash": "23fbe9daca09",
|
|
@@ -3593,7 +3338,7 @@
|
|
|
3593
3338
|
],
|
|
3594
3339
|
"total_dependencies": 4
|
|
3595
3340
|
},
|
|
3596
|
-
"module": "langflow.components.datastax.
|
|
3341
|
+
"module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
|
|
3597
3342
|
},
|
|
3598
3343
|
"minimized": false,
|
|
3599
3344
|
"output_types": [],
|
|
@@ -3665,19 +3410,21 @@
|
|
|
3665
3410
|
"value": {}
|
|
3666
3411
|
},
|
|
3667
3412
|
"api_endpoint": {
|
|
3668
|
-
"_input_type": "
|
|
3669
|
-
"advanced":
|
|
3413
|
+
"_input_type": "DropdownInput",
|
|
3414
|
+
"advanced": true,
|
|
3415
|
+
"combobox": false,
|
|
3416
|
+
"dialog_inputs": {},
|
|
3670
3417
|
"display_name": "Astra DB API Endpoint",
|
|
3671
3418
|
"dynamic": false,
|
|
3672
3419
|
"info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
|
|
3673
|
-
"list": false,
|
|
3674
|
-
"list_add_label": "Add More",
|
|
3675
|
-
"load_from_db": false,
|
|
3676
3420
|
"name": "api_endpoint",
|
|
3421
|
+
"options": [],
|
|
3422
|
+
"options_metadata": [],
|
|
3677
3423
|
"placeholder": "",
|
|
3678
3424
|
"required": false,
|
|
3679
|
-
"show":
|
|
3425
|
+
"show": true,
|
|
3680
3426
|
"title_case": false,
|
|
3427
|
+
"toggle": false,
|
|
3681
3428
|
"tool_mode": false,
|
|
3682
3429
|
"trace_as_metadata": true,
|
|
3683
3430
|
"type": "str",
|
|
@@ -3792,6 +3539,7 @@
|
|
|
3792
3539
|
"required": true,
|
|
3793
3540
|
"show": true,
|
|
3794
3541
|
"title_case": false,
|
|
3542
|
+
"toggle": false,
|
|
3795
3543
|
"tool_mode": false,
|
|
3796
3544
|
"trace_as_metadata": true,
|
|
3797
3545
|
"type": "str",
|
|
@@ -3813,6 +3561,7 @@
|
|
|
3813
3561
|
"required": false,
|
|
3814
3562
|
"show": true,
|
|
3815
3563
|
"title_case": false,
|
|
3564
|
+
"toggle": false,
|
|
3816
3565
|
"tool_mode": false,
|
|
3817
3566
|
"trace_as_metadata": true,
|
|
3818
3567
|
"type": "str",
|
|
@@ -3853,6 +3602,7 @@
|
|
|
3853
3602
|
"required": true,
|
|
3854
3603
|
"show": false,
|
|
3855
3604
|
"title_case": false,
|
|
3605
|
+
"toggle": false,
|
|
3856
3606
|
"tool_mode": false,
|
|
3857
3607
|
"trace_as_metadata": true,
|
|
3858
3608
|
"type": "str",
|
|
@@ -3933,6 +3683,7 @@
|
|
|
3933
3683
|
"required": true,
|
|
3934
3684
|
"show": true,
|
|
3935
3685
|
"title_case": false,
|
|
3686
|
+
"toggle": false,
|
|
3936
3687
|
"tool_mode": false,
|
|
3937
3688
|
"trace_as_metadata": true,
|
|
3938
3689
|
"type": "str",
|
|
@@ -3953,6 +3704,7 @@
|
|
|
3953
3704
|
"required": true,
|
|
3954
3705
|
"show": true,
|
|
3955
3706
|
"title_case": false,
|
|
3707
|
+
"toggle": false,
|
|
3956
3708
|
"tool_mode": false,
|
|
3957
3709
|
"trace_as_metadata": true,
|
|
3958
3710
|
"type": "str",
|
|
@@ -3969,24 +3721,14 @@
|
|
|
3969
3721
|
"info": "The Database name for the Astra DB instance.",
|
|
3970
3722
|
"name": "database_name",
|
|
3971
3723
|
"options": [],
|
|
3972
|
-
"options_metadata": [
|
|
3973
|
-
{
|
|
3974
|
-
"api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
|
|
3975
|
-
"collections": 5,
|
|
3976
|
-
"keyspaces": [
|
|
3977
|
-
"default_keyspace",
|
|
3978
|
-
"samples_dataflow"
|
|
3979
|
-
],
|
|
3980
|
-
"org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
|
|
3981
|
-
"status": null
|
|
3982
|
-
}
|
|
3983
|
-
],
|
|
3724
|
+
"options_metadata": [],
|
|
3984
3725
|
"placeholder": "",
|
|
3985
3726
|
"real_time_refresh": true,
|
|
3986
3727
|
"refresh_button": true,
|
|
3987
3728
|
"required": true,
|
|
3988
3729
|
"show": true,
|
|
3989
3730
|
"title_case": false,
|
|
3731
|
+
"toggle": false,
|
|
3990
3732
|
"tool_mode": false,
|
|
3991
3733
|
"trace_as_metadata": true,
|
|
3992
3734
|
"type": "str",
|
|
@@ -4025,7 +3767,7 @@
|
|
|
4025
3767
|
"name": "embedding_model",
|
|
4026
3768
|
"placeholder": "",
|
|
4027
3769
|
"required": false,
|
|
4028
|
-
"show":
|
|
3770
|
+
"show": false,
|
|
4029
3771
|
"title_case": false,
|
|
4030
3772
|
"trace_as_metadata": true,
|
|
4031
3773
|
"type": "other",
|
|
@@ -4051,6 +3793,7 @@
|
|
|
4051
3793
|
"required": false,
|
|
4052
3794
|
"show": true,
|
|
4053
3795
|
"title_case": false,
|
|
3796
|
+
"toggle": false,
|
|
4054
3797
|
"tool_mode": false,
|
|
4055
3798
|
"trace_as_metadata": true,
|
|
4056
3799
|
"type": "str",
|
|
@@ -4111,6 +3854,7 @@
|
|
|
4111
3854
|
"required": false,
|
|
4112
3855
|
"show": true,
|
|
4113
3856
|
"title_case": false,
|
|
3857
|
+
"toggle": false,
|
|
4114
3858
|
"tool_mode": false,
|
|
4115
3859
|
"trace_as_metadata": true,
|
|
4116
3860
|
"type": "str",
|
|
@@ -4118,7 +3862,7 @@
|
|
|
4118
3862
|
},
|
|
4119
3863
|
"lexical_terms": {
|
|
4120
3864
|
"_input_type": "QueryInput",
|
|
4121
|
-
"advanced":
|
|
3865
|
+
"advanced": false,
|
|
4122
3866
|
"display_name": "Lexical Terms",
|
|
4123
3867
|
"dynamic": false,
|
|
4124
3868
|
"info": "Add additional terms/keywords to augment search precision.",
|
|
@@ -4173,6 +3917,7 @@
|
|
|
4173
3917
|
"required": false,
|
|
4174
3918
|
"show": false,
|
|
4175
3919
|
"title_case": false,
|
|
3920
|
+
"toggle": true,
|
|
4176
3921
|
"tool_mode": false,
|
|
4177
3922
|
"trace_as_metadata": true,
|
|
4178
3923
|
"type": "str",
|
|
@@ -4191,12 +3936,20 @@
|
|
|
4191
3936
|
"Hybrid Search",
|
|
4192
3937
|
"Vector Search"
|
|
4193
3938
|
],
|
|
4194
|
-
"options_metadata": [
|
|
3939
|
+
"options_metadata": [
|
|
3940
|
+
{
|
|
3941
|
+
"icon": "SearchHybrid"
|
|
3942
|
+
},
|
|
3943
|
+
{
|
|
3944
|
+
"icon": "SearchVector"
|
|
3945
|
+
}
|
|
3946
|
+
],
|
|
4195
3947
|
"placeholder": "",
|
|
4196
3948
|
"real_time_refresh": true,
|
|
4197
3949
|
"required": false,
|
|
4198
3950
|
"show": true,
|
|
4199
3951
|
"title_case": false,
|
|
3952
|
+
"toggle": false,
|
|
4200
3953
|
"tool_mode": false,
|
|
4201
3954
|
"trace_as_metadata": true,
|
|
4202
3955
|
"type": "str",
|
|
@@ -4262,6 +4015,7 @@
|
|
|
4262
4015
|
"required": false,
|
|
4263
4016
|
"show": true,
|
|
4264
4017
|
"title_case": false,
|
|
4018
|
+
"toggle": false,
|
|
4265
4019
|
"tool_mode": false,
|
|
4266
4020
|
"trace_as_metadata": true,
|
|
4267
4021
|
"type": "str",
|
|
@@ -4311,63 +4065,130 @@
|
|
|
4311
4065
|
"type": "AstraDB"
|
|
4312
4066
|
},
|
|
4313
4067
|
"dragging": false,
|
|
4314
|
-
"id": "AstraDB-
|
|
4068
|
+
"id": "AstraDB-t8lcj",
|
|
4315
4069
|
"measured": {
|
|
4316
|
-
"height":
|
|
4070
|
+
"height": 457,
|
|
4317
4071
|
"width": 320
|
|
4318
4072
|
},
|
|
4319
4073
|
"position": {
|
|
4320
|
-
"x":
|
|
4321
|
-
"y":
|
|
4074
|
+
"x": 2095.3200255891484,
|
|
4075
|
+
"y": 1501.719729125222
|
|
4322
4076
|
},
|
|
4323
4077
|
"selected": false,
|
|
4324
4078
|
"type": "genericNode"
|
|
4325
4079
|
},
|
|
4326
4080
|
{
|
|
4327
4081
|
"data": {
|
|
4328
|
-
"id": "
|
|
4082
|
+
"id": "AstraDB-CLCyc",
|
|
4329
4083
|
"node": {
|
|
4330
4084
|
"base_classes": [
|
|
4331
|
-
"
|
|
4085
|
+
"Data",
|
|
4086
|
+
"DataFrame",
|
|
4087
|
+
"VectorStore"
|
|
4332
4088
|
],
|
|
4333
4089
|
"beta": false,
|
|
4334
4090
|
"conditional_paths": [],
|
|
4335
4091
|
"custom_fields": {},
|
|
4336
|
-
"description": "
|
|
4337
|
-
"display_name": "
|
|
4338
|
-
"documentation": "",
|
|
4092
|
+
"description": "Ingest and search documents in Astra DB",
|
|
4093
|
+
"display_name": "Astra DB",
|
|
4094
|
+
"documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
|
|
4339
4095
|
"edited": false,
|
|
4340
4096
|
"field_order": [
|
|
4341
|
-
"
|
|
4342
|
-
"
|
|
4343
|
-
"
|
|
4344
|
-
"
|
|
4345
|
-
"
|
|
4346
|
-
"
|
|
4347
|
-
"
|
|
4348
|
-
"
|
|
4349
|
-
"
|
|
4097
|
+
"token",
|
|
4098
|
+
"environment",
|
|
4099
|
+
"database_name",
|
|
4100
|
+
"api_endpoint",
|
|
4101
|
+
"keyspace",
|
|
4102
|
+
"collection_name",
|
|
4103
|
+
"embedding_model",
|
|
4104
|
+
"ingest_data",
|
|
4105
|
+
"search_query",
|
|
4106
|
+
"should_cache_vector_store",
|
|
4107
|
+
"search_method",
|
|
4108
|
+
"reranker",
|
|
4109
|
+
"lexical_terms",
|
|
4110
|
+
"number_of_results",
|
|
4111
|
+
"search_type",
|
|
4112
|
+
"search_score_threshold",
|
|
4113
|
+
"advanced_search_filter",
|
|
4114
|
+
"autodetect_collection",
|
|
4115
|
+
"content_field",
|
|
4116
|
+
"deletion_field",
|
|
4117
|
+
"ignore_invalid_documents",
|
|
4118
|
+
"astradb_vectorstore_kwargs"
|
|
4350
4119
|
],
|
|
4351
4120
|
"frozen": false,
|
|
4352
|
-
"icon": "
|
|
4121
|
+
"icon": "AstraDB",
|
|
4122
|
+
"last_updated": "2025-08-27T14:19:33.967Z",
|
|
4353
4123
|
"legacy": false,
|
|
4354
|
-
"metadata": {
|
|
4124
|
+
"metadata": {
|
|
4125
|
+
"code_hash": "23fbe9daca09",
|
|
4126
|
+
"dependencies": {
|
|
4127
|
+
"dependencies": [
|
|
4128
|
+
{
|
|
4129
|
+
"name": "astrapy",
|
|
4130
|
+
"version": "2.0.1"
|
|
4131
|
+
},
|
|
4132
|
+
{
|
|
4133
|
+
"name": "langchain_astradb",
|
|
4134
|
+
"version": "0.6.0"
|
|
4135
|
+
},
|
|
4136
|
+
{
|
|
4137
|
+
"name": "langchain_core",
|
|
4138
|
+
"version": "0.3.75"
|
|
4139
|
+
},
|
|
4140
|
+
{
|
|
4141
|
+
"name": "langflow",
|
|
4142
|
+
"version": null
|
|
4143
|
+
}
|
|
4144
|
+
],
|
|
4145
|
+
"total_dependencies": 4
|
|
4146
|
+
},
|
|
4147
|
+
"module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
|
|
4148
|
+
},
|
|
4355
4149
|
"minimized": false,
|
|
4356
4150
|
"output_types": [],
|
|
4357
4151
|
"outputs": [
|
|
4358
4152
|
{
|
|
4359
4153
|
"allows_loop": false,
|
|
4360
4154
|
"cache": true,
|
|
4361
|
-
"display_name": "
|
|
4155
|
+
"display_name": "Search Results",
|
|
4362
4156
|
"group_outputs": false,
|
|
4363
|
-
"method": "
|
|
4364
|
-
"name": "
|
|
4365
|
-
"
|
|
4366
|
-
"required_inputs": null,
|
|
4367
|
-
"selected": "Message",
|
|
4157
|
+
"method": "search_documents",
|
|
4158
|
+
"name": "search_results",
|
|
4159
|
+
"selected": "Data",
|
|
4368
4160
|
"tool_mode": true,
|
|
4369
4161
|
"types": [
|
|
4370
|
-
"
|
|
4162
|
+
"Data"
|
|
4163
|
+
],
|
|
4164
|
+
"value": "__UNDEFINED__"
|
|
4165
|
+
},
|
|
4166
|
+
{
|
|
4167
|
+
"allows_loop": false,
|
|
4168
|
+
"cache": true,
|
|
4169
|
+
"display_name": "DataFrame",
|
|
4170
|
+
"group_outputs": false,
|
|
4171
|
+
"method": "as_dataframe",
|
|
4172
|
+
"name": "dataframe",
|
|
4173
|
+
"selected": "DataFrame",
|
|
4174
|
+
"tool_mode": true,
|
|
4175
|
+
"types": [
|
|
4176
|
+
"DataFrame"
|
|
4177
|
+
],
|
|
4178
|
+
"value": "__UNDEFINED__"
|
|
4179
|
+
},
|
|
4180
|
+
{
|
|
4181
|
+
"allows_loop": false,
|
|
4182
|
+
"cache": true,
|
|
4183
|
+
"display_name": "Vector Store Connection",
|
|
4184
|
+
"group_outputs": false,
|
|
4185
|
+
"hidden": true,
|
|
4186
|
+
"method": "as_vector_store",
|
|
4187
|
+
"name": "vectorstoreconnection",
|
|
4188
|
+
"selected": "VectorStore",
|
|
4189
|
+
"tool_mode": true,
|
|
4190
|
+
"types": [
|
|
4191
|
+
"VectorStore"
|
|
4371
4192
|
],
|
|
4372
4193
|
"value": "__UNDEFINED__"
|
|
4373
4194
|
}
|
|
@@ -4375,51 +4196,74 @@
|
|
|
4375
4196
|
"pinned": false,
|
|
4376
4197
|
"template": {
|
|
4377
4198
|
"_type": "Component",
|
|
4378
|
-
"
|
|
4199
|
+
"advanced_search_filter": {
|
|
4200
|
+
"_input_type": "NestedDictInput",
|
|
4379
4201
|
"advanced": true,
|
|
4380
|
-
"
|
|
4381
|
-
"
|
|
4382
|
-
"
|
|
4383
|
-
"info": "",
|
|
4202
|
+
"display_name": "Search Metadata Filter",
|
|
4203
|
+
"dynamic": false,
|
|
4204
|
+
"info": "Optional dictionary of filters to apply to the search query.",
|
|
4384
4205
|
"list": false,
|
|
4385
|
-
"
|
|
4386
|
-
"
|
|
4387
|
-
"
|
|
4388
|
-
"
|
|
4206
|
+
"list_add_label": "Add More",
|
|
4207
|
+
"name": "advanced_search_filter",
|
|
4208
|
+
"placeholder": "",
|
|
4209
|
+
"required": false,
|
|
4210
|
+
"show": true,
|
|
4211
|
+
"title_case": false,
|
|
4212
|
+
"tool_mode": false,
|
|
4213
|
+
"trace_as_input": true,
|
|
4214
|
+
"trace_as_metadata": true,
|
|
4215
|
+
"type": "NestedDict",
|
|
4216
|
+
"value": {}
|
|
4217
|
+
},
|
|
4218
|
+
"api_endpoint": {
|
|
4219
|
+
"_input_type": "DropdownInput",
|
|
4220
|
+
"advanced": true,
|
|
4221
|
+
"combobox": false,
|
|
4222
|
+
"dialog_inputs": {},
|
|
4223
|
+
"display_name": "Astra DB API Endpoint",
|
|
4224
|
+
"dynamic": false,
|
|
4225
|
+
"info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
|
|
4226
|
+
"name": "api_endpoint",
|
|
4227
|
+
"options": [],
|
|
4228
|
+
"options_metadata": [],
|
|
4389
4229
|
"placeholder": "",
|
|
4390
|
-
"required":
|
|
4230
|
+
"required": false,
|
|
4391
4231
|
"show": true,
|
|
4392
4232
|
"title_case": false,
|
|
4393
|
-
"
|
|
4394
|
-
"value": "\"\"\"Enhanced file component with clearer structure and Docling isolation.\n\nNotes:\n-----\n- Functionality is preserved with minimal behavioral changes.\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom typing import TYPE_CHECKING, Any\n\nfrom langflow.base.data.base_file import BaseFileComponent\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n FileInput,\n IntInput,\n MessageTextInput,\n Output,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\nif TYPE_CHECKING:\n from langflow.schema import DataFrame\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"File\"\n description = \"Loads content from files with optional advanced document processing and export using Docling.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n *TEXT_FILE_TYPES,\n ]\n\n # Fixed export settings used when markdown 
export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n # ---- Inputs / Outputs (kept as close to original as possible) -------------------\n _base_inputs = deepcopy(BaseFileComponent._base_inputs)\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Available only for single file processing.\"\n ),\n show=False,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"\", \"easyocr\"],\n value=\"\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"<!-- image -->\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable 
multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n file_path = paths[0] if paths else \"\"\n file_count = len(field_value) if field_value else 0\n\n # Advanced mode only for single (non-tabular) file\n allow_advanced = file_count == 1 and not file_path.endswith((\".csv\", \".xlsx\", \".parquet\"))\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n\n return build_config\n\n def update_outputs(self, 
frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced\", method=\"load_files_advanced\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"))\n\n return frontend_node\n\n # ------------------------------ Core processing 
----------------------------------\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"<script>\"` and\n passing JSON config via stdin. The child prints a JSON result to stdout.\n \"\"\"\n if not file_path:\n return None\n\n args: dict[str, Any] = {\n \"file_path\": file_path,\n \"markdown\": bool(self.markdown),\n \"image_mode\": str(self.IMAGE_MODE),\n \"md_image_placeholder\": str(self.md_image_placeholder),\n \"md_page_break_placeholder\": str(self.md_page_break_placeholder),\n \"pipeline\": str(self.pipeline),\n \"ocr_engine\": str(self.ocr_engine) if getattr(self, \"ocr_engine\", \"\") else None,\n }\n\n # The child is a tiny, self-contained script to keep memory/state isolated.\n child_script = textwrap.dedent(\n r\"\"\"\n import json, sys\n\n def try_imports():\n # Strategy 1: latest layout\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore\n from docling.document_converter import DocumentConverter # type: ignore\n from docling_core.types.doc import ImageRefMode # type: ignore\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"latest\"\n except Exception:\n pass\n # Strategy 2: alternative layout\n try:\n from docling.document_converter import 
DocumentConverter # type: ignore\n try:\n from docling_core.types import ConversionStatus, InputFormat # type: ignore\n except Exception:\n try:\n from docling.datamodel import ConversionStatus, InputFormat # type: ignore\n except Exception:\n class ConversionStatus: SUCCESS = \"success\"\n class InputFormat:\n PDF=\"pdf\"; IMAGE=\"image\"\n try:\n from docling_core.types.doc import ImageRefMode # type: ignore\n except Exception:\n class ImageRefMode:\n PLACEHOLDER=\"placeholder\"; EMBEDDED=\"embedded\"\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"alternative\"\n except Exception:\n pass\n # Strategy 3: basic converter only\n try:\n from docling.document_converter import DocumentConverter # type: ignore\n class ConversionStatus: SUCCESS = \"success\"\n class InputFormat:\n PDF=\"pdf\"; IMAGE=\"image\"\n class ImageRefMode:\n PLACEHOLDER=\"placeholder\"; EMBEDDED=\"embedded\"\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"basic\"\n except Exception as e:\n raise ImportError(f\"Docling imports failed: {e}\") from e\n\n def create_converter(strategy, input_format, DocumentConverter, pipeline, ocr_engine):\n if strategy == \"latest\" and pipeline == \"standard\":\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore\n from docling.document_converter import PdfFormatOption # type: ignore\n pipe = PdfPipelineOptions()\n if ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore\n pipe.do_ocr = True\n fac = get_ocr_factory(allow_external_plugins=False)\n pipe.ocr_options = fac.create_options(kind=ocr_engine)\n except Exception:\n pipe.do_ocr = False\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(pipeline_options=pipe)\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(pipeline_options=pipe)\n return DocumentConverter(format_options=fmt)\n except 
Exception:\n return DocumentConverter()\n return DocumentConverter()\n\n def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):\n try:\n mode = getattr(ImageRefMode, image_mode.upper(), image_mode)\n return document.export_to_markdown(\n image_mode=mode,\n image_placeholder=img_ph,\n page_break_placeholder=pg_ph,\n )\n except Exception:\n try:\n return document.export_to_text()\n except Exception:\n return str(document)\n\n def to_rows(doc_dict):\n rows = []\n for t in doc_dict.get(\"texts\", []):\n prov = t.get(\"prov\") or []\n page_no = None\n if prov and isinstance(prov, list) and isinstance(prov[0], dict):\n page_no = prov[0].get(\"page_no\")\n rows.append({\n \"page_no\": page_no,\n \"label\": t.get(\"label\"),\n \"text\": t.get(\"text\"),\n \"level\": t.get(\"level\"),\n })\n return rows\n\n def main():\n cfg = json.loads(sys.stdin.read())\n file_path = cfg[\"file_path\"]\n markdown = cfg[\"markdown\"]\n image_mode = cfg[\"image_mode\"]\n img_ph = cfg[\"md_image_placeholder\"]\n pg_ph = cfg[\"md_page_break_placeholder\"]\n pipeline = cfg[\"pipeline\"]\n ocr_engine = cfg.get(\"ocr_engine\")\n meta = {\"file_path\": file_path}\n\n try:\n ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, strategy = try_imports()\n converter = create_converter(strategy, InputFormat, DocumentConverter, pipeline, ocr_engine)\n try:\n res = converter.convert(file_path)\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling conversion error: {e}\", \"meta\": meta}))\n return\n\n ok = False\n if hasattr(res, \"status\"):\n try:\n ok = (res.status == ConversionStatus.SUCCESS) or (str(res.status).lower() == \"success\")\n except Exception:\n ok = (str(res.status).lower() == \"success\")\n if not ok and hasattr(res, \"document\"):\n ok = getattr(res, \"document\", None) is not None\n if not ok:\n print(json.dumps({\"ok\": False, \"error\": \"Docling conversion failed\", \"meta\": meta}))\n return\n\n doc = getattr(res, 
\"document\", None)\n if doc is None:\n print(json.dumps({\"ok\": False, \"error\": \"Docling produced no document\", \"meta\": meta}))\n return\n\n if markdown:\n text = export_markdown(doc, ImageRefMode, image_mode, img_ph, pg_ph)\n print(json.dumps({\"ok\": True, \"mode\": \"markdown\", \"text\": text, \"meta\": meta}))\n return\n\n # structured\n try:\n doc_dict = doc.export_to_dict()\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling export_to_dict failed: {e}\", \"meta\": meta}))\n return\n\n rows = to_rows(doc_dict)\n print(json.dumps({\"ok\": True, \"mode\": \"structured\", \"doc\": rows, \"meta\": meta}))\n except Exception as e:\n print(\n json.dumps({\n \"ok\": False,\n \"error\": f\"Docling processing error: {e}\",\n \"meta\": {\"file_path\": file_path},\n })\n )\n\n if __name__ == \"__main__\":\n main()\n \"\"\"\n )\n\n # Validate file_path to avoid command injection or unsafe input\n if not isinstance(args[\"file_path\"], str) or any(c in args[\"file_path\"] for c in [\";\", \"|\", \"&\", \"$\", \"`\"]):\n return Data(data={\"error\": \"Unsafe file path detected.\", \"file_path\": args[\"file_path\"]})\n\n proc = subprocess.run( # noqa: S603\n [sys.executable, \"-u\", \"-c\", child_script],\n input=json.dumps(args).encode(\"utf-8\"),\n capture_output=True,\n check=False,\n )\n\n if not proc.stdout:\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\") or \"no output from child process\"\n return Data(data={\"error\": f\"Docling subprocess error: {err_msg}\", \"file_path\": file_path})\n\n try:\n result = json.loads(proc.stdout.decode(\"utf-8\"))\n except Exception as e: # noqa: BLE001\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\")\n return Data(\n data={\"error\": f\"Invalid JSON from Docling subprocess: {e}. 
stderr={err_msg}\", \"file_path\": file_path},\n )\n\n if not result.get(\"ok\"):\n return Data(data={\"error\": result.get(\"error\", \"Unknown Docling error\"), **result.get(\"meta\", {})})\n\n meta = result.get(\"meta\", {})\n if result.get(\"mode\") == \"markdown\":\n exported_content = str(result.get(\"text\", \"\"))\n return Data(\n text=exported_content,\n data={\"exported_content\": exported_content, \"export_format\": self.EXPORT_FORMAT, **meta},\n )\n\n rows = list(result.get(\"doc\", []))\n return Data(data={\"doc\": rows, \"export_format\": self.EXPORT_FORMAT, **meta})\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process input files.\n\n - Single file + advanced_mode => Docling in a separate process.\n - Otherwise => standard parsing in current process (optionally threaded).\n \"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n self.log(f\"File not found: {file_path}. 
Error: {e}\")\n if not silent_errors:\n raise\n return None\n except Exception as e:\n self.log(f\"Unexpected error processing {file_path}: {e}\")\n if not silent_errors:\n raise\n return None\n\n # Advanced path: only for a single Docling-compatible file\n if len(file_list) == 1:\n file_path = str(file_list[0].path)\n if self.advanced_mode and self._is_docling_compatible(file_path):\n advanced_data: Data | None = self._process_docling_in_subprocess(file_path)\n\n # --- UNNEST: expand each element in `doc` to its own Data row\n payload = getattr(advanced_data, \"data\", {}) or {}\n doc_rows = payload.get(\"doc\")\n if isinstance(doc_rows, list):\n rows: list[Data | None] = [\n Data(\n data={\n \"file_path\": file_path,\n **(item if isinstance(item, dict) else {\"value\": item}),\n },\n )\n for item in doc_rows\n ]\n return self.rollup_data(file_list, rows)\n\n # If not structured, keep as-is (e.g., markdown export or error dict)\n return self.rollup_data(file_list, [advanced_data])\n\n # Standard multi-file (or single non-advanced) path\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n file_paths = [str(f.path) for f in file_list]\n self.log(f\"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.\")\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n return self.rollup_data(file_list, my_data)\n\n # ------------------------------ Output helpers -----------------------------------\n\n def load_files_advanced(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to an advanced format.\"\"\"\n self.markdown = False\n return self.load_files()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files()\n return 
Message(text=str(result.text[0]))\n"
|
|
4233
|
+
"toggle": false,
|
|
4234
|
+
"tool_mode": false,
|
|
4235
|
+
"trace_as_metadata": true,
|
|
4236
|
+
"type": "str",
|
|
4237
|
+
"value": ""
|
|
4395
4238
|
},
|
|
4396
|
-
"
|
|
4397
|
-
"_input_type": "
|
|
4239
|
+
"astradb_vectorstore_kwargs": {
|
|
4240
|
+
"_input_type": "NestedDictInput",
|
|
4398
4241
|
"advanced": true,
|
|
4399
|
-
"display_name": "
|
|
4242
|
+
"display_name": "AstraDBVectorStore Parameters",
|
|
4400
4243
|
"dynamic": false,
|
|
4401
|
-
"info": "
|
|
4244
|
+
"info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
|
|
4402
4245
|
"list": false,
|
|
4403
4246
|
"list_add_label": "Add More",
|
|
4404
|
-
"name": "
|
|
4247
|
+
"name": "astradb_vectorstore_kwargs",
|
|
4405
4248
|
"placeholder": "",
|
|
4406
4249
|
"required": false,
|
|
4407
4250
|
"show": true,
|
|
4408
4251
|
"title_case": false,
|
|
4409
4252
|
"tool_mode": false,
|
|
4253
|
+
"trace_as_input": true,
|
|
4410
4254
|
"trace_as_metadata": true,
|
|
4411
|
-
"type": "
|
|
4412
|
-
"value":
|
|
4255
|
+
"type": "NestedDict",
|
|
4256
|
+
"value": {}
|
|
4413
4257
|
},
|
|
4414
|
-
"
|
|
4258
|
+
"autodetect_collection": {
|
|
4415
4259
|
"_input_type": "BoolInput",
|
|
4416
4260
|
"advanced": true,
|
|
4417
|
-
"display_name": "
|
|
4261
|
+
"display_name": "Autodetect Collection",
|
|
4418
4262
|
"dynamic": false,
|
|
4419
|
-
"info": "
|
|
4263
|
+
"info": "Boolean flag to determine whether to autodetect the collection.",
|
|
4420
4264
|
"list": false,
|
|
4421
4265
|
"list_add_label": "Add More",
|
|
4422
|
-
"name": "
|
|
4266
|
+
"name": "autodetect_collection",
|
|
4423
4267
|
"placeholder": "",
|
|
4424
4268
|
"required": false,
|
|
4425
4269
|
"show": true,
|
|
@@ -4429,136 +4273,347 @@
|
|
|
4429
4273
|
"type": "bool",
|
|
4430
4274
|
"value": true
|
|
4431
4275
|
},
|
|
4432
|
-
"
|
|
4433
|
-
"_input_type": "HandleInput",
|
|
4276
|
+
"code": {
|
|
4434
4277
|
"advanced": true,
|
|
4435
|
-
"
|
|
4278
|
+
"dynamic": true,
|
|
4279
|
+
"fileTypes": [],
|
|
4280
|
+
"file_path": "",
|
|
4281
|
+
"info": "",
|
|
4282
|
+
"list": false,
|
|
4283
|
+
"load_from_db": false,
|
|
4284
|
+
"multiline": true,
|
|
4285
|
+
"name": "code",
|
|
4286
|
+
"password": false,
|
|
4287
|
+
"placeholder": "",
|
|
4288
|
+
"required": true,
|
|
4289
|
+
"show": true,
|
|
4290
|
+
"title_case": false,
|
|
4291
|
+
"type": "code",
|
|
4292
|
+
"value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n 
display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n '<a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" '\n 'rel=\"noopener noreferrer\">your database in Astra DB</a>'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n 
info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": 
\"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n 
@classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for 
each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, 
api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the 
provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and 
related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for 
cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = 
build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the 
reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection 
creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and 
score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: 
dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in 
build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f'<a class=\"underline\" target=\"_blank\" rel=\"noopener noreferrer\" '\n f'href=\"https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}\">'\n \"your database in Astra DB</a>.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n 
build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n 
build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n 
hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': 
{e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or 
filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
|
|
4293
|
+
},
|
|
4294
|
+
"collection_name": {
|
|
4295
|
+
"_input_type": "DropdownInput",
|
|
4296
|
+
"advanced": false,
|
|
4297
|
+
"combobox": true,
|
|
4298
|
+
"dialog_inputs": {
|
|
4299
|
+
"fields": {
|
|
4300
|
+
"data": {
|
|
4301
|
+
"node": {
|
|
4302
|
+
"description": "Please allow several seconds for creation to complete.",
|
|
4303
|
+
"display_name": "Create new collection",
|
|
4304
|
+
"field_order": [
|
|
4305
|
+
"01_new_collection_name",
|
|
4306
|
+
"02_embedding_generation_provider",
|
|
4307
|
+
"03_embedding_generation_model",
|
|
4308
|
+
"04_dimension"
|
|
4309
|
+
],
|
|
4310
|
+
"name": "create_collection",
|
|
4311
|
+
"template": {
|
|
4312
|
+
"01_new_collection_name": {
|
|
4313
|
+
"_input_type": "StrInput",
|
|
4314
|
+
"advanced": false,
|
|
4315
|
+
"display_name": "Name",
|
|
4316
|
+
"dynamic": false,
|
|
4317
|
+
"info": "Name of the new collection to create in Astra DB.",
|
|
4318
|
+
"list": false,
|
|
4319
|
+
"list_add_label": "Add More",
|
|
4320
|
+
"load_from_db": false,
|
|
4321
|
+
"name": "new_collection_name",
|
|
4322
|
+
"placeholder": "",
|
|
4323
|
+
"required": true,
|
|
4324
|
+
"show": true,
|
|
4325
|
+
"title_case": false,
|
|
4326
|
+
"tool_mode": false,
|
|
4327
|
+
"trace_as_metadata": true,
|
|
4328
|
+
"type": "str",
|
|
4329
|
+
"value": ""
|
|
4330
|
+
},
|
|
4331
|
+
"02_embedding_generation_provider": {
|
|
4332
|
+
"_input_type": "DropdownInput",
|
|
4333
|
+
"advanced": false,
|
|
4334
|
+
"combobox": false,
|
|
4335
|
+
"dialog_inputs": {},
|
|
4336
|
+
"display_name": "Embedding generation method",
|
|
4337
|
+
"dynamic": false,
|
|
4338
|
+
"helper_text": "To create collections with more embedding provider options, go to <a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" rel=\"noopener noreferrer\">your database in Astra DB</a>",
|
|
4339
|
+
"info": "Provider to use for generating embeddings.",
|
|
4340
|
+
"name": "embedding_generation_provider",
|
|
4341
|
+
"options": [],
|
|
4342
|
+
"options_metadata": [],
|
|
4343
|
+
"placeholder": "",
|
|
4344
|
+
"real_time_refresh": true,
|
|
4345
|
+
"required": true,
|
|
4346
|
+
"show": true,
|
|
4347
|
+
"title_case": false,
|
|
4348
|
+
"toggle": false,
|
|
4349
|
+
"tool_mode": false,
|
|
4350
|
+
"trace_as_metadata": true,
|
|
4351
|
+
"type": "str",
|
|
4352
|
+
"value": ""
|
|
4353
|
+
},
|
|
4354
|
+
"03_embedding_generation_model": {
|
|
4355
|
+
"_input_type": "DropdownInput",
|
|
4356
|
+
"advanced": false,
|
|
4357
|
+
"combobox": false,
|
|
4358
|
+
"dialog_inputs": {},
|
|
4359
|
+
"display_name": "Embedding model",
|
|
4360
|
+
"dynamic": false,
|
|
4361
|
+
"info": "Model to use for generating embeddings.",
|
|
4362
|
+
"name": "embedding_generation_model",
|
|
4363
|
+
"options": [],
|
|
4364
|
+
"options_metadata": [],
|
|
4365
|
+
"placeholder": "",
|
|
4366
|
+
"real_time_refresh": true,
|
|
4367
|
+
"required": false,
|
|
4368
|
+
"show": true,
|
|
4369
|
+
"title_case": false,
|
|
4370
|
+
"toggle": false,
|
|
4371
|
+
"tool_mode": false,
|
|
4372
|
+
"trace_as_metadata": true,
|
|
4373
|
+
"type": "str",
|
|
4374
|
+
"value": ""
|
|
4375
|
+
},
|
|
4376
|
+
"04_dimension": {
|
|
4377
|
+
"_input_type": "IntInput",
|
|
4378
|
+
"advanced": false,
|
|
4379
|
+
"display_name": "Dimensions",
|
|
4380
|
+
"dynamic": false,
|
|
4381
|
+
"info": "Dimensions of the embeddings to generate.",
|
|
4382
|
+
"list": false,
|
|
4383
|
+
"list_add_label": "Add More",
|
|
4384
|
+
"name": "dimension",
|
|
4385
|
+
"placeholder": "",
|
|
4386
|
+
"required": false,
|
|
4387
|
+
"show": true,
|
|
4388
|
+
"title_case": false,
|
|
4389
|
+
"tool_mode": false,
|
|
4390
|
+
"trace_as_metadata": true,
|
|
4391
|
+
"type": "int"
|
|
4392
|
+
}
|
|
4393
|
+
}
|
|
4394
|
+
}
|
|
4395
|
+
}
|
|
4396
|
+
},
|
|
4397
|
+
"functionality": "create"
|
|
4398
|
+
},
|
|
4399
|
+
"display_name": "Collection",
|
|
4436
4400
|
"dynamic": false,
|
|
4437
|
-
"info": "
|
|
4438
|
-
"
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4442
|
-
"
|
|
4401
|
+
"info": "The name of the collection within Astra DB where the vectors will be stored.",
|
|
4402
|
+
"name": "collection_name",
|
|
4403
|
+
"options": [],
|
|
4404
|
+
"options_metadata": [],
|
|
4405
|
+
"placeholder": "",
|
|
4406
|
+
"real_time_refresh": true,
|
|
4407
|
+
"refresh_button": true,
|
|
4408
|
+
"required": true,
|
|
4409
|
+
"show": false,
|
|
4410
|
+
"title_case": false,
|
|
4411
|
+
"toggle": false,
|
|
4412
|
+
"tool_mode": false,
|
|
4413
|
+
"trace_as_metadata": true,
|
|
4414
|
+
"type": "str",
|
|
4415
|
+
"value": ""
|
|
4416
|
+
},
|
|
4417
|
+
"content_field": {
|
|
4418
|
+
"_input_type": "StrInput",
|
|
4419
|
+
"advanced": true,
|
|
4420
|
+
"display_name": "Content Field",
|
|
4421
|
+
"dynamic": false,
|
|
4422
|
+
"info": "Field to use as the text content field for the vector store.",
|
|
4423
|
+
"list": false,
|
|
4443
4424
|
"list_add_label": "Add More",
|
|
4444
|
-
"
|
|
4425
|
+
"load_from_db": false,
|
|
4426
|
+
"name": "content_field",
|
|
4445
4427
|
"placeholder": "",
|
|
4446
4428
|
"required": false,
|
|
4447
4429
|
"show": true,
|
|
4448
4430
|
"title_case": false,
|
|
4431
|
+
"tool_mode": false,
|
|
4449
4432
|
"trace_as_metadata": true,
|
|
4450
|
-
"type": "
|
|
4433
|
+
"type": "str",
|
|
4451
4434
|
"value": ""
|
|
4452
4435
|
},
|
|
4453
|
-
"
|
|
4454
|
-
"_input_type": "
|
|
4455
|
-
"advanced":
|
|
4456
|
-
"
|
|
4436
|
+
"database_name": {
|
|
4437
|
+
"_input_type": "DropdownInput",
|
|
4438
|
+
"advanced": false,
|
|
4439
|
+
"combobox": true,
|
|
4440
|
+
"dialog_inputs": {
|
|
4441
|
+
"fields": {
|
|
4442
|
+
"data": {
|
|
4443
|
+
"node": {
|
|
4444
|
+
"description": "Please allow several minutes for creation to complete.",
|
|
4445
|
+
"display_name": "Create new database",
|
|
4446
|
+
"field_order": [
|
|
4447
|
+
"01_new_database_name",
|
|
4448
|
+
"02_cloud_provider",
|
|
4449
|
+
"03_region"
|
|
4450
|
+
],
|
|
4451
|
+
"name": "create_database",
|
|
4452
|
+
"template": {
|
|
4453
|
+
"01_new_database_name": {
|
|
4454
|
+
"_input_type": "StrInput",
|
|
4455
|
+
"advanced": false,
|
|
4456
|
+
"display_name": "Name",
|
|
4457
|
+
"dynamic": false,
|
|
4458
|
+
"info": "Name of the new database to create in Astra DB.",
|
|
4459
|
+
"list": false,
|
|
4460
|
+
"list_add_label": "Add More",
|
|
4461
|
+
"load_from_db": false,
|
|
4462
|
+
"name": "new_database_name",
|
|
4463
|
+
"placeholder": "",
|
|
4464
|
+
"required": true,
|
|
4465
|
+
"show": true,
|
|
4466
|
+
"title_case": false,
|
|
4467
|
+
"tool_mode": false,
|
|
4468
|
+
"trace_as_metadata": true,
|
|
4469
|
+
"type": "str",
|
|
4470
|
+
"value": ""
|
|
4471
|
+
},
|
|
4472
|
+
"02_cloud_provider": {
|
|
4473
|
+
"_input_type": "DropdownInput",
|
|
4474
|
+
"advanced": false,
|
|
4475
|
+
"combobox": false,
|
|
4476
|
+
"dialog_inputs": {},
|
|
4477
|
+
"display_name": "Cloud provider",
|
|
4478
|
+
"dynamic": false,
|
|
4479
|
+
"info": "Cloud provider for the new database.",
|
|
4480
|
+
"name": "cloud_provider",
|
|
4481
|
+
"options": [
|
|
4482
|
+
"Amazon Web Services",
|
|
4483
|
+
"Google Cloud Platform",
|
|
4484
|
+
"Microsoft Azure"
|
|
4485
|
+
],
|
|
4486
|
+
"options_metadata": [],
|
|
4487
|
+
"placeholder": "",
|
|
4488
|
+
"real_time_refresh": true,
|
|
4489
|
+
"required": true,
|
|
4490
|
+
"show": true,
|
|
4491
|
+
"title_case": false,
|
|
4492
|
+
"toggle": false,
|
|
4493
|
+
"tool_mode": false,
|
|
4494
|
+
"trace_as_metadata": true,
|
|
4495
|
+
"type": "str",
|
|
4496
|
+
"value": ""
|
|
4497
|
+
},
|
|
4498
|
+
"03_region": {
|
|
4499
|
+
"_input_type": "DropdownInput",
|
|
4500
|
+
"advanced": false,
|
|
4501
|
+
"combobox": false,
|
|
4502
|
+
"dialog_inputs": {},
|
|
4503
|
+
"display_name": "Region",
|
|
4504
|
+
"dynamic": false,
|
|
4505
|
+
"info": "Region for the new database.",
|
|
4506
|
+
"name": "region",
|
|
4507
|
+
"options": [],
|
|
4508
|
+
"options_metadata": [],
|
|
4509
|
+
"placeholder": "",
|
|
4510
|
+
"required": true,
|
|
4511
|
+
"show": true,
|
|
4512
|
+
"title_case": false,
|
|
4513
|
+
"toggle": false,
|
|
4514
|
+
"tool_mode": false,
|
|
4515
|
+
"trace_as_metadata": true,
|
|
4516
|
+
"type": "str",
|
|
4517
|
+
"value": ""
|
|
4518
|
+
}
|
|
4519
|
+
}
|
|
4520
|
+
}
|
|
4521
|
+
}
|
|
4522
|
+
},
|
|
4523
|
+
"functionality": "create"
|
|
4524
|
+
},
|
|
4525
|
+
"display_name": "Database",
|
|
4457
4526
|
"dynamic": false,
|
|
4458
|
-
"info": "
|
|
4459
|
-
"
|
|
4460
|
-
"
|
|
4461
|
-
"
|
|
4527
|
+
"info": "The Database name for the Astra DB instance.",
|
|
4528
|
+
"name": "database_name",
|
|
4529
|
+
"options": [],
|
|
4530
|
+
"options_metadata": [],
|
|
4462
4531
|
"placeholder": "",
|
|
4463
|
-
"
|
|
4532
|
+
"real_time_refresh": true,
|
|
4533
|
+
"refresh_button": true,
|
|
4534
|
+
"required": true,
|
|
4464
4535
|
"show": true,
|
|
4465
4536
|
"title_case": false,
|
|
4537
|
+
"toggle": false,
|
|
4466
4538
|
"tool_mode": false,
|
|
4467
4539
|
"trace_as_metadata": true,
|
|
4468
|
-
"type": "
|
|
4469
|
-
"value":
|
|
4540
|
+
"type": "str",
|
|
4541
|
+
"value": ""
|
|
4470
4542
|
},
|
|
4471
|
-
"
|
|
4472
|
-
"_input_type": "
|
|
4543
|
+
"deletion_field": {
|
|
4544
|
+
"_input_type": "StrInput",
|
|
4473
4545
|
"advanced": true,
|
|
4474
|
-
"display_name": "
|
|
4546
|
+
"display_name": "Deletion Based On Field",
|
|
4475
4547
|
"dynamic": false,
|
|
4476
|
-
"info": "
|
|
4548
|
+
"info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
|
|
4477
4549
|
"list": false,
|
|
4478
4550
|
"list_add_label": "Add More",
|
|
4479
|
-
"
|
|
4551
|
+
"load_from_db": false,
|
|
4552
|
+
"name": "deletion_field",
|
|
4480
4553
|
"placeholder": "",
|
|
4481
4554
|
"required": false,
|
|
4482
4555
|
"show": true,
|
|
4483
4556
|
"title_case": false,
|
|
4484
4557
|
"tool_mode": false,
|
|
4485
4558
|
"trace_as_metadata": true,
|
|
4486
|
-
"type": "
|
|
4487
|
-
"value":
|
|
4559
|
+
"type": "str",
|
|
4560
|
+
"value": ""
|
|
4488
4561
|
},
|
|
4489
|
-
"
|
|
4490
|
-
"_input_type": "
|
|
4562
|
+
"embedding_model": {
|
|
4563
|
+
"_input_type": "HandleInput",
|
|
4491
4564
|
"advanced": false,
|
|
4492
|
-
"display_name": "
|
|
4565
|
+
"display_name": "Embedding Model",
|
|
4493
4566
|
"dynamic": false,
|
|
4494
|
-
"
|
|
4495
|
-
|
|
4496
|
-
"
|
|
4497
|
-
"mdx",
|
|
4498
|
-
"csv",
|
|
4499
|
-
"json",
|
|
4500
|
-
"yaml",
|
|
4501
|
-
"yml",
|
|
4502
|
-
"xml",
|
|
4503
|
-
"html",
|
|
4504
|
-
"htm",
|
|
4505
|
-
"pdf",
|
|
4506
|
-
"docx",
|
|
4507
|
-
"py",
|
|
4508
|
-
"sh",
|
|
4509
|
-
"sql",
|
|
4510
|
-
"js",
|
|
4511
|
-
"ts",
|
|
4512
|
-
"tsx",
|
|
4513
|
-
"zip",
|
|
4514
|
-
"tar",
|
|
4515
|
-
"tgz",
|
|
4516
|
-
"bz2",
|
|
4517
|
-
"gz"
|
|
4567
|
+
"info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
|
|
4568
|
+
"input_types": [
|
|
4569
|
+
"Embeddings"
|
|
4518
4570
|
],
|
|
4519
|
-
"
|
|
4520
|
-
"info": "Supported file extensions: txt, md, mdx, csv, json, yaml, yml, xml, html, htm, pdf, docx, py, sh, sql, js, ts, tsx; optionally bundled in file extensions: zip, tar, tgz, bz2, gz",
|
|
4521
|
-
"list": true,
|
|
4571
|
+
"list": false,
|
|
4522
4572
|
"list_add_label": "Add More",
|
|
4523
|
-
"name": "
|
|
4573
|
+
"name": "embedding_model",
|
|
4524
4574
|
"placeholder": "",
|
|
4525
|
-
"real_time_refresh": true,
|
|
4526
4575
|
"required": false,
|
|
4527
|
-
"show":
|
|
4528
|
-
"temp_file": false,
|
|
4576
|
+
"show": false,
|
|
4529
4577
|
"title_case": false,
|
|
4530
4578
|
"trace_as_metadata": true,
|
|
4531
|
-
"type": "
|
|
4579
|
+
"type": "other",
|
|
4532
4580
|
"value": ""
|
|
4533
4581
|
},
|
|
4534
|
-
"
|
|
4535
|
-
"_input_type": "
|
|
4582
|
+
"environment": {
|
|
4583
|
+
"_input_type": "DropdownInput",
|
|
4536
4584
|
"advanced": true,
|
|
4537
|
-
"
|
|
4585
|
+
"combobox": true,
|
|
4586
|
+
"dialog_inputs": {},
|
|
4587
|
+
"display_name": "Environment",
|
|
4538
4588
|
"dynamic": false,
|
|
4539
|
-
"info": "
|
|
4540
|
-
"
|
|
4541
|
-
"
|
|
4542
|
-
|
|
4543
|
-
|
|
4589
|
+
"info": "The environment for the Astra DB API Endpoint.",
|
|
4590
|
+
"name": "environment",
|
|
4591
|
+
"options": [
|
|
4592
|
+
"prod",
|
|
4593
|
+
"test",
|
|
4594
|
+
"dev"
|
|
4595
|
+
],
|
|
4596
|
+
"options_metadata": [],
|
|
4544
4597
|
"placeholder": "",
|
|
4598
|
+
"real_time_refresh": true,
|
|
4545
4599
|
"required": false,
|
|
4546
4600
|
"show": true,
|
|
4547
4601
|
"title_case": false,
|
|
4602
|
+
"toggle": false,
|
|
4548
4603
|
"tool_mode": false,
|
|
4549
4604
|
"trace_as_metadata": true,
|
|
4550
4605
|
"type": "str",
|
|
4551
|
-
"value": "
|
|
4606
|
+
"value": "prod"
|
|
4552
4607
|
},
|
|
4553
|
-
"
|
|
4608
|
+
"ignore_invalid_documents": {
|
|
4554
4609
|
"_input_type": "BoolInput",
|
|
4555
4610
|
"advanced": true,
|
|
4556
|
-
"display_name": "
|
|
4611
|
+
"display_name": "Ignore Invalid Documents",
|
|
4557
4612
|
"dynamic": false,
|
|
4558
|
-
"info": "
|
|
4613
|
+
"info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
|
|
4559
4614
|
"list": false,
|
|
4560
4615
|
"list_add_label": "Add More",
|
|
4561
|
-
"name": "
|
|
4616
|
+
"name": "ignore_invalid_documents",
|
|
4562
4617
|
"placeholder": "",
|
|
4563
4618
|
"required": false,
|
|
4564
4619
|
"show": true,
|
|
@@ -4568,232 +4623,131 @@
|
|
|
4568
4623
|
"type": "bool",
|
|
4569
4624
|
"value": false
|
|
4570
4625
|
},
|
|
4571
|
-
"
|
|
4572
|
-
"_input_type": "
|
|
4573
|
-
"advanced": true,
|
|
4574
|
-
"display_name": "[Deprecated] Use Multithreading",
|
|
4575
|
-
"dynamic": false,
|
|
4576
|
-
"info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.",
|
|
4577
|
-
"list": false,
|
|
4578
|
-
"list_add_label": "Add More",
|
|
4579
|
-
"name": "use_multithreading",
|
|
4580
|
-
"placeholder": "",
|
|
4581
|
-
"required": false,
|
|
4582
|
-
"show": true,
|
|
4583
|
-
"title_case": false,
|
|
4584
|
-
"tool_mode": false,
|
|
4585
|
-
"trace_as_metadata": true,
|
|
4586
|
-
"type": "bool",
|
|
4587
|
-
"value": true
|
|
4588
|
-
}
|
|
4589
|
-
},
|
|
4590
|
-
"tool_mode": false
|
|
4591
|
-
},
|
|
4592
|
-
"showNode": true,
|
|
4593
|
-
"type": "File"
|
|
4594
|
-
},
|
|
4595
|
-
"dragging": false,
|
|
4596
|
-
"id": "File-vusZ2",
|
|
4597
|
-
"measured": {
|
|
4598
|
-
"height": 230,
|
|
4599
|
-
"width": 320
|
|
4600
|
-
},
|
|
4601
|
-
"position": {
|
|
4602
|
-
"x": 1330.7650978046952,
|
|
4603
|
-
"y": 1431.5905495627503
|
|
4604
|
-
},
|
|
4605
|
-
"selected": false,
|
|
4606
|
-
"type": "genericNode"
|
|
4607
|
-
},
|
|
4608
|
-
{
|
|
4609
|
-
"data": {
|
|
4610
|
-
"id": "LanguageModelComponent-1uhUK",
|
|
4611
|
-
"node": {
|
|
4612
|
-
"base_classes": [
|
|
4613
|
-
"LanguageModel",
|
|
4614
|
-
"Message"
|
|
4615
|
-
],
|
|
4616
|
-
"beta": false,
|
|
4617
|
-
"conditional_paths": [],
|
|
4618
|
-
"custom_fields": {},
|
|
4619
|
-
"description": "Runs a language model given a specified provider. ",
|
|
4620
|
-
"display_name": "Language Model",
|
|
4621
|
-
"documentation": "",
|
|
4622
|
-
"edited": false,
|
|
4623
|
-
"field_order": [
|
|
4624
|
-
"provider",
|
|
4625
|
-
"model_name",
|
|
4626
|
-
"api_key",
|
|
4627
|
-
"input_value",
|
|
4628
|
-
"system_message",
|
|
4629
|
-
"stream",
|
|
4630
|
-
"temperature"
|
|
4631
|
-
],
|
|
4632
|
-
"frozen": false,
|
|
4633
|
-
"icon": "brain-circuit",
|
|
4634
|
-
"legacy": false,
|
|
4635
|
-
"metadata": {
|
|
4636
|
-
"keywords": [
|
|
4637
|
-
"model",
|
|
4638
|
-
"llm",
|
|
4639
|
-
"language model",
|
|
4640
|
-
"large language model"
|
|
4641
|
-
]
|
|
4642
|
-
},
|
|
4643
|
-
"minimized": false,
|
|
4644
|
-
"output_types": [],
|
|
4645
|
-
"outputs": [
|
|
4646
|
-
{
|
|
4647
|
-
"allows_loop": false,
|
|
4648
|
-
"cache": true,
|
|
4649
|
-
"display_name": "Model Response",
|
|
4650
|
-
"group_outputs": false,
|
|
4651
|
-
"method": "text_response",
|
|
4652
|
-
"name": "text_output",
|
|
4653
|
-
"options": null,
|
|
4654
|
-
"required_inputs": null,
|
|
4655
|
-
"selected": "Message",
|
|
4656
|
-
"tool_mode": true,
|
|
4657
|
-
"types": [
|
|
4658
|
-
"Message"
|
|
4659
|
-
],
|
|
4660
|
-
"value": "__UNDEFINED__"
|
|
4661
|
-
},
|
|
4662
|
-
{
|
|
4663
|
-
"allows_loop": false,
|
|
4664
|
-
"cache": true,
|
|
4665
|
-
"display_name": "Language Model",
|
|
4666
|
-
"group_outputs": false,
|
|
4667
|
-
"method": "build_model",
|
|
4668
|
-
"name": "model_output",
|
|
4669
|
-
"options": null,
|
|
4670
|
-
"required_inputs": null,
|
|
4671
|
-
"selected": "LanguageModel",
|
|
4672
|
-
"tool_mode": true,
|
|
4673
|
-
"types": [
|
|
4674
|
-
"LanguageModel"
|
|
4675
|
-
],
|
|
4676
|
-
"value": "__UNDEFINED__"
|
|
4677
|
-
}
|
|
4678
|
-
],
|
|
4679
|
-
"pinned": false,
|
|
4680
|
-
"priority": 0,
|
|
4681
|
-
"template": {
|
|
4682
|
-
"_type": "Component",
|
|
4683
|
-
"api_key": {
|
|
4684
|
-
"_input_type": "SecretStrInput",
|
|
4626
|
+
"ingest_data": {
|
|
4627
|
+
"_input_type": "HandleInput",
|
|
4685
4628
|
"advanced": false,
|
|
4686
|
-
"display_name": "
|
|
4629
|
+
"display_name": "Ingest Data",
|
|
4687
4630
|
"dynamic": false,
|
|
4688
|
-
"info": "
|
|
4689
|
-
"input_types": [
|
|
4690
|
-
|
|
4691
|
-
|
|
4692
|
-
|
|
4631
|
+
"info": "",
|
|
4632
|
+
"input_types": [
|
|
4633
|
+
"Data",
|
|
4634
|
+
"DataFrame"
|
|
4635
|
+
],
|
|
4636
|
+
"list": true,
|
|
4637
|
+
"list_add_label": "Add More",
|
|
4638
|
+
"name": "ingest_data",
|
|
4693
4639
|
"placeholder": "",
|
|
4694
|
-
"real_time_refresh": true,
|
|
4695
4640
|
"required": false,
|
|
4696
4641
|
"show": true,
|
|
4697
4642
|
"title_case": false,
|
|
4698
|
-
"
|
|
4699
|
-
"
|
|
4643
|
+
"trace_as_metadata": true,
|
|
4644
|
+
"type": "other",
|
|
4645
|
+
"value": ""
|
|
4700
4646
|
},
|
|
4701
|
-
"
|
|
4647
|
+
"keyspace": {
|
|
4648
|
+
"_input_type": "DropdownInput",
|
|
4702
4649
|
"advanced": true,
|
|
4703
|
-
"
|
|
4704
|
-
"
|
|
4705
|
-
"
|
|
4706
|
-
"
|
|
4707
|
-
"
|
|
4708
|
-
"
|
|
4709
|
-
"
|
|
4710
|
-
"
|
|
4711
|
-
"password": false,
|
|
4650
|
+
"combobox": false,
|
|
4651
|
+
"dialog_inputs": {},
|
|
4652
|
+
"display_name": "Keyspace",
|
|
4653
|
+
"dynamic": false,
|
|
4654
|
+
"info": "Optional keyspace within Astra DB to use for the collection.",
|
|
4655
|
+
"name": "keyspace",
|
|
4656
|
+
"options": [],
|
|
4657
|
+
"options_metadata": [],
|
|
4712
4658
|
"placeholder": "",
|
|
4713
|
-
"
|
|
4659
|
+
"real_time_refresh": true,
|
|
4660
|
+
"required": false,
|
|
4714
4661
|
"show": true,
|
|
4715
4662
|
"title_case": false,
|
|
4716
|
-
"
|
|
4717
|
-
"
|
|
4663
|
+
"toggle": false,
|
|
4664
|
+
"tool_mode": false,
|
|
4665
|
+
"trace_as_metadata": true,
|
|
4666
|
+
"type": "str",
|
|
4667
|
+
"value": ""
|
|
4718
4668
|
},
|
|
4719
|
-
"
|
|
4720
|
-
"_input_type": "
|
|
4669
|
+
"lexical_terms": {
|
|
4670
|
+
"_input_type": "QueryInput",
|
|
4721
4671
|
"advanced": false,
|
|
4722
|
-
"display_name": "
|
|
4672
|
+
"display_name": "Lexical Terms",
|
|
4723
4673
|
"dynamic": false,
|
|
4724
|
-
"info": "
|
|
4674
|
+
"info": "Add additional terms/keywords to augment search precision.",
|
|
4725
4675
|
"input_types": [
|
|
4726
4676
|
"Message"
|
|
4727
4677
|
],
|
|
4728
4678
|
"list": false,
|
|
4729
4679
|
"list_add_label": "Add More",
|
|
4730
4680
|
"load_from_db": false,
|
|
4731
|
-
"name": "
|
|
4732
|
-
"placeholder": "",
|
|
4681
|
+
"name": "lexical_terms",
|
|
4682
|
+
"placeholder": "Enter terms to search...",
|
|
4733
4683
|
"required": false,
|
|
4734
|
-
"
|
|
4684
|
+
"separator": " ",
|
|
4685
|
+
"show": false,
|
|
4735
4686
|
"title_case": false,
|
|
4736
4687
|
"tool_mode": false,
|
|
4737
4688
|
"trace_as_input": true,
|
|
4738
4689
|
"trace_as_metadata": true,
|
|
4739
|
-
"type": "
|
|
4690
|
+
"type": "query",
|
|
4740
4691
|
"value": ""
|
|
4741
4692
|
},
|
|
4742
|
-
"
|
|
4693
|
+
"number_of_results": {
|
|
4694
|
+
"_input_type": "IntInput",
|
|
4695
|
+
"advanced": true,
|
|
4696
|
+
"display_name": "Number of Search Results",
|
|
4697
|
+
"dynamic": false,
|
|
4698
|
+
"info": "Number of search results to return.",
|
|
4699
|
+
"list": false,
|
|
4700
|
+
"list_add_label": "Add More",
|
|
4701
|
+
"name": "number_of_results",
|
|
4702
|
+
"placeholder": "",
|
|
4703
|
+
"required": false,
|
|
4704
|
+
"show": true,
|
|
4705
|
+
"title_case": false,
|
|
4706
|
+
"tool_mode": false,
|
|
4707
|
+
"trace_as_metadata": true,
|
|
4708
|
+
"type": "int",
|
|
4709
|
+
"value": 4
|
|
4710
|
+
},
|
|
4711
|
+
"reranker": {
|
|
4743
4712
|
"_input_type": "DropdownInput",
|
|
4744
4713
|
"advanced": false,
|
|
4745
4714
|
"combobox": false,
|
|
4746
4715
|
"dialog_inputs": {},
|
|
4747
|
-
"display_name": "
|
|
4716
|
+
"display_name": "Reranker",
|
|
4748
4717
|
"dynamic": false,
|
|
4749
|
-
"info": "
|
|
4750
|
-
"name": "
|
|
4751
|
-
"options": [
|
|
4752
|
-
"gpt-4o-mini",
|
|
4753
|
-
"gpt-4o",
|
|
4754
|
-
"gpt-4.1",
|
|
4755
|
-
"gpt-4.1-mini",
|
|
4756
|
-
"gpt-4.1-nano",
|
|
4757
|
-
"gpt-4.5-preview",
|
|
4758
|
-
"gpt-4-turbo",
|
|
4759
|
-
"gpt-4-turbo-preview",
|
|
4760
|
-
"gpt-4",
|
|
4761
|
-
"gpt-3.5-turbo"
|
|
4762
|
-
],
|
|
4718
|
+
"info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
|
|
4719
|
+
"name": "reranker",
|
|
4720
|
+
"options": [],
|
|
4763
4721
|
"options_metadata": [],
|
|
4764
4722
|
"placeholder": "",
|
|
4765
4723
|
"required": false,
|
|
4766
|
-
"show":
|
|
4724
|
+
"show": false,
|
|
4767
4725
|
"title_case": false,
|
|
4768
|
-
"toggle":
|
|
4726
|
+
"toggle": true,
|
|
4769
4727
|
"tool_mode": false,
|
|
4770
4728
|
"trace_as_metadata": true,
|
|
4771
4729
|
"type": "str",
|
|
4772
|
-
"value": "
|
|
4730
|
+
"value": ""
|
|
4773
4731
|
},
|
|
4774
|
-
"
|
|
4732
|
+
"search_method": {
|
|
4775
4733
|
"_input_type": "DropdownInput",
|
|
4776
|
-
"advanced":
|
|
4734
|
+
"advanced": true,
|
|
4777
4735
|
"combobox": false,
|
|
4778
4736
|
"dialog_inputs": {},
|
|
4779
|
-
"display_name": "
|
|
4737
|
+
"display_name": "Search Method",
|
|
4780
4738
|
"dynamic": false,
|
|
4781
|
-
"info": "
|
|
4782
|
-
"name": "
|
|
4739
|
+
"info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
|
|
4740
|
+
"name": "search_method",
|
|
4783
4741
|
"options": [
|
|
4784
|
-
"
|
|
4785
|
-
"
|
|
4786
|
-
"Google"
|
|
4742
|
+
"Hybrid Search",
|
|
4743
|
+
"Vector Search"
|
|
4787
4744
|
],
|
|
4788
4745
|
"options_metadata": [
|
|
4789
4746
|
{
|
|
4790
|
-
"icon": "
|
|
4791
|
-
},
|
|
4792
|
-
{
|
|
4793
|
-
"icon": "Anthropic"
|
|
4747
|
+
"icon": "SearchHybrid"
|
|
4794
4748
|
},
|
|
4795
4749
|
{
|
|
4796
|
-
"icon": "
|
|
4750
|
+
"icon": "SearchVector"
|
|
4797
4751
|
}
|
|
4798
4752
|
],
|
|
4799
4753
|
"placeholder": "",
|
|
@@ -4805,111 +4759,142 @@
|
|
|
4805
4759
|
"tool_mode": false,
|
|
4806
4760
|
"trace_as_metadata": true,
|
|
4807
4761
|
"type": "str",
|
|
4808
|
-
"value": "
|
|
4762
|
+
"value": "Vector Search"
|
|
4809
4763
|
},
|
|
4810
|
-
"
|
|
4811
|
-
"_input_type": "
|
|
4764
|
+
"search_query": {
|
|
4765
|
+
"_input_type": "QueryInput",
|
|
4766
|
+
"advanced": false,
|
|
4767
|
+
"display_name": "Search Query",
|
|
4768
|
+
"dynamic": false,
|
|
4769
|
+
"info": "Enter a query to run a similarity search.",
|
|
4770
|
+
"input_types": [
|
|
4771
|
+
"Message"
|
|
4772
|
+
],
|
|
4773
|
+
"list": false,
|
|
4774
|
+
"list_add_label": "Add More",
|
|
4775
|
+
"load_from_db": false,
|
|
4776
|
+
"name": "search_query",
|
|
4777
|
+
"placeholder": "Enter a query...",
|
|
4778
|
+
"required": false,
|
|
4779
|
+
"show": true,
|
|
4780
|
+
"title_case": false,
|
|
4781
|
+
"tool_mode": true,
|
|
4782
|
+
"trace_as_input": true,
|
|
4783
|
+
"trace_as_metadata": true,
|
|
4784
|
+
"type": "query",
|
|
4785
|
+
"value": ""
|
|
4786
|
+
},
|
|
4787
|
+
"search_score_threshold": {
|
|
4788
|
+
"_input_type": "FloatInput",
|
|
4812
4789
|
"advanced": true,
|
|
4813
|
-
"display_name": "
|
|
4790
|
+
"display_name": "Search Score Threshold",
|
|
4814
4791
|
"dynamic": false,
|
|
4815
|
-
"info": "
|
|
4792
|
+
"info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
|
|
4816
4793
|
"list": false,
|
|
4817
4794
|
"list_add_label": "Add More",
|
|
4818
|
-
"name": "
|
|
4795
|
+
"name": "search_score_threshold",
|
|
4819
4796
|
"placeholder": "",
|
|
4820
4797
|
"required": false,
|
|
4821
4798
|
"show": true,
|
|
4822
4799
|
"title_case": false,
|
|
4823
4800
|
"tool_mode": false,
|
|
4824
4801
|
"trace_as_metadata": true,
|
|
4825
|
-
"type": "
|
|
4826
|
-
"value":
|
|
4802
|
+
"type": "float",
|
|
4803
|
+
"value": 0
|
|
4827
4804
|
},
|
|
4828
|
-
"
|
|
4829
|
-
"_input_type": "
|
|
4805
|
+
"search_type": {
|
|
4806
|
+
"_input_type": "DropdownInput",
|
|
4830
4807
|
"advanced": true,
|
|
4831
|
-
"
|
|
4832
|
-
"
|
|
4808
|
+
"combobox": false,
|
|
4809
|
+
"dialog_inputs": {},
|
|
4810
|
+
"display_name": "Search Type",
|
|
4833
4811
|
"dynamic": false,
|
|
4834
|
-
"info": "
|
|
4835
|
-
"
|
|
4836
|
-
|
|
4812
|
+
"info": "Search type to use",
|
|
4813
|
+
"name": "search_type",
|
|
4814
|
+
"options": [
|
|
4815
|
+
"Similarity",
|
|
4816
|
+
"Similarity with score threshold",
|
|
4817
|
+
"MMR (Max Marginal Relevance)"
|
|
4837
4818
|
],
|
|
4838
|
-
"
|
|
4839
|
-
"list_add_label": "Add More",
|
|
4840
|
-
"load_from_db": false,
|
|
4841
|
-
"multiline": true,
|
|
4842
|
-
"name": "system_message",
|
|
4819
|
+
"options_metadata": [],
|
|
4843
4820
|
"placeholder": "",
|
|
4844
4821
|
"required": false,
|
|
4845
4822
|
"show": true,
|
|
4846
4823
|
"title_case": false,
|
|
4824
|
+
"toggle": false,
|
|
4847
4825
|
"tool_mode": false,
|
|
4848
|
-
"trace_as_input": true,
|
|
4849
4826
|
"trace_as_metadata": true,
|
|
4850
4827
|
"type": "str",
|
|
4851
|
-
"value": ""
|
|
4828
|
+
"value": "Similarity"
|
|
4852
4829
|
},
|
|
4853
|
-
"
|
|
4854
|
-
"_input_type": "
|
|
4830
|
+
"should_cache_vector_store": {
|
|
4831
|
+
"_input_type": "BoolInput",
|
|
4855
4832
|
"advanced": true,
|
|
4856
|
-
"display_name": "
|
|
4833
|
+
"display_name": "Cache Vector Store",
|
|
4857
4834
|
"dynamic": false,
|
|
4858
|
-
"info": "
|
|
4859
|
-
"
|
|
4860
|
-
"
|
|
4861
|
-
"
|
|
4862
|
-
"min_label_icon": "",
|
|
4863
|
-
"name": "temperature",
|
|
4835
|
+
"info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
|
|
4836
|
+
"list": false,
|
|
4837
|
+
"list_add_label": "Add More",
|
|
4838
|
+
"name": "should_cache_vector_store",
|
|
4864
4839
|
"placeholder": "",
|
|
4865
|
-
"range_spec": {
|
|
4866
|
-
"max": 1,
|
|
4867
|
-
"min": 0,
|
|
4868
|
-
"step": 0.01,
|
|
4869
|
-
"step_type": "float"
|
|
4870
|
-
},
|
|
4871
4840
|
"required": false,
|
|
4872
4841
|
"show": true,
|
|
4873
|
-
"slider_buttons": false,
|
|
4874
|
-
"slider_buttons_options": [],
|
|
4875
|
-
"slider_input": false,
|
|
4876
4842
|
"title_case": false,
|
|
4877
4843
|
"tool_mode": false,
|
|
4878
|
-
"
|
|
4879
|
-
"
|
|
4844
|
+
"trace_as_metadata": true,
|
|
4845
|
+
"type": "bool",
|
|
4846
|
+
"value": true
|
|
4847
|
+
},
|
|
4848
|
+
"token": {
|
|
4849
|
+
"_input_type": "SecretStrInput",
|
|
4850
|
+
"advanced": false,
|
|
4851
|
+
"display_name": "Astra DB Application Token",
|
|
4852
|
+
"dynamic": false,
|
|
4853
|
+
"info": "Authentication token for accessing Astra DB.",
|
|
4854
|
+
"input_types": [],
|
|
4855
|
+
"load_from_db": true,
|
|
4856
|
+
"name": "token",
|
|
4857
|
+
"password": true,
|
|
4858
|
+
"placeholder": "",
|
|
4859
|
+
"real_time_refresh": true,
|
|
4860
|
+
"required": true,
|
|
4861
|
+
"show": true,
|
|
4862
|
+
"title_case": false,
|
|
4863
|
+
"type": "str",
|
|
4864
|
+
"value": "ASTRA_DB_APPLICATION_TOKEN"
|
|
4880
4865
|
}
|
|
4881
4866
|
},
|
|
4882
4867
|
"tool_mode": false
|
|
4883
4868
|
},
|
|
4884
|
-
"selected_output": "
|
|
4869
|
+
"selected_output": "dataframe",
|
|
4885
4870
|
"showNode": true,
|
|
4886
|
-
"type": "
|
|
4871
|
+
"type": "AstraDB"
|
|
4887
4872
|
},
|
|
4888
4873
|
"dragging": false,
|
|
4889
|
-
"id": "
|
|
4874
|
+
"id": "AstraDB-CLCyc",
|
|
4890
4875
|
"measured": {
|
|
4891
|
-
"height":
|
|
4876
|
+
"height": 457,
|
|
4892
4877
|
"width": 320
|
|
4893
4878
|
},
|
|
4894
4879
|
"position": {
|
|
4895
|
-
"x":
|
|
4896
|
-
"y":
|
|
4880
|
+
"x": 1210.9603958451855,
|
|
4881
|
+
"y": 519.26557834351
|
|
4897
4882
|
},
|
|
4898
4883
|
"selected": false,
|
|
4899
4884
|
"type": "genericNode"
|
|
4900
4885
|
}
|
|
4901
4886
|
],
|
|
4902
4887
|
"viewport": {
|
|
4903
|
-
"x":
|
|
4904
|
-
"y": -
|
|
4905
|
-
"zoom": 0.
|
|
4888
|
+
"x": 100.76434400868948,
|
|
4889
|
+
"y": -143.93713359276438,
|
|
4890
|
+
"zoom": 0.43125741996443906
|
|
4906
4891
|
}
|
|
4907
4892
|
},
|
|
4908
4893
|
"description": "Load your data for chat context with Retrieval Augmented Generation.",
|
|
4909
4894
|
"endpoint_name": null,
|
|
4910
|
-
"id": "
|
|
4895
|
+
"id": "e4776e4e-d3cf-42f3-9160-981f4b18325a",
|
|
4911
4896
|
"is_component": false,
|
|
4912
|
-
"last_tested_version": "1.
|
|
4897
|
+
"last_tested_version": "1.6.0",
|
|
4913
4898
|
"name": "Vector Store RAG",
|
|
4914
4899
|
"tags": [
|
|
4915
4900
|
"openai",
|