langflow-base-nightly 0.5.1.dev0__py3-none-any.whl → 0.5.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. langflow/api/v1/endpoints.py +10 -7
  2. langflow/api/v1/schemas.py +5 -2
  3. langflow/base/knowledge_bases/__init__.py +0 -0
  4. langflow/components/agents/agent.py +1 -0
  5. langflow/components/data/__init__.py +0 -4
  6. langflow/components/datastax/__init__.py +2 -2
  7. langflow/components/knowledge_bases/__init__.py +34 -0
  8. langflow/components/{data/kb_ingest.py → knowledge_bases/ingestion.py} +23 -15
  9. langflow/components/{data/kb_retrieval.py → knowledge_bases/retrieval.py} +26 -22
  10. langflow/components/processing/dataframe_operations.py +12 -1
  11. langflow/frontend/assets/{SlackIcon-Cr3Q15Px.js → SlackIcon-Cc7Qnzki.js} +1 -1
  12. langflow/frontend/assets/{Wikipedia-GxM5sPdM.js → Wikipedia-7ulMZY46.js} +1 -1
  13. langflow/frontend/assets/{Wolfram-BN3-VOCA.js → Wolfram-By9PGsHS.js} +1 -1
  14. langflow/frontend/assets/{index-Kwdl-e29.js → index--e0oQqZh.js} +1 -1
  15. langflow/frontend/assets/{index-CD-PqGCY.js → index-3jlSQi5Y.js} +1 -1
  16. langflow/frontend/assets/{index-DQ7VYqQc.js → index-4JIEdyIM.js} +1 -1
  17. langflow/frontend/assets/{index-C9Cxnkl8.js → index-5-CSw2-z.js} +1 -1
  18. langflow/frontend/assets/{index-DUpri6zF.js → index-7yAHPRxv.js} +1 -1
  19. langflow/frontend/assets/{index-Dl5amdBz.js → index-9FL5xjkL.js} +1 -1
  20. langflow/frontend/assets/{index-X0guhYF8.js → index-AALDfCyt.js} +1 -1
  21. langflow/frontend/assets/{index-BLTxEeTi.js → index-AKVkmT4S.js} +1 -1
  22. langflow/frontend/assets/{index-CRcMqCIj.js → index-B3GvPjhD.js} +1 -1
  23. langflow/frontend/assets/{index-D14EWPyZ.js → index-B5LHnuQR.js} +1 -1
  24. langflow/frontend/assets/{index-C3yvArUT.js → index-BAn-AzCS.js} +1 -1
  25. langflow/frontend/assets/{index-CRPKJZw9.js → index-BCXhKCOK.js} +1 -1
  26. langflow/frontend/assets/{index-CuFXdTx4.js → index-BGt6jQ4x.js} +1 -1
  27. langflow/frontend/assets/{index-AWCSdofD.js → index-BH7AyHxp.js} +1 -1
  28. langflow/frontend/assets/{index-CBc8fEAE.js → index-BISPW-f6.js} +1 -1
  29. langflow/frontend/assets/{index-Bf0IYKLd.js → index-BIqEYjNT.js} +1 -1
  30. langflow/frontend/assets/{index-DiGWASY5.js → index-BLEWsL1U.js} +1 -1
  31. langflow/frontend/assets/{index-D-KY3kkq.js → index-BLXN681C.js} +1 -1
  32. langflow/frontend/assets/{index-C-Xfg4cD.js → index-BMpKFGhI.js} +1 -1
  33. langflow/frontend/assets/{index-BVwJDmw-.js → index-BMvp94tO.js} +1 -1
  34. langflow/frontend/assets/{index-3wW7BClE.js → index-BSwBVwyF.js} +1 -1
  35. langflow/frontend/assets/{index-Cewy7JZE.js → index-BWFIrwW1.js} +1 -1
  36. langflow/frontend/assets/{index-CiixOzDG.js → index-BWnKMRFJ.js} +1 -1
  37. langflow/frontend/assets/{index-BZcw4827.js → index-BX_asvRB.js} +1 -1
  38. langflow/frontend/assets/{index-CiR1dxI4.js → index-BZ-A4K98.js} +1 -1
  39. langflow/frontend/assets/{index-CpzXS6md.js → index-BZSa2qz7.js} +1 -1
  40. langflow/frontend/assets/{index-CdIf07Rw.js → index-B_kBTgxV.js} +1 -1
  41. langflow/frontend/assets/{index-ClsuDmR6.js → index-BdjfHsrf.js} +1 -1
  42. langflow/frontend/assets/{index-hbndqB9B.js → index-Bhcv5M0n.js} +1 -1
  43. langflow/frontend/assets/{index-dJWNxIRH.js → index-BhqVw9WQ.js} +1 -1
  44. langflow/frontend/assets/{index-Tw3Os-DN.js → index-Bl7RpmrB.js} +1 -1
  45. langflow/frontend/assets/{index-C-EdnFdA.js → index-BlDsBQ_1.js} +1 -1
  46. langflow/frontend/assets/{index-z3SRY-mX.js → index-Bm9i8F4W.js} +1 -1
  47. langflow/frontend/assets/{index-CMZ79X-Y.js → index-BnCnYnao.js} +1 -1
  48. langflow/frontend/assets/{index-DXRfN4HV.js → index-BrDz-PxE.js} +1 -1
  49. langflow/frontend/assets/{index-CVWQfRYZ.js → index-BsdLyYMY.js} +1 -1
  50. langflow/frontend/assets/{index-BTKOU4xC.js → index-BusCv3bR.js} +1 -1
  51. langflow/frontend/assets/{index-D2N3l-cw.js → index-BvRIG6P5.js} +1 -1
  52. langflow/frontend/assets/{index-DpVWih90.js → index-Bw-TIIC6.js} +1 -1
  53. langflow/frontend/assets/{index-D-1tA8Dt.js → index-ByxGmq5p.js} +1 -1
  54. langflow/frontend/assets/{index-BWYuQ2Sj.js → index-C-2MRYoJ.js} +1 -1
  55. langflow/frontend/assets/{index-CZqRL9DE.js → index-C-bjC2sz.js} +1 -1
  56. langflow/frontend/assets/{index-o0D2S7xW.js → index-C-wnbBBY.js} +1 -1
  57. langflow/frontend/assets/{index-D-_B1a8v.js → index-C51yNvIL.js} +1 -1
  58. langflow/frontend/assets/{index-DJP-ss47.js → index-C676MS3I.js} +1 -1
  59. langflow/frontend/assets/{index-lZX9AvZW.js → index-C6nzdeYx.js} +1 -1
  60. langflow/frontend/assets/{index-6pyH3ZJB.js → index-C8pI0lzi.js} +1 -1
  61. langflow/frontend/assets/{index-ovFJ_0J6.js → index-CDphUsa3.js} +1 -1
  62. langflow/frontend/assets/{index-J38wh62w.js → index-CF4_Og1m.js} +1 -1
  63. langflow/frontend/assets/{index-C1f2wMat.js → index-CJ5A6STv.js} +1 -1
  64. langflow/frontend/assets/{index-C3KequvP.js → index-CKPZpkQk.js} +1 -1
  65. langflow/frontend/assets/{index-BiKKN6FR.js → index-CLcaktde.js} +1 -1
  66. langflow/frontend/assets/{index-28oOcafk.js → index-CNh0rwur.js} +1 -1
  67. langflow/frontend/assets/{index-CGO1CiUr.js → index-COoTCxvs.js} +1 -1
  68. langflow/frontend/assets/{index-BC65VuWx.js → index-CPiM2oyj.js} +1 -1
  69. langflow/frontend/assets/{index-BWdLILDG.js → index-CQQ-4XMS.js} +1 -1
  70. langflow/frontend/assets/{index-pYJJOcma.js → index-CU16NJD7.js} +1 -1
  71. langflow/frontend/assets/{index-h_aSZHf3.js → index-CUzlcce2.js} +1 -1
  72. langflow/frontend/assets/{index-BrJV8psX.js → index-CVkIdc6y.js} +1 -1
  73. langflow/frontend/assets/{index-lTpteg8t.js → index-C_157Mb-.js} +1 -1
  74. langflow/frontend/assets/{index-Cyd2HtHK.js → index-C_MhBX6R.js} +1 -1
  75. langflow/frontend/assets/{index-DrDrcajG.js → index-C_NwzK6j.js} +1 -1
  76. langflow/frontend/assets/{index-DlD4dXlZ.js → index-Ca1b7Iag.js} +1 -1
  77. langflow/frontend/assets/{index-BS8Vo8nc.js → index-Cb5G9Ifd.js} +1 -1
  78. langflow/frontend/assets/{index-CH5UVA9b.js → index-CeswGUz3.js} +1 -1
  79. langflow/frontend/assets/{index-BCDSei1q.js → index-ChsGhZn3.js} +1 -1
  80. langflow/frontend/assets/{index-DiB3CTo8.js → index-CiklyQU3.js} +1 -1
  81. langflow/frontend/assets/{index-dcJ8-agu.js → index-Co__gFM1.js} +1 -1
  82. langflow/frontend/assets/{index-eo2mAtL-.js → index-Coi86oqP.js} +1 -1
  83. langflow/frontend/assets/{index-CBvrGgID.js → index-Cu2Xr6_j.js} +1 -1
  84. langflow/frontend/assets/{index-2wSXqBtB.js → index-Cu7vC48Y.js} +1 -1
  85. langflow/frontend/assets/{index-Bbi87Ve4.js → index-CvSoff-8.js} +1 -1
  86. langflow/frontend/assets/{index-DA6-bvgN.js → index-Cw0UComa.js} +1 -1
  87. langflow/frontend/assets/{index-CWPzZtSx.js → index-D-SnFlhU.js} +1 -1
  88. langflow/frontend/assets/{index-DHgomBdh.js → index-D-WStJI6.js} +1 -1
  89. langflow/frontend/assets/{index-BpxbUiZD.js → index-D234yKNJ.js} +186 -186
  90. langflow/frontend/assets/{index-BkXec1Yf.js → index-D5c2nNvp.js} +1 -1
  91. langflow/frontend/assets/{index-DwQEZe3C.js → index-DFY8YFbC.js} +1 -1
  92. langflow/frontend/assets/{index-Bief6eyJ.js → index-DJ6HD14g.js} +1 -1
  93. langflow/frontend/assets/{index-Bx7dBY26.js → index-DMCerPJM.js} +1 -1
  94. langflow/frontend/assets/{index-DDWBeudF.js → index-DOj_QWqG.js} +1 -1
  95. langflow/frontend/assets/{index-CmEYYRN1.js → index-DP1oE6QB.js} +1 -1
  96. langflow/frontend/assets/{index-C3ZjKdCD.js → index-DTCrijba.js} +1 -1
  97. langflow/frontend/assets/{index-CtVIONP2.js → index-DVLIDc2_.js} +1 -1
  98. langflow/frontend/assets/{index-DyJFTK24.js → index-DX7JcSMz.js} +1 -1
  99. langflow/frontend/assets/{index-DmeiHnfl.js → index-DZVgPCio.js} +1 -1
  100. langflow/frontend/assets/{index-BwLWcUXL.js → index-DbfS_UH-.js} +1 -1
  101. langflow/frontend/assets/{index-DV3utZDZ.js → index-DcApTyZ7.js} +1 -1
  102. langflow/frontend/assets/{index-DDcMAaG4.js → index-Deu8rlaZ.js} +1 -1
  103. langflow/frontend/assets/{index-iJngutFo.js → index-Df6psZEj.js} +1 -1
  104. langflow/frontend/assets/{index-CRPyCfYy.js → index-DiblXWmk.js} +1 -1
  105. langflow/frontend/assets/{index-CMzfJKiW.js → index-DjQElpEg.js} +1 -1
  106. langflow/frontend/assets/{index-Dmu-X5-4.js → index-DmVt5Jlx.js} +1 -1
  107. langflow/frontend/assets/{index-CPHEscq9.js → index-DmYLDQag.js} +1 -1
  108. langflow/frontend/assets/{index-BKseQQ2I.js → index-DnlVWWU8.js} +1 -1
  109. langflow/frontend/assets/{index-D5ETnvJa.js → index-Dp7ZQyL3.js} +1 -1
  110. langflow/frontend/assets/{index-Co20d-eQ.js → index-DpWrk8mA.js} +1 -1
  111. langflow/frontend/assets/{index-CVl6MbaM.js → index-DrXXKzpD.js} +1 -1
  112. langflow/frontend/assets/{index-OwPvCmpW.js → index-Drg8me2a.js} +1 -1
  113. langflow/frontend/assets/{index-CVwWoX99.js → index-DsEZjOcp.js} +1 -1
  114. langflow/frontend/assets/{index-DwPkMTaY.js → index-DznH7Jbq.js} +1 -1
  115. langflow/frontend/assets/{index-CNw1H-Wc.js → index-GzOGB_fo.js} +1 -1
  116. langflow/frontend/assets/{index-C3l0zYn0.js → index-MVW4HTEk.js} +1 -1
  117. langflow/frontend/assets/{index-DhtZ5hx8.js → index-OsUvqIUr.js} +1 -1
  118. langflow/frontend/assets/{index-B2ptVQGM.js → index-RH_I78z_.js} +1 -1
  119. langflow/frontend/assets/{index-DdtMEn6I.js → index-RjeC0kaX.js} +1 -1
  120. langflow/frontend/assets/{index-hG24k5xJ.js → index-S-sc0Cm9.js} +1 -1
  121. langflow/frontend/assets/{index-Bg5nrMRh.js → index-S8uJXTOq.js} +1 -1
  122. langflow/frontend/assets/{index-m8QA6VNM.js → index-SB4rw8D5.js} +1 -1
  123. langflow/frontend/assets/{index-Du-pc0KE.js → index-YJsAl7vm.js} +1 -1
  124. langflow/frontend/assets/{index-DfDhMHgQ.js → index-ZjeocHyu.js} +1 -1
  125. langflow/frontend/assets/{index-Bnl6QHtP.js → index-_szO7sta.js} +1 -1
  126. langflow/frontend/assets/{index-xvFOmxx4.js → index-aAgSKWb3.js} +1 -1
  127. langflow/frontend/assets/{index-Db9dYSzy.js → index-aWnZIwHd.js} +1 -1
  128. langflow/frontend/assets/{index-BJy50PvP.js → index-bMhyLtgS.js} +1 -1
  129. langflow/frontend/assets/{index-Cqpzl1J4.js → index-cYFKmtmg.js} +1 -1
  130. langflow/frontend/assets/{index-CLJeJYjH.js → index-hg2y9OAt.js} +1 -1
  131. langflow/frontend/assets/{index-D7kquVv2.js → index-jwzN3Jd_.js} +1 -1
  132. langflow/frontend/assets/{index-BiC280Nx.js → index-k9jP5chN.js} +1 -1
  133. langflow/frontend/assets/{index-B3TANVes.js → index-lnF9Eqr2.js} +1 -1
  134. langflow/frontend/assets/{index-B4yCvZKV.js → index-mjwtJmkP.js} +1 -1
  135. langflow/frontend/assets/{index-CfwLpbMM.js → index-nw3WF9lY.js} +1 -1
  136. langflow/frontend/assets/{index-CUVDws8F.js → index-qiVTWUuf.js} +1 -1
  137. langflow/frontend/assets/{index-DTqbvGC0.js → index-uybez8MR.js} +1 -1
  138. langflow/frontend/assets/{index-Dfe7qfvf.js → index-v8eXbWlM.js} +1 -1
  139. langflow/frontend/assets/{index-B2Zgv_xv.js → index-xN8ogFdo.js} +1 -1
  140. langflow/frontend/assets/{index-BRg1f4Mu.js → index-xV6ystWy.js} +1 -1
  141. langflow/frontend/assets/{index-sI75DsdM.js → index-yyAaYjLR.js} +1 -1
  142. langflow/frontend/assets/lazyIconImports-Ci-S9xBA.js +2 -0
  143. langflow/frontend/assets/{use-post-add-user-C0MdTpQ5.js → use-post-add-user-JUeLDErC.js} +1 -1
  144. langflow/frontend/index.html +1 -1
  145. langflow/initial_setup/starter_projects/Hybrid Search RAG.json +1280 -1276
  146. langflow/initial_setup/starter_projects/Instagram Copywriter.json +1 -1
  147. langflow/initial_setup/starter_projects/Invoice Summarizer.json +1 -1
  148. langflow/initial_setup/starter_projects/Knowledge Ingestion.json +46 -47
  149. langflow/initial_setup/starter_projects/Knowledge Retrieval.json +73 -56
  150. langflow/initial_setup/starter_projects/Market Research.json +1 -1
  151. langflow/initial_setup/starter_projects/News Aggregator.json +1 -1
  152. langflow/initial_setup/starter_projects/Nvidia Remix.json +1 -1
  153. langflow/initial_setup/starter_projects/Pokédex Agent.json +1 -1
  154. langflow/initial_setup/starter_projects/Price Deal Finder.json +1 -1
  155. langflow/initial_setup/starter_projects/Research Agent.json +1 -1
  156. langflow/initial_setup/starter_projects/SaaS Pricing.json +1 -1
  157. langflow/initial_setup/starter_projects/Search agent.json +1 -1
  158. langflow/initial_setup/starter_projects/Sequential Tasks Agents.json +3 -3
  159. langflow/initial_setup/starter_projects/Simple Agent.json +1 -1
  160. langflow/initial_setup/starter_projects/Social Media Agent.json +1 -1
  161. langflow/initial_setup/starter_projects/Travel Planning Agents.json +3 -3
  162. langflow/initial_setup/starter_projects/Vector Store RAG.json +1093 -1108
  163. langflow/initial_setup/starter_projects/Youtube Analysis.json +1 -1
  164. langflow/services/auth/utils.py +78 -1
  165. langflow/services/settings/auth.py +4 -0
  166. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/METADATA +1 -1
  167. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/RECORD +171 -169
  168. langflow/frontend/assets/lazyIconImports-D97HEZkE.js +0 -2
  169. /langflow/base/{data/kb_utils.py → knowledge_bases/knowledge_base_utils.py} +0 -0
  170. /langflow/components/datastax/{astradb.py → astradb_vectorstore.py} +0 -0
  171. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/WHEEL +0 -0
  172. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/entry_points.txt +0 -0
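
Several of the renames above move the knowledge-base components out of langflow.components.data and into a new langflow.components.knowledge_bases package (items 8, 9, and 169), alongside the DataStax module rename in item 170. For downstream code that imported the ingestion component directly, this implies an import-path and class-name update roughly like the sketch below; the old and new dotted paths are taken from the "module" metadata change shown in the diff body further down, and the snippet assumes langflow-base is installed with a knowledge_bases_dir configured (the new module raises at import time if it is not).

# Migration sketch (assumed usage, not an official guide): the ingestion
# component's module and class were renamed between 0.5.1.dev0 and 0.5.1.dev2.

# 0.5.1.dev0
# from langflow.components.data.kb_ingest import KBIngestionComponent

# 0.5.1.dev2 -- path taken from the "module" metadata in the diff below
from langflow.components.knowledge_bases.ingestion import KnowledgeIngestionComponent

print(KnowledgeIngestionComponent.display_name)  # "Knowledge Ingestion"

Flows that wired the component by name should also note the output rename from "dataframe" to "dataframe_output" and that the input_df handle now accepts Data as well as DataFrame.
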
langflow/initial_setup/starter_projects/Knowledge Ingestion.json
@@ -7,7 +7,7 @@
  "data": {
  "sourceHandle": {
  "dataType": "URLComponent",
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
  "name": "page_results",
  "output_types": [
  "DataFrame"
@@ -15,7 +15,7 @@
  },
  "targetHandle": {
  "fieldName": "data_inputs",
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
  "inputTypes": [
  "Data",
  "DataFrame",
@@ -24,20 +24,19 @@
  "type": "other"
  }
  },
- "id": "reactflow__edge-URLComponent-6JEUC{œdataTypeœ:œURLComponentœ,œidœ:œURLComponent-6JEUCœ,œnameœ:œpage_resultsœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-gvHe2{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-gvHe2œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-URLComponent-WBxJx{œdataTypeœ:œURLComponentœ,œidœ:œURLComponent-WBxJxœ,œnameœ:œpage_resultsœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-edq28{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-edq28œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
  "selected": false,
- "source": "URLComponent-6JEUC",
- "sourceHandle": "{œdataTypeœ: œURLComponentœ, œidœ: œURLComponent-6JEUCœ, œnameœ: œpage_resultsœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "SplitText-gvHe2",
- "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-gvHe2œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
+ "source": "URLComponent-WBxJx",
+ "sourceHandle": "{œdataTypeœ: œURLComponentœ, œidœ: œURLComponent-WBxJxœ, œnameœ: œpage_resultsœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "SplitText-edq28",
+ "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-edq28œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
  },
  {
- "animated": false,
  "className": "",
  "data": {
  "sourceHandle": {
  "dataType": "SplitText",
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
  "name": "dataframe",
  "output_types": [
  "DataFrame"
@@ -45,25 +44,25 @@
  },
  "targetHandle": {
  "fieldName": "input_df",
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
  "inputTypes": [
+ "Data",
  "DataFrame"
  ],
  "type": "other"
  }
  },
- "id": "xy-edge__SplitText-gvHe2{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-gvHe2œ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-KBIngestion-jj5iW{œfieldNameœ:œinput_dfœ,œidœ:œKBIngestion-jj5iWœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "SplitText-gvHe2",
- "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-gvHe2œ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "KBIngestion-jj5iW",
- "targetHandle": "{œfieldNameœ: œinput_dfœ, œidœ: œKBIngestion-jj5iWœ, œinputTypesœ: [œDataFrameœ], œtypeœ: œotherœ}"
+ "id": "xy-edge__SplitText-edq28{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-edq28œ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-KnowledgeIngestion-uSOy6{œfieldNameœ:œinput_dfœ,œidœ:œKnowledgeIngestion-uSOy6œ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
+ "source": "SplitText-edq28",
+ "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-edq28œ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "KnowledgeIngestion-uSOy6",
+ "targetHandle": "{œfieldNameœ: œinput_dfœ, œidœ: œKnowledgeIngestion-uSOy6œ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}"
  }
  ],
  "nodes": [
  {
  "data": {
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
  "node": {
  "base_classes": [
  "DataFrame"
@@ -280,7 +279,7 @@
  "type": "SplitText"
  },
  "dragging": false,
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
  "measured": {
  "height": 413,
  "width": 320
@@ -294,7 +293,7 @@
  },
  {
  "data": {
- "id": "note-bpWz8",
+ "id": "note-httIY",
  "node": {
  "description": "## Knowledge Ingestion\n\nThis flow shows the basics of the creation and ingestion of knowledge bases in Langflow. Here we use the `URL` component to dynamically fetch page data from the Langflow website, split it into chunks of 100 tokens, then ingest into a Knowledge Base.\n\n1. (Optional) Change the URL or switch to a different input data source as desired.\n2. (Optional) Adjust the Chunk Size as desired.\n3. Select or Create a new knowledge base.\n4. Ensure the column you wish to Vectorize is properly reflected in the Column Configuration table.",
  "display_name": "",
@@ -305,7 +304,7 @@
  },
  "dragging": false,
  "height": 401,
- "id": "note-bpWz8",
+ "id": "note-httIY",
  "measured": {
  "height": 401,
  "width": 388
@@ -315,13 +314,13 @@
  "y": 75.97023827444744
  },
  "resizing": false,
- "selected": true,
+ "selected": false,
  "type": "noteNode",
  "width": 388
  },
  {
  "data": {
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
  "node": {
  "base_classes": [
  "DataFrame",
@@ -697,7 +696,7 @@
  "type": "URLComponent"
  },
  "dragging": false,
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
  "measured": {
  "height": 292,
  "width": 320
@@ -711,7 +710,7 @@
  },
  {
  "data": {
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
  "node": {
  "base_classes": [
  "Data"
@@ -732,11 +731,11 @@
  "allow_duplicates"
  ],
  "frozen": false,
- "icon": "database",
- "last_updated": "2025-08-13T19:45:49.122Z",
+ "icon": "upload",
+ "last_updated": "2025-08-26T18:21:03.358Z",
  "legacy": false,
  "metadata": {
- "code_hash": "6c62063f2c09",
+ "code_hash": "ce9549373934",
  "dependencies": {
  "dependencies": [
  {
@@ -774,7 +773,7 @@
  ],
  "total_dependencies": 8
  },
- "module": "langflow.components.data.kb_ingest.KBIngestionComponent"
+ "module": "langflow.components.knowledge_bases.ingestion.KnowledgeIngestionComponent"
  },
  "minimized": false,
  "output_types": [],
@@ -782,10 +781,10 @@
  {
  "allows_loop": false,
  "cache": true,
- "display_name": "DataFrame",
+ "display_name": "Results",
  "group_outputs": false,
  "method": "build_kb_info",
- "name": "dataframe",
+ "name": "dataframe_output",
  "selected": "Data",
  "tool_mode": true,
  "types": [
@@ -866,7 +865,7 @@
  "show": true,
  "title_case": false,
  "type": "code",
- "value": "from __future__ import annotations\n\nimport asyncio\nimport contextlib\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.data.kb_utils import get_knowledge_bases\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, get_variable_service, session_scope\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"database\"\n name = \"KBIngestion\"\n\n def __init__(self, *args, **kwargs) -> None:\n super().__init__(*args, **kwargs)\n self._cached_kb_path: Path | None = None\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=False,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n 
refresh_button=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n DataFrameInput(\n name=\"input_df\",\n display_name=\"Data\",\n info=\"Table with all original columns (already chunked / processed).\",\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": True,\n },\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns:\n msg = f\"Column '{col_name}' not found in DataFrame. 
Available columns: {sorted(df_columns)}\"\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n 
cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except (OSError, TypeError, ValueError) as e:\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n async def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n vector_store_dir = await self._kb_path()\n if not vector_store_dir:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = await self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except (OSError, ValueError, RuntimeError) as e:\n self.log(f\"Error creating vector store: {e}\")\n\n async def _convert_df_to_data_objects(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]\n ) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n kb_path = await self._kb_path()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(kb_path),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n 
if vectorize:\n content_cols.append(col_name)\n elif identifier:\n identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from identifier columns using list comprehension\n identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n # Join all parts into a single string\n page_content = \" \".join(identifier_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add identifier columns if they exist\n if identifier_cols:\n identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]\n page_content = \" \".join(identifier_parts)\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. 
Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n async def _kb_path(self) -> Path | None:\n # Check if we already have the path cached\n cached_path = getattr(self, \"_cached_kb_path\", None)\n if cached_path is not None:\n return cached_path\n\n # If not cached, compute it\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base path.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n\n kb_root = self._get_kb_root()\n\n # Cache the result\n self._cached_kb_path = kb_root / kb_user / self.knowledge_base\n\n return self._cached_kb_path\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n async def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n # Get source DataFrame\n df_source: pd.DataFrame = self.input_df\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Read the embedding info from the knowledge base folder\n kb_path = await self._kb_path()\n if not kb_path:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. 
Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n await self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except (OSError, ValueError, RuntimeError, KeyError) as e:\n self.log(f\"Error in KB ingestion: {e}\")\n self.status = f\"❌ KB ingestion failed: {e}\"\n return Data(data={\"error\": str(e), \"kb_name\": self.knowledge_base})\n\n async def _get_api_key_variable(self, field_value: dict[str, Any]):\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching global variables.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n variable_service = get_variable_service()\n\n # Process the api_key field variable\n return await variable_service.get_variable(\n user_id=current_user.id,\n name=field_value[\"03_api_key\"],\n field=\"\",\n session=db,\n )\n\n async def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any,\n field_name: str | None = None,\n ) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base list.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n api_key = field_value.get(\"03_api_key\", None)\n with contextlib.suppress(Exception):\n # If the API key is a variable, resolve it\n api_key = await self._get_api_key_variable(field_value)\n\n # Make sure api_key is a string\n if not isinstance(api_key, str):\n msg = \"API key must be a string.\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(embedding_model=field_value[\"02_embedding_model\"], api_key=api_key)\n\n # Try to generate a dummy embedding to validate the API key without blocking the event loop\n try:\n await asyncio.wait_for(\n asyncio.to_thread(embed_model.embed_query, \"test\"),\n timeout=10,\n )\n except TimeoutError as e:\n msg = \"Embedding validation timed out. 
Please verify network connectivity and key.\"\n raise ValueError(msg) from e\n except Exception as e:\n msg = f\"Embedding validation failed: {e!s}\"\n raise ValueError(msg) from e\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=api_key,\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = await get_knowledge_bases(\n KNOWLEDGE_BASES_ROOT_PATH,\n user_id=self.user_id,\n )\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n"
+ "value": "from __future__ import annotations\n\nimport asyncio\nimport contextlib\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.components.processing.converter import convert_to_dataframe\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, get_variable_service, session_scope\n\nif TYPE_CHECKING:\n from langflow.schema.dataframe import DataFrame\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KnowledgeIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"upload\"\n name = \"KnowledgeIngestion\"\n\n def __init__(self, *args, **kwargs) -> None:\n super().__init__(*args, **kwargs)\n self._cached_kb_path: Path | None = None\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=False,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs 
--------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n HandleInput(\n name=\"input_df\",\n display_name=\"Input\",\n info=(\n \"Table with all original columns (already chunked / processed). \"\n \"Accepts Data or DataFrame. If Data is provided, it is converted to a DataFrame automatically.\"\n ),\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": True,\n },\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"Results\", name=\"dataframe_output\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns:\n msg = f\"Column '{col_name}' not found in DataFrame. 
Available columns: {sorted(df_columns)}\"\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n 
            cfg_path.write_text(json.dumps(config_list, indent=2))

        except (OSError, TypeError, ValueError) as e:
            self.log(f"Error saving KB files: {e}")

    def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:
        """Build detailed column metadata."""
        metadata: dict[str, Any] = {
            "total_columns": len(df_source.columns),
            "mapped_columns": len(config_list),
            "unmapped_columns": len(df_source.columns) - len(config_list),
            "columns": [],
            "summary": {"vectorized_columns": [], "identifier_columns": []},
        }

        for config in config_list:
            col_name = config.get("column_name")
            vectorize = config.get("vectorize") == "True" or config.get("vectorize") is True
            identifier = config.get("identifier") == "True" or config.get("identifier") is True

            # Add to columns list
            metadata["columns"].append(
                {
                    "name": col_name,
                    "vectorize": vectorize,
                    "identifier": identifier,
                }
            )

            # Update summary
            if vectorize:
                metadata["summary"]["vectorized_columns"].append(col_name)
            if identifier:
                metadata["summary"]["identifier_columns"].append(col_name)

        return metadata

    async def _create_vector_store(
        self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str
    ) -> None:
        """Create vector store following Local DB component pattern."""
        try:
            # Set up vector store directory
            vector_store_dir = await self._kb_path()
            if not vector_store_dir:
                msg = "Knowledge base path is not set. Please create a new knowledge base first."
                raise ValueError(msg)
            vector_store_dir.mkdir(parents=True, exist_ok=True)

            # Create embeddings model
            embedding_function = self._build_embeddings(embedding_model, api_key)

            # Convert DataFrame to Data objects (following Local DB pattern)
            data_objects = await self._convert_df_to_data_objects(df_source, config_list)

            # Create vector store
            chroma = Chroma(
                persist_directory=str(vector_store_dir),
                embedding_function=embedding_function,
                collection_name=self.knowledge_base,
            )

            # Convert Data objects to LangChain Documents
            documents = []
            for data_obj in data_objects:
                doc = data_obj.to_lc_document()
                documents.append(doc)

            # Add documents to vector store
            if documents:
                chroma.add_documents(documents)
                self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'")

        except (OSError, ValueError, RuntimeError) as e:
            self.log(f"Error creating vector store: {e}")

    async def _convert_df_to_data_objects(
        self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]
    ) -> list[Data]:
        """Convert DataFrame to Data objects for vector store."""
        data_objects: list[Data] = []

        # Set up vector store directory
        kb_path = await self._kb_path()

        # If we don't allow duplicates, we need to get the existing hashes
        chroma = Chroma(
            persist_directory=str(kb_path),
            collection_name=self.knowledge_base,
        )

        # Get all documents and their metadata
        all_docs = chroma.get()

        # Extract all _id values from metadata
        id_list = [metadata.get("_id") for metadata in all_docs["metadatas"] if metadata.get("_id")]

        # Get column roles
        content_cols = []
        identifier_cols = []

        for config in config_list:
            col_name = config.get("column_name")
            vectorize = config.get("vectorize") == "True" or config.get("vectorize") is True
            identifier = config.get("identifier") == "True" or config.get("identifier") is True

            if vectorize:
                content_cols.append(col_name)
            elif identifier:
                identifier_cols.append(col_name)

        # Convert each row to a Data object
        for _, row in df_source.iterrows():
            # Build content text from identifier columns using list comprehension
            identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]

            # Join all parts into a single string
            page_content = " ".join(identifier_parts)

            # Build metadata from NON-vectorized columns only (simple key-value pairs)
            data_dict = {
                "text": page_content,  # Main content for vectorization
            }

            # Add identifier columns if they exist
            if identifier_cols:
                identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
                page_content = " ".join(identifier_parts)

            # Add metadata columns as simple key-value pairs
            for col in df_source.columns:
                if col not in content_cols and col in row and pd.notna(row[col]):
                    # Convert to simple types for Chroma metadata
                    value = row[col]
                    data_dict[col] = str(value)  # Convert complex types to string

            # Hash the page_content for unique ID
            page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()
            data_dict["_id"] = page_content_hash

            # If duplicates are disallowed, and hash exists, prevent adding this row
            if not self.allow_duplicates and page_content_hash in id_list:
                self.log(f"Skipping duplicate row with hash {page_content_hash}")
                continue

            # Create Data object - everything except "text" becomes metadata
            data_obj = Data(data=data_dict)
            data_objects.append(data_obj)

        return data_objects

    def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:
        """Validates collection name against conditions 1-3.

        1. Contains 3-63 characters
        2. Starts and ends with alphanumeric character
        3. Contains only alphanumeric characters, underscores, or hyphens.

        Args:
            name (str): Collection name to validate
            min_length (int): Minimum length of the name
            max_length (int): Maximum length of the name

        Returns:
            bool: True if valid, False otherwise
        """
        # Check length (condition 1)
        if not (min_length <= len(name) <= max_length):
            return False

        # Check start/end with alphanumeric (condition 2)
        if not (name[0].isalnum() and name[-1].isalnum()):
            return False

        # Check allowed characters (condition 3)
        return re.match(r"^[a-zA-Z0-9_-]+$", name) is not None

    async def _kb_path(self) -> Path | None:
        # Check if we already have the path cached
        cached_path = getattr(self, "_cached_kb_path", None)
        if cached_path is not None:
            return cached_path

        # If not cached, compute it
        async with session_scope() as db:
            if not self.user_id:
                msg = "User ID is required for fetching knowledge base path."
                raise ValueError(msg)
            current_user = await get_user_by_id(db, self.user_id)
            if not current_user:
                msg = f"User with ID {self.user_id} not found."
                raise ValueError(msg)
            kb_user = current_user.username

        kb_root = self._get_kb_root()

        # Cache the result
        self._cached_kb_path = kb_root / kb_user / self.knowledge_base

        return self._cached_kb_path

    # ---------------------------------------------------------------------
    # OUTPUT METHODS
    # ---------------------------------------------------------------------
    async def build_kb_info(self) -> Data:
        """Main ingestion routine → returns a dict with KB metadata."""
        try:
            input_value = self.input_df[0] if isinstance(self.input_df, list) else self.input_df
            df_source: DataFrame = convert_to_dataframe(input_value)

            # Validate column configuration (using Structured Output patterns)
            config_list = self._validate_column_config(df_source)
            column_metadata = self._build_column_metadata(config_list, df_source)

            # Read the embedding info from the knowledge base folder
            kb_path = await self._kb_path()
            if not kb_path:
                msg = "Knowledge base path is not set. Please create a new knowledge base first."
                raise ValueError(msg)
            metadata_path = kb_path / "embedding_metadata.json"

            # If the API key is not provided, try to read it from the metadata file
            if metadata_path.exists():
                settings_service = get_settings_service()
                metadata = json.loads(metadata_path.read_text())
                embedding_model = metadata.get("embedding_model")
                try:
                    api_key = decrypt_api_key(metadata["api_key"], settings_service)
                except (InvalidToken, TypeError, ValueError) as e:
                    logger.error(f"Could not decrypt API key. Please provide it manually. Error: {e}")

            # Check if a custom API key was provided, update metadata if so
            if self.api_key:
                api_key = self.api_key
                self._save_embedding_metadata(
                    kb_path=kb_path,
                    embedding_model=embedding_model,
                    api_key=api_key,
                )

            # Create vector store following Local DB component pattern
            await self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)

            # Save KB files (using File Component storage patterns)
            self._save_kb_files(kb_path, config_list)

            # Build metadata response
            meta: dict[str, Any] = {
                "kb_id": str(uuid.uuid4()),
                "kb_name": self.knowledge_base,
                "rows": len(df_source),
                "column_metadata": column_metadata,
                "path": str(kb_path),
                "config_columns": len(config_list),
                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
            }

            # Set status message
            self.status = f"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks."

            return Data(data=meta)

        except (OSError, ValueError, RuntimeError, KeyError) as e:
            msg = f"Error during KB ingestion: {e}"
            raise RuntimeError(msg) from e

    async def _get_api_key_variable(self, field_value: dict[str, Any]):
        async with session_scope() as db:
            if not self.user_id:
                msg = "User ID is required for fetching global variables."
                raise ValueError(msg)
            current_user = await get_user_by_id(db, self.user_id)
            if not current_user:
                msg = f"User with ID {self.user_id} not found."
                raise ValueError(msg)
            variable_service = get_variable_service()

            # Process the api_key field variable
            return await variable_service.get_variable(
                user_id=current_user.id,
                name=field_value["03_api_key"],
                field="",
                session=db,
            )

    async def update_build_config(
        self,
        build_config: dotdict,
        field_value: Any,
        field_name: str | None = None,
    ) -> dotdict:
        """Update build configuration based on provider selection."""
        # Create a new knowledge base
        if field_name == "knowledge_base":
            async with session_scope() as db:
                if not self.user_id:
                    msg = "User ID is required for fetching knowledge base list."
                    raise ValueError(msg)
                current_user = await get_user_by_id(db, self.user_id)
                if not current_user:
                    msg = f"User with ID {self.user_id} not found."
                    raise ValueError(msg)
                kb_user = current_user.username

            if isinstance(field_value, dict) and "01_new_kb_name" in field_value:
                # Validate the knowledge base name - Make sure it follows these rules:
                if not self.is_valid_collection_name(field_value["01_new_kb_name"]):
                    msg = f"Invalid knowledge base name: {field_value['01_new_kb_name']}"
                    raise ValueError(msg)

                api_key = field_value.get("03_api_key", None)
                with contextlib.suppress(Exception):
                    # If the API key is a variable, resolve it
                    api_key = await self._get_api_key_variable(field_value)

                # Make sure api_key is a string
                if not isinstance(api_key, str):
                    msg = "API key must be a string."
                    raise ValueError(msg)

                # We need to test the API Key one time against the embedding model
                embed_model = self._build_embeddings(embedding_model=field_value["02_embedding_model"], api_key=api_key)

                # Try to generate a dummy embedding to validate the API key without blocking the event loop
                try:
                    await asyncio.wait_for(
                        asyncio.to_thread(embed_model.embed_query, "test"),
                        timeout=10,
                    )
                except TimeoutError as e:
                    msg = "Embedding validation timed out. Please verify network connectivity and key."
                    raise ValueError(msg) from e
                except Exception as e:
                    msg = f"Embedding validation failed: {e!s}"
                    raise ValueError(msg) from e

                # Create the new knowledge base directory
                kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / field_value["01_new_kb_name"]
                kb_path.mkdir(parents=True, exist_ok=True)

                # Save the embedding metadata
                build_config["knowledge_base"]["value"] = field_value["01_new_kb_name"]
                self._save_embedding_metadata(
                    kb_path=kb_path,
                    embedding_model=field_value["02_embedding_model"],
                    api_key=api_key,
                )

            # Update the knowledge base options dynamically
            build_config["knowledge_base"]["options"] = await get_knowledge_bases(
                KNOWLEDGE_BASES_ROOT_PATH,
                user_id=self.user_id,
            )

            # If the selected knowledge base is not available, reset it
            if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]:
                build_config["knowledge_base"]["value"] = None

        return build_config
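The `update_build_config` method above validates a newly entered API key by running the blocking `embed_query` call in a worker thread and bounding it with a timeout, so a slow or unreachable embedding provider cannot stall the event loop. Below is a minimal standalone sketch of that pattern, assuming nothing from langflow: `blocking_probe` is an illustrative stand-in for the real embedding call, and the timeout value is only an example.

import asyncio


def blocking_probe(text: str) -> list[float]:
    """Illustrative stand-in for a blocking, network-bound call such as embed_query."""
    return [0.0] * 8


async def validate_with_timeout() -> None:
    try:
        # Run the blocking call in a worker thread and bound it with a timeout,
        # so a hung provider cannot block the event loop indefinitely.
        await asyncio.wait_for(asyncio.to_thread(blocking_probe, "test"), timeout=10)
    except asyncio.TimeoutError as e:
        msg = "Embedding validation timed out. Please verify network connectivity and key."
        raise ValueError(msg) from e


asyncio.run(validate_with_timeout())

Catching `asyncio.TimeoutError` keeps the sketch compatible with Python versions before 3.11, where `asyncio.wait_for` does not raise the built-in `TimeoutError`.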
  },
  "column_config": {
  "_input_type": "TableInput",
@@ -933,18 +932,19 @@
  "value": [
  {
  "column_name": "text",
- "identifier": false,
+ "identifier": true,
  "vectorize": true
  }
  ]
  },
  "input_df": {
- "_input_type": "DataFrameInput",
+ "_input_type": "HandleInput",
  "advanced": false,
- "display_name": "Data",
+ "display_name": "Input",
  "dynamic": false,
- "info": "Table with all original columns (already chunked / processed).",
+ "info": "Table with all original columns (already chunked / processed). Accepts Data or DataFrame. If Data is provided, it is converted to a DataFrame automatically.",
  "input_types": [
+ "Data",
  "DataFrame"
  ],
  "list": false,
@@ -954,8 +954,6 @@
  "required": true,
  "show": true,
  "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
  "trace_as_metadata": true,
  "type": "other",
  "value": ""
@@ -1054,7 +1052,7 @@
  "dynamic": false,
  "info": "Provider API key for embedding model",
  "input_types": [],
- "load_from_db": true,
+ "load_from_db": false,
  "name": "api_key",
  "password": true,
  "placeholder": "",
@@ -1077,6 +1075,7 @@
  "options": [],
  "options_metadata": [],
  "placeholder": "",
+ "real_time_refresh": true,
  "refresh_button": true,
  "required": true,
  "show": true,
@@ -1091,33 +1090,33 @@
  "tool_mode": false
  },
  "showNode": true,
- "type": "KBIngestion"
+ "type": "KnowledgeIngestion"
  },
  "dragging": false,
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
  "measured": {
  "height": 333,
  "width": 320
  },
  "position": {
- "x": 1000.4023842644599,
- "y": 101.77068666606948
+ "x": 1001.4259863865477,
+ "y": 110.75185048793182
  },
  "selected": false,
  "type": "genericNode"
  }
  ],
  "viewport": {
- "x": 280.03407172860966,
- "y": 131.39479654897661,
- "zoom": 0.9295918751284687
+ "x": 283.8838616133703,
+ "y": 160.13925386069144,
+ "zoom": 0.937778878472336
  }
  },
  "description": "An example of creating a Knowledge Base and ingesting data into it from a web URL.",
  "endpoint_name": null,
- "id": "dfffa40b-547b-46ae-9c4a-6539851990bf",
+ "id": "fd03902d-38ec-4bc5-ac00-c308bbcca359",
  "is_component": false,
- "last_tested_version": "1.5.0.post1",
+ "last_tested_version": "1.5.0.post2",
  "name": "Knowledge Ingestion",
  "tags": []
  }
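The `input_df` hunk above widens the component's input from `DataFrame` only to `Data` or `DataFrame`, with Data converted automatically; the component code reaches this behavior through a `convert_to_dataframe` helper. The sketch below only illustrates what such a conversion can look like: `to_dataframe` and its dict/list handling are assumptions made for this example, not langflow's actual helper.

from typing import Any

import pandas as pd


def to_dataframe(value: Any) -> pd.DataFrame:
    """Illustrative stand-in for a Data-or-DataFrame conversion helper (not langflow's)."""
    if isinstance(value, pd.DataFrame):
        # Already a DataFrame: pass through unchanged
        return value
    if isinstance(value, dict):
        # A single Data-like payload becomes a one-row frame
        return pd.DataFrame([value])
    if isinstance(value, list):
        # A list of Data-like payloads becomes one row per item
        return pd.DataFrame(value)
    msg = f"Unsupported input type: {type(value).__name__}"
    raise TypeError(msg)


print(to_dataframe({"text": "hello", "source": "https://example.com"}))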