langflow-base-nightly 0.5.1.dev0__py3-none-any.whl → 0.5.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. langflow/api/v1/endpoints.py +10 -7
  2. langflow/api/v1/schemas.py +5 -2
  3. langflow/base/knowledge_bases/__init__.py +0 -0
  4. langflow/components/agents/agent.py +1 -0
  5. langflow/components/data/__init__.py +0 -4
  6. langflow/components/datastax/__init__.py +2 -2
  7. langflow/components/knowledge_bases/__init__.py +34 -0
  8. langflow/components/{data/kb_ingest.py → knowledge_bases/ingestion.py} +23 -15
  9. langflow/components/{data/kb_retrieval.py → knowledge_bases/retrieval.py} +26 -22
  10. langflow/components/processing/dataframe_operations.py +12 -1
  11. langflow/frontend/assets/{SlackIcon-Cr3Q15Px.js → SlackIcon-Cc7Qnzki.js} +1 -1
  12. langflow/frontend/assets/{Wikipedia-GxM5sPdM.js → Wikipedia-7ulMZY46.js} +1 -1
  13. langflow/frontend/assets/{Wolfram-BN3-VOCA.js → Wolfram-By9PGsHS.js} +1 -1
  14. langflow/frontend/assets/{index-Kwdl-e29.js → index--e0oQqZh.js} +1 -1
  15. langflow/frontend/assets/{index-CD-PqGCY.js → index-3jlSQi5Y.js} +1 -1
  16. langflow/frontend/assets/{index-DQ7VYqQc.js → index-4JIEdyIM.js} +1 -1
  17. langflow/frontend/assets/{index-C9Cxnkl8.js → index-5-CSw2-z.js} +1 -1
  18. langflow/frontend/assets/{index-DUpri6zF.js → index-7yAHPRxv.js} +1 -1
  19. langflow/frontend/assets/{index-Dl5amdBz.js → index-9FL5xjkL.js} +1 -1
  20. langflow/frontend/assets/{index-X0guhYF8.js → index-AALDfCyt.js} +1 -1
  21. langflow/frontend/assets/{index-BLTxEeTi.js → index-AKVkmT4S.js} +1 -1
  22. langflow/frontend/assets/{index-CRcMqCIj.js → index-B3GvPjhD.js} +1 -1
  23. langflow/frontend/assets/{index-D14EWPyZ.js → index-B5LHnuQR.js} +1 -1
  24. langflow/frontend/assets/{index-C3yvArUT.js → index-BAn-AzCS.js} +1 -1
  25. langflow/frontend/assets/{index-CRPKJZw9.js → index-BCXhKCOK.js} +1 -1
  26. langflow/frontend/assets/{index-CuFXdTx4.js → index-BGt6jQ4x.js} +1 -1
  27. langflow/frontend/assets/{index-AWCSdofD.js → index-BH7AyHxp.js} +1 -1
  28. langflow/frontend/assets/{index-CBc8fEAE.js → index-BISPW-f6.js} +1 -1
  29. langflow/frontend/assets/{index-Bf0IYKLd.js → index-BIqEYjNT.js} +1 -1
  30. langflow/frontend/assets/{index-DiGWASY5.js → index-BLEWsL1U.js} +1 -1
  31. langflow/frontend/assets/{index-D-KY3kkq.js → index-BLXN681C.js} +1 -1
  32. langflow/frontend/assets/{index-C-Xfg4cD.js → index-BMpKFGhI.js} +1 -1
  33. langflow/frontend/assets/{index-BVwJDmw-.js → index-BMvp94tO.js} +1 -1
  34. langflow/frontend/assets/{index-3wW7BClE.js → index-BSwBVwyF.js} +1 -1
  35. langflow/frontend/assets/{index-Cewy7JZE.js → index-BWFIrwW1.js} +1 -1
  36. langflow/frontend/assets/{index-CiixOzDG.js → index-BWnKMRFJ.js} +1 -1
  37. langflow/frontend/assets/{index-BZcw4827.js → index-BX_asvRB.js} +1 -1
  38. langflow/frontend/assets/{index-CiR1dxI4.js → index-BZ-A4K98.js} +1 -1
  39. langflow/frontend/assets/{index-CpzXS6md.js → index-BZSa2qz7.js} +1 -1
  40. langflow/frontend/assets/{index-CdIf07Rw.js → index-B_kBTgxV.js} +1 -1
  41. langflow/frontend/assets/{index-ClsuDmR6.js → index-BdjfHsrf.js} +1 -1
  42. langflow/frontend/assets/{index-hbndqB9B.js → index-Bhcv5M0n.js} +1 -1
  43. langflow/frontend/assets/{index-dJWNxIRH.js → index-BhqVw9WQ.js} +1 -1
  44. langflow/frontend/assets/{index-Tw3Os-DN.js → index-Bl7RpmrB.js} +1 -1
  45. langflow/frontend/assets/{index-C-EdnFdA.js → index-BlDsBQ_1.js} +1 -1
  46. langflow/frontend/assets/{index-z3SRY-mX.js → index-Bm9i8F4W.js} +1 -1
  47. langflow/frontend/assets/{index-CMZ79X-Y.js → index-BnCnYnao.js} +1 -1
  48. langflow/frontend/assets/{index-DXRfN4HV.js → index-BrDz-PxE.js} +1 -1
  49. langflow/frontend/assets/{index-CVWQfRYZ.js → index-BsdLyYMY.js} +1 -1
  50. langflow/frontend/assets/{index-BTKOU4xC.js → index-BusCv3bR.js} +1 -1
  51. langflow/frontend/assets/{index-D2N3l-cw.js → index-BvRIG6P5.js} +1 -1
  52. langflow/frontend/assets/{index-DpVWih90.js → index-Bw-TIIC6.js} +1 -1
  53. langflow/frontend/assets/{index-D-1tA8Dt.js → index-ByxGmq5p.js} +1 -1
  54. langflow/frontend/assets/{index-BWYuQ2Sj.js → index-C-2MRYoJ.js} +1 -1
  55. langflow/frontend/assets/{index-CZqRL9DE.js → index-C-bjC2sz.js} +1 -1
  56. langflow/frontend/assets/{index-o0D2S7xW.js → index-C-wnbBBY.js} +1 -1
  57. langflow/frontend/assets/{index-D-_B1a8v.js → index-C51yNvIL.js} +1 -1
  58. langflow/frontend/assets/{index-DJP-ss47.js → index-C676MS3I.js} +1 -1
  59. langflow/frontend/assets/{index-lZX9AvZW.js → index-C6nzdeYx.js} +1 -1
  60. langflow/frontend/assets/{index-6pyH3ZJB.js → index-C8pI0lzi.js} +1 -1
  61. langflow/frontend/assets/{index-ovFJ_0J6.js → index-CDphUsa3.js} +1 -1
  62. langflow/frontend/assets/{index-J38wh62w.js → index-CF4_Og1m.js} +1 -1
  63. langflow/frontend/assets/{index-C1f2wMat.js → index-CJ5A6STv.js} +1 -1
  64. langflow/frontend/assets/{index-C3KequvP.js → index-CKPZpkQk.js} +1 -1
  65. langflow/frontend/assets/{index-BiKKN6FR.js → index-CLcaktde.js} +1 -1
  66. langflow/frontend/assets/{index-28oOcafk.js → index-CNh0rwur.js} +1 -1
  67. langflow/frontend/assets/{index-CGO1CiUr.js → index-COoTCxvs.js} +1 -1
  68. langflow/frontend/assets/{index-BC65VuWx.js → index-CPiM2oyj.js} +1 -1
  69. langflow/frontend/assets/{index-BWdLILDG.js → index-CQQ-4XMS.js} +1 -1
  70. langflow/frontend/assets/{index-pYJJOcma.js → index-CU16NJD7.js} +1 -1
  71. langflow/frontend/assets/{index-h_aSZHf3.js → index-CUzlcce2.js} +1 -1
  72. langflow/frontend/assets/{index-BrJV8psX.js → index-CVkIdc6y.js} +1 -1
  73. langflow/frontend/assets/{index-lTpteg8t.js → index-C_157Mb-.js} +1 -1
  74. langflow/frontend/assets/{index-Cyd2HtHK.js → index-C_MhBX6R.js} +1 -1
  75. langflow/frontend/assets/{index-DrDrcajG.js → index-C_NwzK6j.js} +1 -1
  76. langflow/frontend/assets/{index-DlD4dXlZ.js → index-Ca1b7Iag.js} +1 -1
  77. langflow/frontend/assets/{index-BS8Vo8nc.js → index-Cb5G9Ifd.js} +1 -1
  78. langflow/frontend/assets/{index-CH5UVA9b.js → index-CeswGUz3.js} +1 -1
  79. langflow/frontend/assets/{index-BCDSei1q.js → index-ChsGhZn3.js} +1 -1
  80. langflow/frontend/assets/{index-DiB3CTo8.js → index-CiklyQU3.js} +1 -1
  81. langflow/frontend/assets/{index-dcJ8-agu.js → index-Co__gFM1.js} +1 -1
  82. langflow/frontend/assets/{index-eo2mAtL-.js → index-Coi86oqP.js} +1 -1
  83. langflow/frontend/assets/{index-CBvrGgID.js → index-Cu2Xr6_j.js} +1 -1
  84. langflow/frontend/assets/{index-2wSXqBtB.js → index-Cu7vC48Y.js} +1 -1
  85. langflow/frontend/assets/{index-Bbi87Ve4.js → index-CvSoff-8.js} +1 -1
  86. langflow/frontend/assets/{index-DA6-bvgN.js → index-Cw0UComa.js} +1 -1
  87. langflow/frontend/assets/{index-CWPzZtSx.js → index-D-SnFlhU.js} +1 -1
  88. langflow/frontend/assets/{index-DHgomBdh.js → index-D-WStJI6.js} +1 -1
  89. langflow/frontend/assets/{index-BpxbUiZD.js → index-D234yKNJ.js} +186 -186
  90. langflow/frontend/assets/{index-BkXec1Yf.js → index-D5c2nNvp.js} +1 -1
  91. langflow/frontend/assets/{index-DwQEZe3C.js → index-DFY8YFbC.js} +1 -1
  92. langflow/frontend/assets/{index-Bief6eyJ.js → index-DJ6HD14g.js} +1 -1
  93. langflow/frontend/assets/{index-Bx7dBY26.js → index-DMCerPJM.js} +1 -1
  94. langflow/frontend/assets/{index-DDWBeudF.js → index-DOj_QWqG.js} +1 -1
  95. langflow/frontend/assets/{index-CmEYYRN1.js → index-DP1oE6QB.js} +1 -1
  96. langflow/frontend/assets/{index-C3ZjKdCD.js → index-DTCrijba.js} +1 -1
  97. langflow/frontend/assets/{index-CtVIONP2.js → index-DVLIDc2_.js} +1 -1
  98. langflow/frontend/assets/{index-DyJFTK24.js → index-DX7JcSMz.js} +1 -1
  99. langflow/frontend/assets/{index-DmeiHnfl.js → index-DZVgPCio.js} +1 -1
  100. langflow/frontend/assets/{index-BwLWcUXL.js → index-DbfS_UH-.js} +1 -1
  101. langflow/frontend/assets/{index-DV3utZDZ.js → index-DcApTyZ7.js} +1 -1
  102. langflow/frontend/assets/{index-DDcMAaG4.js → index-Deu8rlaZ.js} +1 -1
  103. langflow/frontend/assets/{index-iJngutFo.js → index-Df6psZEj.js} +1 -1
  104. langflow/frontend/assets/{index-CRPyCfYy.js → index-DiblXWmk.js} +1 -1
  105. langflow/frontend/assets/{index-CMzfJKiW.js → index-DjQElpEg.js} +1 -1
  106. langflow/frontend/assets/{index-Dmu-X5-4.js → index-DmVt5Jlx.js} +1 -1
  107. langflow/frontend/assets/{index-CPHEscq9.js → index-DmYLDQag.js} +1 -1
  108. langflow/frontend/assets/{index-BKseQQ2I.js → index-DnlVWWU8.js} +1 -1
  109. langflow/frontend/assets/{index-D5ETnvJa.js → index-Dp7ZQyL3.js} +1 -1
  110. langflow/frontend/assets/{index-Co20d-eQ.js → index-DpWrk8mA.js} +1 -1
  111. langflow/frontend/assets/{index-CVl6MbaM.js → index-DrXXKzpD.js} +1 -1
  112. langflow/frontend/assets/{index-OwPvCmpW.js → index-Drg8me2a.js} +1 -1
  113. langflow/frontend/assets/{index-CVwWoX99.js → index-DsEZjOcp.js} +1 -1
  114. langflow/frontend/assets/{index-DwPkMTaY.js → index-DznH7Jbq.js} +1 -1
  115. langflow/frontend/assets/{index-CNw1H-Wc.js → index-GzOGB_fo.js} +1 -1
  116. langflow/frontend/assets/{index-C3l0zYn0.js → index-MVW4HTEk.js} +1 -1
  117. langflow/frontend/assets/{index-DhtZ5hx8.js → index-OsUvqIUr.js} +1 -1
  118. langflow/frontend/assets/{index-B2ptVQGM.js → index-RH_I78z_.js} +1 -1
  119. langflow/frontend/assets/{index-DdtMEn6I.js → index-RjeC0kaX.js} +1 -1
  120. langflow/frontend/assets/{index-hG24k5xJ.js → index-S-sc0Cm9.js} +1 -1
  121. langflow/frontend/assets/{index-Bg5nrMRh.js → index-S8uJXTOq.js} +1 -1
  122. langflow/frontend/assets/{index-m8QA6VNM.js → index-SB4rw8D5.js} +1 -1
  123. langflow/frontend/assets/{index-Du-pc0KE.js → index-YJsAl7vm.js} +1 -1
  124. langflow/frontend/assets/{index-DfDhMHgQ.js → index-ZjeocHyu.js} +1 -1
  125. langflow/frontend/assets/{index-Bnl6QHtP.js → index-_szO7sta.js} +1 -1
  126. langflow/frontend/assets/{index-xvFOmxx4.js → index-aAgSKWb3.js} +1 -1
  127. langflow/frontend/assets/{index-Db9dYSzy.js → index-aWnZIwHd.js} +1 -1
  128. langflow/frontend/assets/{index-BJy50PvP.js → index-bMhyLtgS.js} +1 -1
  129. langflow/frontend/assets/{index-Cqpzl1J4.js → index-cYFKmtmg.js} +1 -1
  130. langflow/frontend/assets/{index-CLJeJYjH.js → index-hg2y9OAt.js} +1 -1
  131. langflow/frontend/assets/{index-D7kquVv2.js → index-jwzN3Jd_.js} +1 -1
  132. langflow/frontend/assets/{index-BiC280Nx.js → index-k9jP5chN.js} +1 -1
  133. langflow/frontend/assets/{index-B3TANVes.js → index-lnF9Eqr2.js} +1 -1
  134. langflow/frontend/assets/{index-B4yCvZKV.js → index-mjwtJmkP.js} +1 -1
  135. langflow/frontend/assets/{index-CfwLpbMM.js → index-nw3WF9lY.js} +1 -1
  136. langflow/frontend/assets/{index-CUVDws8F.js → index-qiVTWUuf.js} +1 -1
  137. langflow/frontend/assets/{index-DTqbvGC0.js → index-uybez8MR.js} +1 -1
  138. langflow/frontend/assets/{index-Dfe7qfvf.js → index-v8eXbWlM.js} +1 -1
  139. langflow/frontend/assets/{index-B2Zgv_xv.js → index-xN8ogFdo.js} +1 -1
  140. langflow/frontend/assets/{index-BRg1f4Mu.js → index-xV6ystWy.js} +1 -1
  141. langflow/frontend/assets/{index-sI75DsdM.js → index-yyAaYjLR.js} +1 -1
  142. langflow/frontend/assets/lazyIconImports-Ci-S9xBA.js +2 -0
  143. langflow/frontend/assets/{use-post-add-user-C0MdTpQ5.js → use-post-add-user-JUeLDErC.js} +1 -1
  144. langflow/frontend/index.html +1 -1
  145. langflow/initial_setup/starter_projects/Hybrid Search RAG.json +1280 -1276
  146. langflow/initial_setup/starter_projects/Instagram Copywriter.json +1 -1
  147. langflow/initial_setup/starter_projects/Invoice Summarizer.json +1 -1
  148. langflow/initial_setup/starter_projects/Knowledge Ingestion.json +46 -47
  149. langflow/initial_setup/starter_projects/Knowledge Retrieval.json +73 -56
  150. langflow/initial_setup/starter_projects/Market Research.json +1 -1
  151. langflow/initial_setup/starter_projects/News Aggregator.json +1 -1
  152. langflow/initial_setup/starter_projects/Nvidia Remix.json +1 -1
  153. langflow/initial_setup/starter_projects/Pok/303/251dex Agent.json" +1 -1
  154. langflow/initial_setup/starter_projects/Price Deal Finder.json +1 -1
  155. langflow/initial_setup/starter_projects/Research Agent.json +1 -1
  156. langflow/initial_setup/starter_projects/SaaS Pricing.json +1 -1
  157. langflow/initial_setup/starter_projects/Search agent.json +1 -1
  158. langflow/initial_setup/starter_projects/Sequential Tasks Agents.json +3 -3
  159. langflow/initial_setup/starter_projects/Simple Agent.json +1 -1
  160. langflow/initial_setup/starter_projects/Social Media Agent.json +1 -1
  161. langflow/initial_setup/starter_projects/Travel Planning Agents.json +3 -3
  162. langflow/initial_setup/starter_projects/Vector Store RAG.json +1093 -1108
  163. langflow/initial_setup/starter_projects/Youtube Analysis.json +1 -1
  164. langflow/services/auth/utils.py +78 -1
  165. langflow/services/settings/auth.py +4 -0
  166. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/METADATA +1 -1
  167. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/RECORD +171 -169
  168. langflow/frontend/assets/lazyIconImports-D97HEZkE.js +0 -2
  169. /langflow/base/{data/kb_utils.py → knowledge_bases/knowledge_base_utils.py} +0 -0
  170. /langflow/components/datastax/{astradb.py → astradb_vectorstore.py} +0 -0
  171. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/WHEEL +0 -0
  172. {langflow_base_nightly-0.5.1.dev0.dist-info → langflow_base_nightly-0.5.1.dev2.dist-info}/entry_points.txt +0 -0
@@ -7,7 +7,7 @@
7
7
  "data": {
8
8
  "sourceHandle": {
9
9
  "dataType": "ParserComponent",
10
- "id": "ParserComponent-3Wxa2",
10
+ "id": "ParserComponent-0KvmM",
11
11
  "name": "parsed_text",
12
12
  "output_types": [
13
13
  "Message"
@@ -15,7 +15,7 @@
15
15
  },
16
16
  "targetHandle": {
17
17
  "fieldName": "input_value",
18
- "id": "ChatOutput-811h1",
18
+ "id": "ChatOutput-zViXc",
19
19
  "inputTypes": [
20
20
  "Data",
21
21
  "DataFrame",
@@ -24,56 +24,56 @@
24
24
  "type": "other"
25
25
  }
26
26
  },
27
- "id": "reactflow__edge-ParserComponent-3Wxa2{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-3Wxa2œ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-811h1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-811h1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
27
+ "id": "reactflow__edge-ParserComponent-0KvmM{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-zViXc{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
28
28
  "selected": false,
29
- "source": "ParserComponent-3Wxa2",
30
- "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-3Wxa2œ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
31
- "target": "ChatOutput-811h1",
32
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-811h1œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
29
+ "source": "ParserComponent-0KvmM",
30
+ "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-0KvmMœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
31
+ "target": "ChatOutput-zViXc",
32
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-zViXcœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
33
33
  },
34
34
  {
35
35
  "animated": false,
36
36
  "className": "",
37
37
  "data": {
38
38
  "sourceHandle": {
39
- "dataType": "ChatInput",
40
- "id": "ChatInput-uZ7jn",
41
- "name": "message",
39
+ "dataType": "LanguageModelComponent",
40
+ "id": "LanguageModelComponent-CRZxx",
41
+ "name": "text_output",
42
42
  "output_types": [
43
43
  "Message"
44
44
  ]
45
45
  },
46
46
  "targetHandle": {
47
- "fieldName": "search_query",
48
- "id": "AstraDB-7gXip",
47
+ "fieldName": "input_value",
48
+ "id": "StructuredOutput-AUzID",
49
49
  "inputTypes": [
50
50
  "Message"
51
51
  ],
52
- "type": "query"
52
+ "type": "str"
53
53
  }
54
54
  },
55
- "id": "reactflow__edge-ChatInput-uZ7jn{œdataTypeœ:œChatInputœ,œidœ:œChatInput-uZ7jnœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-7gXip{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-7gXipœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
55
+ "id": "reactflow__edge-LanguageModelComponent-CRZxx{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-AUzID{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
56
56
  "selected": false,
57
- "source": "ChatInput-uZ7jn",
58
- "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-uZ7jnœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
59
- "target": "AstraDB-7gXip",
60
- "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-7gXipœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
57
+ "source": "LanguageModelComponent-CRZxx",
58
+ "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-CRZxxœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
59
+ "target": "StructuredOutput-AUzID",
60
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
61
61
  },
62
62
  {
63
63
  "animated": false,
64
64
  "className": "",
65
65
  "data": {
66
66
  "sourceHandle": {
67
- "dataType": "AstraDB",
68
- "id": "AstraDB-7gXip",
69
- "name": "dataframe",
67
+ "dataType": "StructuredOutput",
68
+ "id": "StructuredOutput-AUzID",
69
+ "name": "structured_output",
70
70
  "output_types": [
71
- "DataFrame"
71
+ "Data"
72
72
  ]
73
73
  },
74
74
  "targetHandle": {
75
75
  "fieldName": "input_data",
76
- "id": "ParserComponent-3Wxa2",
76
+ "id": "ParserComponent-6wYbr",
77
77
  "inputTypes": [
78
78
  "DataFrame",
79
79
  "Data"
@@ -81,12 +81,12 @@
81
81
  "type": "other"
82
82
  }
83
83
  },
84
- "id": "reactflow__edge-AstraDB-7gXip{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-7gXipœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-3Wxa2{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-3Wxa2œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
84
+ "id": "reactflow__edge-StructuredOutput-AUzID{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-6wYbr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
85
85
  "selected": false,
86
- "source": "AstraDB-7gXip",
87
- "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-7gXipœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
88
- "target": "ParserComponent-3Wxa2",
89
- "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-3Wxa2œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
86
+ "source": "StructuredOutput-AUzID",
87
+ "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-AUzIDœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}",
88
+ "target": "ParserComponent-6wYbr",
89
+ "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-6wYbrœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
90
90
  },
91
91
  {
92
92
  "animated": false,
@@ -94,90 +94,112 @@
94
94
  "data": {
95
95
  "sourceHandle": {
96
96
  "dataType": "LanguageModelComponent",
97
- "id": "LanguageModelComponent-NEQ8S",
98
- "name": "text_output",
97
+ "id": "LanguageModelComponent-MD9V5",
98
+ "name": "model_output",
99
+ "output_types": [
100
+ "LanguageModel"
101
+ ]
102
+ },
103
+ "targetHandle": {
104
+ "fieldName": "llm",
105
+ "id": "StructuredOutput-AUzID",
106
+ "inputTypes": [
107
+ "LanguageModel"
108
+ ],
109
+ "type": "other"
110
+ }
111
+ },
112
+ "id": "reactflow__edge-LanguageModelComponent-MD9V5{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-AUzID{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}",
113
+ "selected": false,
114
+ "source": "LanguageModelComponent-MD9V5",
115
+ "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-MD9V5œ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}",
116
+ "target": "StructuredOutput-AUzID",
117
+ "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}"
118
+ },
119
+ {
120
+ "className": "",
121
+ "data": {
122
+ "sourceHandle": {
123
+ "dataType": "ParserComponent",
124
+ "id": "ParserComponent-6wYbr",
125
+ "name": "parsed_text",
99
126
  "output_types": [
100
127
  "Message"
101
128
  ]
102
129
  },
103
130
  "targetHandle": {
104
- "fieldName": "input_value",
105
- "id": "StructuredOutput-n8Y3t",
131
+ "fieldName": "lexical_terms",
132
+ "id": "AstraDB-93cal",
106
133
  "inputTypes": [
107
134
  "Message"
108
135
  ],
109
- "type": "str"
136
+ "type": "query"
110
137
  }
111
138
  },
112
- "id": "reactflow__edge-LanguageModelComponent-NEQ8S{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-NEQ8Sœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
113
- "selected": false,
114
- "source": "LanguageModelComponent-NEQ8S",
115
- "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-NEQ8Sœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
116
- "target": "StructuredOutput-n8Y3t",
117
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
139
+ "id": "xy-edge__ParserComponent-6wYbr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
140
+ "source": "ParserComponent-6wYbr",
141
+ "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-6wYbrœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
142
+ "target": "AstraDB-93cal",
143
+ "targetHandle": "{œfieldNameœ: œlexical_termsœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
118
144
  },
119
145
  {
120
- "animated": false,
121
146
  "className": "",
122
147
  "data": {
123
148
  "sourceHandle": {
124
- "dataType": "StructuredOutput",
125
- "id": "StructuredOutput-n8Y3t",
126
- "name": "structured_output",
149
+ "dataType": "ChatInput",
150
+ "id": "ChatInput-2JUiB",
151
+ "name": "message",
127
152
  "output_types": [
128
- "Data"
153
+ "Message"
129
154
  ]
130
155
  },
131
156
  "targetHandle": {
132
- "fieldName": "input_data",
133
- "id": "ParserComponent-Kb474",
157
+ "fieldName": "search_query",
158
+ "id": "AstraDB-93cal",
134
159
  "inputTypes": [
135
- "DataFrame",
136
- "Data"
160
+ "Message"
137
161
  ],
138
- "type": "other"
162
+ "type": "query"
139
163
  }
140
164
  },
141
- "id": "reactflow__edge-StructuredOutput-n8Y3t{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-n8Y3tœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-Kb474{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Kb474œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
142
- "selected": false,
143
- "source": "StructuredOutput-n8Y3t",
144
- "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-n8Y3tœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}",
145
- "target": "ParserComponent-Kb474",
146
- "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-Kb474œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
165
+ "id": "xy-edge__ChatInput-2JUiB{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
166
+ "source": "ChatInput-2JUiB",
167
+ "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-2JUiBœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
168
+ "target": "AstraDB-93cal",
169
+ "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
147
170
  },
148
171
  {
149
- "animated": false,
150
172
  "className": "",
151
173
  "data": {
152
174
  "sourceHandle": {
153
- "dataType": "LanguageModelComponent",
154
- "id": "LanguageModelComponent-pB4iD",
155
- "name": "model_output",
175
+ "dataType": "AstraDB",
176
+ "id": "AstraDB-93cal",
177
+ "name": "search_results",
156
178
  "output_types": [
157
- "LanguageModel"
179
+ "Data"
158
180
  ]
159
181
  },
160
182
  "targetHandle": {
161
- "fieldName": "llm",
162
- "id": "StructuredOutput-n8Y3t",
183
+ "fieldName": "input_data",
184
+ "id": "ParserComponent-0KvmM",
163
185
  "inputTypes": [
164
- "LanguageModel"
186
+ "DataFrame",
187
+ "Data"
165
188
  ],
166
189
  "type": "other"
167
190
  }
168
191
  },
169
- "id": "reactflow__edge-LanguageModelComponent-pB4iD{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-pB4iDœ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}",
170
- "selected": false,
171
- "source": "LanguageModelComponent-pB4iD",
172
- "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-pB4iDœ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}",
173
- "target": "StructuredOutput-n8Y3t",
174
- "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}"
192
+ "id": "xy-edge__AstraDB-93cal{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-0KvmM{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
193
+ "source": "AstraDB-93cal",
194
+ "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-93calœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}",
195
+ "target": "ParserComponent-0KvmM",
196
+ "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-0KvmMœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
175
197
  }
176
198
  ],
177
199
  "nodes": [
178
200
  {
179
201
  "data": {
180
- "id": "ChatInput-uZ7jn",
202
+ "id": "ChatInput-2JUiB",
181
203
  "node": {
182
204
  "base_classes": [
183
205
  "Message"
@@ -487,7 +509,7 @@
487
509
  "type": "ChatInput"
488
510
  },
489
511
  "dragging": false,
490
- "id": "ChatInput-uZ7jn",
512
+ "id": "ChatInput-2JUiB",
491
513
  "measured": {
492
514
  "height": 48,
493
515
  "width": 192
@@ -501,7 +523,7 @@
501
523
  },
502
524
  {
503
525
  "data": {
504
- "id": "ParserComponent-Kb474",
526
+ "id": "ParserComponent-6wYbr",
505
527
  "node": {
506
528
  "base_classes": [
507
529
  "Message"
@@ -672,7 +694,7 @@
672
694
  "type": "ParserComponent"
673
695
  },
674
696
  "dragging": false,
675
- "id": "ParserComponent-Kb474",
697
+ "id": "ParserComponent-6wYbr",
676
698
  "measured": {
677
699
  "height": 329,
678
700
  "width": 320
@@ -686,7 +708,7 @@
686
708
  },
687
709
  {
688
710
  "data": {
689
- "id": "ChatOutput-811h1",
711
+ "id": "ChatOutput-zViXc",
690
712
  "node": {
691
713
  "base_classes": [
692
714
  "Message"
@@ -1000,7 +1022,7 @@
1000
1022
  "type": "ChatOutput"
1001
1023
  },
1002
1024
  "dragging": false,
1003
- "id": "ChatOutput-811h1",
1025
+ "id": "ChatOutput-zViXc",
1004
1026
  "measured": {
1005
1027
  "height": 48,
1006
1028
  "width": 192
@@ -1014,7 +1036,7 @@
1014
1036
  },
1015
1037
  {
1016
1038
  "data": {
1017
- "id": "ParserComponent-3Wxa2",
1039
+ "id": "ParserComponent-0KvmM",
1018
1040
  "node": {
1019
1041
  "base_classes": [
1020
1042
  "Message"
@@ -1185,7 +1207,7 @@
1185
1207
  "type": "ParserComponent"
1186
1208
  },
1187
1209
  "dragging": false,
1188
- "id": "ParserComponent-3Wxa2",
1210
+ "id": "ParserComponent-0KvmM",
1189
1211
  "measured": {
1190
1212
  "height": 246,
1191
1213
  "width": 320
@@ -1199,48 +1221,32 @@
1199
1221
  },
1200
1222
  {
1201
1223
  "data": {
1202
- "id": "AstraDB-7gXip",
1224
+ "id": "LanguageModelComponent-CRZxx",
1203
1225
  "node": {
1204
1226
  "base_classes": [
1205
- "Data",
1206
- "DataFrame",
1207
- "VectorStore"
1227
+ "LanguageModel",
1228
+ "Message"
1208
1229
  ],
1209
1230
  "beta": false,
1210
1231
  "conditional_paths": [],
1211
1232
  "custom_fields": {},
1212
- "description": "Ingest and search documents in Astra DB",
1213
- "display_name": "Astra DB",
1214
- "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
1233
+ "description": "Runs a language model given a specified provider. ",
1234
+ "display_name": "Language Model",
1235
+ "documentation": "",
1215
1236
  "edited": false,
1216
1237
  "field_order": [
1217
- "token",
1218
- "environment",
1219
- "database_name",
1220
- "api_endpoint",
1221
- "keyspace",
1222
- "collection_name",
1223
- "embedding_model",
1224
- "ingest_data",
1225
- "search_query",
1226
- "should_cache_vector_store",
1227
- "search_method",
1228
- "reranker",
1229
- "lexical_terms",
1230
- "number_of_results",
1231
- "search_type",
1232
- "search_score_threshold",
1233
- "advanced_search_filter",
1234
- "autodetect_collection",
1235
- "content_field",
1236
- "deletion_field",
1237
- "ignore_invalid_documents",
1238
- "astradb_vectorstore_kwargs"
1238
+ "provider",
1239
+ "model_name",
1240
+ "api_key",
1241
+ "input_value",
1242
+ "system_message",
1243
+ "stream",
1244
+ "temperature"
1239
1245
  ],
1240
1246
  "frozen": false,
1241
- "icon": "AstraDB",
1247
+ "icon": "brain-circuit",
1248
+ "last_updated": "2025-08-26T16:33:20.961Z",
1242
1249
  "legacy": false,
1243
- "lf_version": "1.4.3",
1244
1250
  "metadata": {
1245
1251
  "code_hash": "23fbe9daca09",
1246
1252
  "dependencies": {
@@ -1272,97 +1278,89 @@
1272
1278
  {
1273
1279
  "allows_loop": false,
1274
1280
  "cache": true,
1275
- "display_name": "Search Results",
1276
- "group_outputs": false,
1277
- "method": "search_documents",
1278
- "name": "search_results",
1279
- "selected": "Data",
1280
- "tool_mode": true,
1281
- "types": [
1282
- "Data"
1283
- ],
1284
- "value": "__UNDEFINED__"
1285
- },
1286
- {
1287
- "allows_loop": false,
1288
- "cache": true,
1289
- "display_name": "DataFrame",
1281
+ "display_name": "Model Response",
1290
1282
  "group_outputs": false,
1291
- "method": "as_dataframe",
1292
- "name": "dataframe",
1293
- "selected": "DataFrame",
1283
+ "method": "text_response",
1284
+ "name": "text_output",
1285
+ "options": null,
1286
+ "required_inputs": null,
1287
+ "selected": "Message",
1294
1288
  "tool_mode": true,
1295
1289
  "types": [
1296
- "DataFrame"
1290
+ "Message"
1297
1291
  ],
1298
1292
  "value": "__UNDEFINED__"
1299
1293
  },
1300
1294
  {
1301
1295
  "allows_loop": false,
1302
1296
  "cache": true,
1303
- "display_name": "Vector Store Connection",
1297
+ "display_name": "Language Model",
1304
1298
  "group_outputs": false,
1305
- "hidden": true,
1306
- "method": "as_vector_store",
1307
- "name": "vectorstoreconnection",
1308
- "selected": "VectorStore",
1299
+ "method": "build_model",
1300
+ "name": "model_output",
1301
+ "options": null,
1302
+ "required_inputs": null,
1303
+ "selected": "LanguageModel",
1309
1304
  "tool_mode": true,
1310
1305
  "types": [
1311
- "VectorStore"
1306
+ "LanguageModel"
1312
1307
  ],
1313
1308
  "value": "__UNDEFINED__"
1314
1309
  }
1315
1310
  ],
1316
1311
  "pinned": false,
1312
+ "priority": 0,
1317
1313
  "template": {
1318
1314
  "_type": "Component",
1319
- "advanced_search_filter": {
1320
- "_input_type": "NestedDictInput",
1321
- "advanced": true,
1322
- "display_name": "Search Metadata Filter",
1315
+ "api_key": {
1316
+ "_input_type": "SecretStrInput",
1317
+ "advanced": false,
1318
+ "display_name": "OpenAI API Key",
1323
1319
  "dynamic": false,
1324
- "info": "Optional dictionary of filters to apply to the search query.",
1325
- "list": false,
1326
- "list_add_label": "Add More",
1327
- "name": "advanced_search_filter",
1320
+ "info": "Model Provider API key",
1321
+ "input_types": [],
1322
+ "load_from_db": true,
1323
+ "name": "api_key",
1324
+ "password": true,
1328
1325
  "placeholder": "",
1326
+ "real_time_refresh": true,
1329
1327
  "required": false,
1330
1328
  "show": true,
1331
1329
  "title_case": false,
1332
- "tool_mode": false,
1333
- "trace_as_input": true,
1334
- "trace_as_metadata": true,
1335
- "type": "NestedDict",
1336
- "value": {}
1330
+ "type": "str",
1331
+ "value": "OPENAI_API_KEY"
1337
1332
  },
1338
- "api_endpoint": {
1339
- "_input_type": "StrInput",
1340
- "advanced": false,
1341
- "display_name": "Astra DB API Endpoint",
1342
- "dynamic": false,
1343
- "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
1333
+ "code": {
1334
+ "advanced": true,
1335
+ "dynamic": true,
1336
+ "fileTypes": [],
1337
+ "file_path": "",
1338
+ "info": "",
1344
1339
  "list": false,
1345
- "list_add_label": "Add More",
1346
- "load_from_db": true,
1347
- "name": "api_endpoint",
1340
+ "load_from_db": false,
1341
+ "multiline": true,
1342
+ "name": "code",
1343
+ "password": false,
1348
1344
  "placeholder": "",
1349
- "required": false,
1350
- "show": false,
1345
+ "required": true,
1346
+ "show": true,
1351
1347
  "title_case": false,
1352
- "tool_mode": false,
1353
- "trace_as_metadata": true,
1354
- "type": "str",
1355
- "value": "ASTRA_DB_API_ENDPOINT"
1348
+ "type": "code",
1349
+ "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n"
1356
1350
  },
1357
- "astradb_vectorstore_kwargs": {
1358
- "_input_type": "NestedDictInput",
1359
- "advanced": true,
1360
- "display_name": "AstraDBVectorStore Parameters",
1351
+ "input_value": {
1352
+ "_input_type": "MessageInput",
1353
+ "advanced": false,
1354
+ "display_name": "Input",
1361
1355
  "dynamic": false,
1362
- "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
1356
+ "info": "The input text to send to the model",
1357
+ "input_types": [
1358
+ "Message"
1359
+ ],
1363
1360
  "list": false,
1364
1361
  "list_add_label": "Add More",
1365
- "name": "astradb_vectorstore_kwargs",
1362
+ "load_from_db": false,
1363
+ "name": "input_value",
1366
1364
  "placeholder": "",
1367
1365
  "required": false,
1368
1366
  "show": true,
@@ -1370,374 +1368,66 @@
1370
1368
  "tool_mode": false,
1371
1369
  "trace_as_input": true,
1372
1370
  "trace_as_metadata": true,
1373
- "type": "NestedDict",
1374
- "value": {}
1371
+ "type": "str",
1372
+ "value": "You are an AI system designed to extract structured information from unstructured text.Given the input_text, return a JSON object with predefined keys based on the expected structure.Extract values accurately and format them according to the specified type (e.g., string, integer, float, date).If a value is missing or cannot be determined, return a default (e.g., null, 0, or 'N/A').If multiple instances of the expected structure exist within the input_text, stream each as a separate JSON object."
1375
1373
  },
1376
- "autodetect_collection": {
1377
- "_input_type": "BoolInput",
1378
- "advanced": true,
1379
- "display_name": "Autodetect Collection",
1374
+ "model_name": {
1375
+ "_input_type": "DropdownInput",
1376
+ "advanced": false,
1377
+ "combobox": false,
1378
+ "dialog_inputs": {},
1379
+ "display_name": "Model Name",
1380
1380
  "dynamic": false,
1381
- "info": "Boolean flag to determine whether to autodetect the collection.",
1382
- "list": false,
1383
- "list_add_label": "Add More",
1384
- "name": "autodetect_collection",
1381
+ "info": "Select the model to use",
1382
+ "name": "model_name",
1383
+ "options": [
1384
+ "gpt-4o-mini",
1385
+ "gpt-4o",
1386
+ "gpt-4.1",
1387
+ "gpt-4.1-mini",
1388
+ "gpt-4.1-nano",
1389
+ "gpt-4.5-preview",
1390
+ "gpt-4-turbo",
1391
+ "gpt-4-turbo-preview",
1392
+ "gpt-4",
1393
+ "gpt-3.5-turbo"
1394
+ ],
1395
+ "options_metadata": [],
1385
1396
  "placeholder": "",
1386
1397
  "required": false,
1387
1398
  "show": true,
1388
1399
  "title_case": false,
1400
+ "toggle": false,
1389
1401
  "tool_mode": false,
1390
1402
  "trace_as_metadata": true,
1391
- "type": "bool",
1392
- "value": true
1403
+ "type": "str",
1404
+ "value": "gpt-4o-mini"
1393
1405
  },
1394
- "code": {
1395
- "advanced": true,
1396
- "dynamic": true,
1397
- "fileTypes": [],
1398
- "file_path": "",
1399
- "info": "",
1400
- "list": false,
1401
- "load_from_db": false,
1402
- "multiline": true,
1403
- "name": "code",
1404
- "password": false,
1405
- "placeholder": "",
1406
- "required": true,
1407
- "show": true,
1408
- "title_case": false,
1409
- "type": "code",
1410
- "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n '<a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" '\n 'rel=\"noopener noreferrer\">your database in Astra DB</a>'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f'<a class=\"underline\" target=\"_blank\" rel=\"noopener noreferrer\" '\n f'href=\"https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}\">'\n \"your database in Astra DB</a>.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
1411
- },
1412
- "collection_name": {
1413
- "_input_type": "DropdownInput",
1414
- "advanced": false,
1415
- "combobox": true,
1416
- "dialog_inputs": {
1417
- "fields": {
1418
- "data": {
1419
- "node": {
1420
- "description": "Please allow several seconds for creation to complete.",
1421
- "display_name": "Create new collection",
1422
- "field_order": [
1423
- "01_new_collection_name",
1424
- "02_embedding_generation_provider",
1425
- "03_embedding_generation_model",
1426
- "04_dimension"
1427
- ],
1428
- "name": "create_collection",
1429
- "template": {
1430
- "01_new_collection_name": {
1431
- "_input_type": "StrInput",
1432
- "advanced": false,
1433
- "display_name": "Name",
1434
- "dynamic": false,
1435
- "info": "Name of the new collection to create in Astra DB.",
1436
- "list": false,
1437
- "list_add_label": "Add More",
1438
- "load_from_db": false,
1439
- "name": "new_collection_name",
1440
- "placeholder": "",
1441
- "required": true,
1442
- "show": true,
1443
- "title_case": false,
1444
- "tool_mode": false,
1445
- "trace_as_metadata": true,
1446
- "type": "str",
1447
- "value": ""
1448
- },
1449
- "02_embedding_generation_provider": {
1450
- "_input_type": "DropdownInput",
1451
- "advanced": false,
1452
- "combobox": false,
1453
- "dialog_inputs": {},
1454
- "display_name": "Embedding generation method",
1455
- "dynamic": false,
1456
- "helper_text": "To create collections with more embedding provider options, go to <a class=\"underline\" target=\"_blank\" rel=\"noopener noreferrer\" href=\"https://astra.datastax.com/org/b7c08322-20a3-4516-80b0-5fa08655143e/database/f3166ac4-3e2f-4b32-880c-231d1f9e3f3e/data-explorer?createCollection=1&namespace=default_keyspace\">your database in Astra DB</a>.",
1457
- "info": "Provider to use for generating embeddings.",
1458
- "name": "embedding_generation_provider",
1459
- "options": [
1460
- "Bring your own",
1461
- "Nvidia"
1462
- ],
1463
- "options_metadata": [
1464
- {
1465
- "icon": "vectorstores"
1466
- },
1467
- {
1468
- "icon": "NVIDIA"
1469
- }
1470
- ],
1471
- "placeholder": "",
1472
- "real_time_refresh": true,
1473
- "required": true,
1474
- "show": true,
1475
- "title_case": false,
1476
- "toggle": false,
1477
- "tool_mode": false,
1478
- "trace_as_metadata": true,
1479
- "type": "str",
1480
- "value": ""
1481
- },
1482
- "03_embedding_generation_model": {
1483
- "_input_type": "DropdownInput",
1484
- "advanced": false,
1485
- "combobox": false,
1486
- "dialog_inputs": {},
1487
- "display_name": "Embedding model",
1488
- "dynamic": false,
1489
- "info": "Model to use for generating embeddings.",
1490
- "name": "embedding_generation_model",
1491
- "options": [],
1492
- "options_metadata": [],
1493
- "placeholder": null,
1494
- "readonly": "",
1495
- "real_time_refresh": true,
1496
- "required": true,
1497
- "show": true,
1498
- "title_case": false,
1499
- "toggle": false,
1500
- "tool_mode": false,
1501
- "trace_as_metadata": true,
1502
- "type": "str",
1503
- "value": null
1504
- },
1505
- "04_dimension": {
1506
- "_input_type": "IntInput",
1507
- "advanced": false,
1508
- "display_name": "Dimensions",
1509
- "dynamic": false,
1510
- "info": "Dimensions of the embeddings to generate.",
1511
- "list": false,
1512
- "list_add_label": "Add More",
1513
- "name": "dimension",
1514
- "placeholder": 1024,
1515
- "readonly": true,
1516
- "required": "",
1517
- "show": true,
1518
- "title_case": false,
1519
- "tool_mode": false,
1520
- "trace_as_metadata": true,
1521
- "type": "int",
1522
- "value": 1024
1523
- }
1524
- }
1525
- }
1526
- }
1527
- },
1528
- "functionality": "create"
1529
- },
1530
- "display_name": "Collection",
1531
- "dynamic": false,
1532
- "info": "The name of the collection within Astra DB where the vectors will be stored.",
1533
- "load_from_db": false,
1534
- "name": "collection_name",
1535
- "options": [],
1536
- "options_metadata": [],
1537
- "placeholder": "",
1538
- "real_time_refresh": true,
1539
- "refresh_button": true,
1540
- "required": true,
1541
- "show": false,
1542
- "title_case": false,
1543
- "toggle": false,
1544
- "tool_mode": false,
1545
- "trace_as_metadata": true,
1546
- "type": "str",
1547
- "value": ""
1548
- },
1549
- "content_field": {
1550
- "_input_type": "StrInput",
1551
- "advanced": true,
1552
- "display_name": "Content Field",
1553
- "dynamic": false,
1554
- "info": "Field to use as the text content field for the vector store.",
1555
- "list": false,
1556
- "list_add_label": "Add More",
1557
- "load_from_db": false,
1558
- "name": "content_field",
1559
- "placeholder": "",
1560
- "required": false,
1561
- "show": true,
1562
- "title_case": false,
1563
- "tool_mode": false,
1564
- "trace_as_metadata": true,
1565
- "type": "str",
1566
- "value": ""
1567
- },
1568
- "database_name": {
1406
+ "provider": {
1569
1407
  "_input_type": "DropdownInput",
1570
1408
  "advanced": false,
1571
- "combobox": true,
1572
- "dialog_inputs": {
1573
- "fields": {
1574
- "data": {
1575
- "node": {
1576
- "description": "Please allow several minutes for creation to complete.",
1577
- "display_name": "Create new database",
1578
- "field_order": [
1579
- "01_new_database_name",
1580
- "02_cloud_provider",
1581
- "03_region"
1582
- ],
1583
- "name": "create_database",
1584
- "template": {
1585
- "01_new_database_name": {
1586
- "_input_type": "StrInput",
1587
- "advanced": false,
1588
- "display_name": "Name",
1589
- "dynamic": false,
1590
- "info": "Name of the new database to create in Astra DB.",
1591
- "list": false,
1592
- "list_add_label": "Add More",
1593
- "load_from_db": false,
1594
- "name": "new_database_name",
1595
- "placeholder": "",
1596
- "required": true,
1597
- "show": true,
1598
- "title_case": false,
1599
- "tool_mode": false,
1600
- "trace_as_metadata": true,
1601
- "type": "str",
1602
- "value": ""
1603
- },
1604
- "02_cloud_provider": {
1605
- "_input_type": "DropdownInput",
1606
- "advanced": false,
1607
- "combobox": false,
1608
- "dialog_inputs": {},
1609
- "display_name": "Cloud provider",
1610
- "dynamic": false,
1611
- "info": "Cloud provider for the new database.",
1612
- "name": "cloud_provider",
1613
- "options": [
1614
- "Amazon Web Services",
1615
- "Google Cloud Platform",
1616
- "Microsoft Azure"
1617
- ],
1618
- "options_metadata": [],
1619
- "placeholder": "",
1620
- "real_time_refresh": true,
1621
- "required": true,
1622
- "show": true,
1623
- "title_case": false,
1624
- "toggle": false,
1625
- "tool_mode": false,
1626
- "trace_as_metadata": true,
1627
- "type": "str",
1628
- "value": ""
1629
- },
1630
- "03_region": {
1631
- "_input_type": "DropdownInput",
1632
- "advanced": false,
1633
- "combobox": false,
1634
- "dialog_inputs": {},
1635
- "display_name": "Region",
1636
- "dynamic": false,
1637
- "info": "Region for the new database.",
1638
- "name": "region",
1639
- "options": [],
1640
- "options_metadata": [],
1641
- "placeholder": "",
1642
- "required": true,
1643
- "show": true,
1644
- "title_case": false,
1645
- "toggle": false,
1646
- "tool_mode": false,
1647
- "trace_as_metadata": true,
1648
- "type": "str",
1649
- "value": ""
1650
- }
1651
- }
1652
- }
1653
- }
1654
- },
1655
- "functionality": "create"
1656
- },
1657
- "display_name": "Database",
1658
- "dynamic": false,
1659
- "info": "The Database name for the Astra DB instance.",
1660
- "load_from_db": false,
1661
- "name": "database_name",
1662
- "options": [],
1663
- "options_metadata": [
1664
- {
1665
- "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
1666
- "collections": 5,
1667
- "keyspaces": [
1668
- "default_keyspace",
1669
- "samples_dataflow"
1670
- ],
1671
- "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
1672
- "status": null
1673
- }
1674
- ],
1675
- "placeholder": "",
1676
- "real_time_refresh": true,
1677
- "refresh_button": true,
1678
- "required": true,
1679
- "show": true,
1680
- "title_case": false,
1681
- "toggle": false,
1682
- "tool_mode": false,
1683
- "trace_as_metadata": true,
1684
- "type": "str",
1685
- "value": ""
1686
- },
1687
- "deletion_field": {
1688
- "_input_type": "StrInput",
1689
- "advanced": true,
1690
- "display_name": "Deletion Based On Field",
1691
- "dynamic": false,
1692
- "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
1693
- "list": false,
1694
- "list_add_label": "Add More",
1695
- "load_from_db": false,
1696
- "name": "deletion_field",
1697
- "placeholder": "",
1698
- "required": false,
1699
- "show": true,
1700
- "title_case": false,
1701
- "tool_mode": false,
1702
- "trace_as_metadata": true,
1703
- "type": "str",
1704
- "value": ""
1705
- },
1706
- "embedding_model": {
1707
- "_input_type": "HandleInput",
1708
- "advanced": false,
1709
- "display_name": "Embedding Model",
1710
- "dynamic": false,
1711
- "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
1712
- "input_types": [
1713
- "Embeddings"
1714
- ],
1715
- "list": false,
1716
- "list_add_label": "Add More",
1717
- "name": "embedding_model",
1718
- "placeholder": "",
1719
- "required": false,
1720
- "show": false,
1721
- "title_case": false,
1722
- "trace_as_metadata": true,
1723
- "type": "other",
1724
- "value": ""
1725
- },
1726
- "environment": {
1727
- "_input_type": "DropdownInput",
1728
- "advanced": true,
1729
- "combobox": true,
1409
+ "combobox": false,
1730
1410
  "dialog_inputs": {},
1731
- "display_name": "Environment",
1411
+ "display_name": "Model Provider",
1732
1412
  "dynamic": false,
1733
- "info": "The environment for the Astra DB API Endpoint.",
1734
- "name": "environment",
1413
+ "info": "Select the model provider",
1414
+ "name": "provider",
1735
1415
  "options": [
1736
- "prod",
1737
- "test",
1738
- "dev"
1416
+ "OpenAI",
1417
+ "Anthropic",
1418
+ "Google"
1419
+ ],
1420
+ "options_metadata": [
1421
+ {
1422
+ "icon": "OpenAI"
1423
+ },
1424
+ {
1425
+ "icon": "Anthropic"
1426
+ },
1427
+ {
1428
+ "icon": "Google"
1429
+ }
1739
1430
  ],
1740
- "options_metadata": [],
1741
1431
  "placeholder": "",
1742
1432
  "real_time_refresh": true,
1743
1433
  "required": false,
@@ -1747,17 +1437,17 @@
1747
1437
  "tool_mode": false,
1748
1438
  "trace_as_metadata": true,
1749
1439
  "type": "str",
1750
- "value": "prod"
1440
+ "value": "OpenAI"
1751
1441
  },
1752
- "ignore_invalid_documents": {
1442
+ "stream": {
1753
1443
  "_input_type": "BoolInput",
1754
1444
  "advanced": true,
1755
- "display_name": "Ignore Invalid Documents",
1445
+ "display_name": "Stream",
1756
1446
  "dynamic": false,
1757
- "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
1447
+ "info": "Whether to stream the response",
1758
1448
  "list": false,
1759
1449
  "list_add_label": "Add More",
1760
- "name": "ignore_invalid_documents",
1450
+ "name": "stream",
1761
1451
  "placeholder": "",
1762
1452
  "required": false,
1763
1453
  "show": true,
@@ -1767,140 +1457,269 @@
1767
1457
  "type": "bool",
1768
1458
  "value": false
1769
1459
  },
1770
- "ingest_data": {
1771
- "_input_type": "HandleInput",
1772
- "advanced": false,
1773
- "display_name": "Ingest Data",
1460
+ "system_message": {
1461
+ "_input_type": "MultilineInput",
1462
+ "advanced": true,
1463
+ "copy_field": false,
1464
+ "display_name": "System Message",
1774
1465
  "dynamic": false,
1775
- "info": "",
1466
+ "info": "A system message that helps set the behavior of the assistant",
1776
1467
  "input_types": [
1777
- "Data",
1778
- "DataFrame"
1468
+ "Message"
1779
1469
  ],
1780
- "list": true,
1470
+ "list": false,
1781
1471
  "list_add_label": "Add More",
1782
- "name": "ingest_data",
1472
+ "load_from_db": false,
1473
+ "multiline": true,
1474
+ "name": "system_message",
1783
1475
  "placeholder": "",
1784
1476
  "required": false,
1785
1477
  "show": true,
1786
1478
  "title_case": false,
1479
+ "tool_mode": false,
1480
+ "trace_as_input": true,
1787
1481
  "trace_as_metadata": true,
1788
- "type": "other",
1482
+ "type": "str",
1789
1483
  "value": ""
1790
1484
  },
1791
- "keyspace": {
1792
- "_input_type": "DropdownInput",
1485
+ "temperature": {
1486
+ "_input_type": "SliderInput",
1793
1487
  "advanced": true,
1794
- "combobox": false,
1795
- "dialog_inputs": {},
1796
- "display_name": "Keyspace",
1488
+ "display_name": "Temperature",
1797
1489
  "dynamic": false,
1798
- "info": "Optional keyspace within Astra DB to use for the collection.",
1799
- "load_from_db": false,
1800
- "name": "keyspace",
1801
- "options": [],
1802
- "options_metadata": [],
1490
+ "info": "Controls randomness in responses",
1491
+ "max_label": "",
1492
+ "max_label_icon": "",
1493
+ "min_label": "",
1494
+ "min_label_icon": "",
1495
+ "name": "temperature",
1803
1496
  "placeholder": "",
1804
- "real_time_refresh": true,
1497
+ "range_spec": {
1498
+ "max": 1,
1499
+ "min": 0,
1500
+ "step": 0.01,
1501
+ "step_type": "float"
1502
+ },
1805
1503
  "required": false,
1806
1504
  "show": true,
1505
+ "slider_buttons": false,
1506
+ "slider_buttons_options": [],
1507
+ "slider_input": false,
1807
1508
  "title_case": false,
1808
- "toggle": false,
1809
1509
  "tool_mode": false,
1810
- "trace_as_metadata": true,
1811
- "type": "str",
1812
- "value": "default_keyspace"
1510
+ "type": "slider",
1511
+ "value": 0.1
1512
+ }
1513
+ },
1514
+ "tool_mode": false
1515
+ },
1516
+ "selected_output": "text_output",
1517
+ "showNode": true,
1518
+ "type": "LanguageModelComponent"
1519
+ },
1520
+ "dragging": false,
1521
+ "id": "LanguageModelComponent-CRZxx",
1522
+ "measured": {
1523
+ "height": 451,
1524
+ "width": 320
1525
+ },
1526
+ "position": {
1527
+ "x": 320.756607335245,
1528
+ "y": 486.0770655861057
1529
+ },
1530
+ "selected": false,
1531
+ "type": "genericNode"
1532
+ },
1533
+ {
1534
+ "data": {
1535
+ "id": "LanguageModelComponent-MD9V5",
1536
+ "node": {
1537
+ "base_classes": [
1538
+ "LanguageModel",
1539
+ "Message"
1540
+ ],
1541
+ "beta": false,
1542
+ "conditional_paths": [],
1543
+ "custom_fields": {},
1544
+ "description": "Runs a language model given a specified provider. ",
1545
+ "display_name": "Language Model",
1546
+ "documentation": "",
1547
+ "edited": false,
1548
+ "field_order": [
1549
+ "provider",
1550
+ "model_name",
1551
+ "api_key",
1552
+ "input_value",
1553
+ "system_message",
1554
+ "stream",
1555
+ "temperature"
1556
+ ],
1557
+ "frozen": false,
1558
+ "icon": "brain-circuit",
1559
+ "last_updated": "2025-08-26T16:33:20.962Z",
1560
+ "legacy": false,
1561
+ "metadata": {
1562
+ "keywords": [
1563
+ "model",
1564
+ "llm",
1565
+ "language model",
1566
+ "large language model"
1567
+ ]
1568
+ },
1569
+ "minimized": false,
1570
+ "output_types": [],
1571
+ "outputs": [
1572
+ {
1573
+ "allows_loop": false,
1574
+ "cache": true,
1575
+ "display_name": "Model Response",
1576
+ "group_outputs": false,
1577
+ "method": "text_response",
1578
+ "name": "text_output",
1579
+ "options": null,
1580
+ "required_inputs": null,
1581
+ "selected": "Message",
1582
+ "tool_mode": true,
1583
+ "types": [
1584
+ "Message"
1585
+ ],
1586
+ "value": "__UNDEFINED__"
1813
1587
  },
1814
- "lexical_terms": {
1815
- "_input_type": "QueryInput",
1588
+ {
1589
+ "allows_loop": false,
1590
+ "cache": true,
1591
+ "display_name": "Language Model",
1592
+ "group_outputs": false,
1593
+ "method": "build_model",
1594
+ "name": "model_output",
1595
+ "options": null,
1596
+ "required_inputs": null,
1597
+ "selected": "LanguageModel",
1598
+ "tool_mode": true,
1599
+ "types": [
1600
+ "LanguageModel"
1601
+ ],
1602
+ "value": "__UNDEFINED__"
1603
+ }
1604
+ ],
1605
+ "pinned": false,
1606
+ "priority": 0,
1607
+ "template": {
1608
+ "_type": "Component",
1609
+ "api_key": {
1610
+ "_input_type": "SecretStrInput",
1816
1611
  "advanced": false,
1817
- "display_name": "Lexical Terms",
1612
+ "display_name": "OpenAI API Key",
1818
1613
  "dynamic": false,
1819
- "info": "Add additional terms/keywords to augment search precision.",
1820
- "input_types": [
1821
- "Message"
1822
- ],
1823
- "list": false,
1824
- "list_add_label": "Add More",
1825
- "load_from_db": false,
1826
- "name": "lexical_terms",
1827
- "placeholder": "Enter terms to search...",
1614
+ "info": "Model Provider API key",
1615
+ "input_types": [],
1616
+ "load_from_db": true,
1617
+ "name": "api_key",
1618
+ "password": true,
1619
+ "placeholder": "",
1620
+ "real_time_refresh": true,
1828
1621
  "required": false,
1829
- "separator": " ",
1830
- "show": false,
1622
+ "show": true,
1831
1623
  "title_case": false,
1832
- "tool_mode": false,
1833
- "trace_as_input": true,
1834
- "trace_as_metadata": true,
1835
- "type": "query",
1836
- "value": ""
1624
+ "type": "str",
1625
+ "value": "OPENAI_API_KEY"
1837
1626
  },
1838
- "number_of_results": {
1839
- "_input_type": "IntInput",
1627
+ "code": {
1840
1628
  "advanced": true,
1841
- "display_name": "Number of Search Results",
1629
+ "dynamic": true,
1630
+ "fileTypes": [],
1631
+ "file_path": "",
1632
+ "info": "",
1633
+ "list": false,
1634
+ "load_from_db": false,
1635
+ "multiline": true,
1636
+ "name": "code",
1637
+ "password": false,
1638
+ "placeholder": "",
1639
+ "required": true,
1640
+ "show": true,
1641
+ "title_case": false,
1642
+ "type": "code",
1643
+ "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n"
1644
+ },
1645
+ "input_value": {
1646
+ "_input_type": "MessageInput",
1647
+ "advanced": false,
1648
+ "display_name": "Input",
1842
1649
  "dynamic": false,
1843
- "info": "Number of search results to return.",
1650
+ "info": "The input text to send to the model",
1651
+ "input_types": [
1652
+ "Message"
1653
+ ],
1844
1654
  "list": false,
1845
1655
  "list_add_label": "Add More",
1846
- "name": "number_of_results",
1656
+ "load_from_db": false,
1657
+ "name": "input_value",
1847
1658
  "placeholder": "",
1848
1659
  "required": false,
1849
1660
  "show": true,
1850
1661
  "title_case": false,
1851
1662
  "tool_mode": false,
1663
+ "trace_as_input": true,
1852
1664
  "trace_as_metadata": true,
1853
- "type": "int",
1854
- "value": 4
1665
+ "type": "str",
1666
+ "value": ""
1855
1667
  },
1856
- "reranker": {
1668
+ "model_name": {
1857
1669
  "_input_type": "DropdownInput",
1858
1670
  "advanced": false,
1859
1671
  "combobox": false,
1860
1672
  "dialog_inputs": {},
1861
- "display_name": "Reranker",
1862
- "dynamic": false,
1863
- "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
1864
- "load_from_db": false,
1865
- "name": "reranker",
1866
- "options": [],
1867
- "options_metadata": [
1868
- {
1869
- "icon": "NVIDIA"
1870
- }
1673
+ "display_name": "Model Name",
1674
+ "dynamic": false,
1675
+ "info": "Select the model to use",
1676
+ "name": "model_name",
1677
+ "options": [
1678
+ "gpt-4o-mini",
1679
+ "gpt-4o",
1680
+ "gpt-4.1",
1681
+ "gpt-4.1-mini",
1682
+ "gpt-4.1-nano",
1683
+ "gpt-4.5-preview",
1684
+ "gpt-4-turbo",
1685
+ "gpt-4-turbo-preview",
1686
+ "gpt-4",
1687
+ "gpt-3.5-turbo"
1871
1688
  ],
1689
+ "options_metadata": [],
1872
1690
  "placeholder": "",
1873
1691
  "required": false,
1874
1692
  "show": true,
1875
1693
  "title_case": false,
1876
- "toggle": true,
1877
- "toggle_disable": true,
1878
- "toggle_value": true,
1694
+ "toggle": false,
1879
1695
  "tool_mode": false,
1880
1696
  "trace_as_metadata": true,
1881
1697
  "type": "str",
1882
- "value": "nvidia/llama-3.2-nv-rerankqa-1b-v2"
1698
+ "value": "gpt-4o-mini"
1883
1699
  },
1884
- "search_method": {
1700
+ "provider": {
1885
1701
  "_input_type": "DropdownInput",
1886
- "advanced": true,
1702
+ "advanced": false,
1887
1703
  "combobox": false,
1888
1704
  "dialog_inputs": {},
1889
- "display_name": "Search Method",
1705
+ "display_name": "Model Provider",
1890
1706
  "dynamic": false,
1891
- "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
1892
- "load_from_db": false,
1893
- "name": "search_method",
1707
+ "info": "Select the model provider",
1708
+ "name": "provider",
1894
1709
  "options": [
1895
- "Hybrid Search",
1896
- "Vector Search"
1710
+ "OpenAI",
1711
+ "Anthropic",
1712
+ "Google"
1897
1713
  ],
1898
1714
  "options_metadata": [
1899
1715
  {
1900
- "icon": "SearchHybrid"
1716
+ "icon": "OpenAI"
1901
1717
  },
1902
1718
  {
1903
- "icon": "SearchVector"
1719
+ "icon": "Anthropic"
1720
+ },
1721
+ {
1722
+ "icon": "Google"
1904
1723
  }
1905
1724
  ],
1906
1725
  "placeholder": "",
@@ -1912,164 +1731,143 @@
1912
1731
  "tool_mode": false,
1913
1732
  "trace_as_metadata": true,
1914
1733
  "type": "str",
1915
- "value": "Hybrid Search"
1916
- },
1917
- "search_query": {
1918
- "_input_type": "QueryInput",
1919
- "advanced": false,
1920
- "display_name": "Search Query",
1921
- "dynamic": false,
1922
- "info": "Enter a query to run a similarity search.",
1923
- "input_types": [
1924
- "Message"
1925
- ],
1926
- "list": false,
1927
- "list_add_label": "Add More",
1928
- "load_from_db": false,
1929
- "name": "search_query",
1930
- "placeholder": "Enter a query...",
1931
- "required": false,
1932
- "show": true,
1933
- "title_case": false,
1934
- "tool_mode": true,
1935
- "trace_as_input": true,
1936
- "trace_as_metadata": true,
1937
- "type": "query",
1938
- "value": ""
1734
+ "value": "OpenAI"
1939
1735
  },
1940
- "search_score_threshold": {
1941
- "_input_type": "FloatInput",
1736
+ "stream": {
1737
+ "_input_type": "BoolInput",
1942
1738
  "advanced": true,
1943
- "display_name": "Search Score Threshold",
1739
+ "display_name": "Stream",
1944
1740
  "dynamic": false,
1945
- "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
1741
+ "info": "Whether to stream the response",
1946
1742
  "list": false,
1947
1743
  "list_add_label": "Add More",
1948
- "name": "search_score_threshold",
1744
+ "name": "stream",
1949
1745
  "placeholder": "",
1950
1746
  "required": false,
1951
1747
  "show": true,
1952
1748
  "title_case": false,
1953
1749
  "tool_mode": false,
1954
1750
  "trace_as_metadata": true,
1955
- "type": "float",
1956
- "value": 0
1751
+ "type": "bool",
1752
+ "value": false
1957
1753
  },
1958
- "search_type": {
1959
- "_input_type": "DropdownInput",
1754
+ "system_message": {
1755
+ "_input_type": "MultilineInput",
1960
1756
  "advanced": true,
1961
- "combobox": false,
1962
- "dialog_inputs": {},
1963
- "display_name": "Search Type",
1757
+ "copy_field": false,
1758
+ "display_name": "System Message",
1964
1759
  "dynamic": false,
1965
- "info": "Search type to use",
1966
- "name": "search_type",
1967
- "options": [
1968
- "Similarity",
1969
- "Similarity with score threshold",
1970
- "MMR (Max Marginal Relevance)"
1760
+ "info": "A system message that helps set the behavior of the assistant",
1761
+ "input_types": [
1762
+ "Message"
1971
1763
  ],
1972
- "options_metadata": [],
1764
+ "list": false,
1765
+ "list_add_label": "Add More",
1766
+ "load_from_db": false,
1767
+ "multiline": true,
1768
+ "name": "system_message",
1973
1769
  "placeholder": "",
1974
1770
  "required": false,
1975
1771
  "show": true,
1976
1772
  "title_case": false,
1977
- "toggle": false,
1978
1773
  "tool_mode": false,
1774
+ "trace_as_input": true,
1979
1775
  "trace_as_metadata": true,
1980
1776
  "type": "str",
1981
- "value": "Similarity"
1777
+ "value": ""
1982
1778
  },
1983
- "should_cache_vector_store": {
1984
- "_input_type": "BoolInput",
1779
+ "temperature": {
1780
+ "_input_type": "SliderInput",
1985
1781
  "advanced": true,
1986
- "display_name": "Cache Vector Store",
1782
+ "display_name": "Temperature",
1987
1783
  "dynamic": false,
1988
- "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
1989
- "list": false,
1990
- "list_add_label": "Add More",
1991
- "name": "should_cache_vector_store",
1784
+ "info": "Controls randomness in responses",
1785
+ "max_label": "",
1786
+ "max_label_icon": "",
1787
+ "min_label": "",
1788
+ "min_label_icon": "",
1789
+ "name": "temperature",
1992
1790
  "placeholder": "",
1791
+ "range_spec": {
1792
+ "max": 1,
1793
+ "min": 0,
1794
+ "step": 0.01,
1795
+ "step_type": "float"
1796
+ },
1993
1797
  "required": false,
1994
1798
  "show": true,
1799
+ "slider_buttons": false,
1800
+ "slider_buttons_options": [],
1801
+ "slider_input": false,
1995
1802
  "title_case": false,
1996
1803
  "tool_mode": false,
1997
- "trace_as_metadata": true,
1998
- "type": "bool",
1999
- "value": true
2000
- },
2001
- "token": {
2002
- "_input_type": "SecretStrInput",
2003
- "advanced": false,
2004
- "display_name": "Astra DB Application Token",
2005
- "dynamic": false,
2006
- "info": "Authentication token for accessing Astra DB.",
2007
- "input_types": [],
2008
- "load_from_db": true,
2009
- "name": "token",
2010
- "password": true,
2011
- "placeholder": "",
2012
- "real_time_refresh": true,
2013
- "required": true,
2014
- "show": true,
2015
- "title_case": false,
2016
- "type": "str",
2017
- "value": "ASTRA_DB_APPLICATION_TOKEN"
1804
+ "type": "slider",
1805
+ "value": 0.1
2018
1806
  }
2019
1807
  },
2020
1808
  "tool_mode": false
2021
1809
  },
2022
- "selected_output": "dataframe",
1810
+ "selected_output": "model_output",
2023
1811
  "showNode": true,
2024
- "type": "AstraDB"
1812
+ "type": "LanguageModelComponent"
2025
1813
  },
2026
1814
  "dragging": false,
2027
- "id": "AstraDB-7gXip",
1815
+ "id": "LanguageModelComponent-MD9V5",
2028
1816
  "measured": {
2029
- "height": 540,
1817
+ "height": 451,
2030
1818
  "width": 320
2031
1819
  },
2032
1820
  "position": {
2033
- "x": 1548.269269836593,
2034
- "y": 162.5619344372189
1821
+ "x": 322.5971643968167,
1822
+ "y": -36.64113990031162
2035
1823
  },
2036
1824
  "selected": false,
2037
1825
  "type": "genericNode"
2038
1826
  },
2039
1827
  {
2040
1828
  "data": {
2041
- "id": "LanguageModelComponent-NEQ8S",
1829
+ "id": "StructuredOutput-AUzID",
2042
1830
  "node": {
2043
1831
  "base_classes": [
2044
- "LanguageModel",
2045
- "Message"
1832
+ "Data"
2046
1833
  ],
2047
1834
  "beta": false,
2048
1835
  "conditional_paths": [],
2049
1836
  "custom_fields": {},
2050
- "description": "Runs a language model given a specified provider. ",
2051
- "display_name": "Language Model",
1837
+ "description": "Uses an LLM to generate structured data. Ideal for extraction and consistency.",
1838
+ "display_name": "Structured Output",
2052
1839
  "documentation": "",
2053
1840
  "edited": false,
2054
1841
  "field_order": [
2055
- "provider",
2056
- "model_name",
2057
- "api_key",
1842
+ "llm",
2058
1843
  "input_value",
2059
- "system_message",
2060
- "stream",
2061
- "temperature"
1844
+ "system_prompt",
1845
+ "schema_name",
1846
+ "output_schema"
2062
1847
  ],
2063
1848
  "frozen": false,
2064
- "icon": "brain-circuit",
1849
+ "icon": "braces",
2065
1850
  "legacy": false,
2066
1851
  "metadata": {
2067
- "keywords": [
2068
- "model",
2069
- "llm",
2070
- "language model",
2071
- "large language model"
2072
- ]
1852
+ "code_hash": "ad2a6f4552c0",
1853
+ "dependencies": {
1854
+ "dependencies": [
1855
+ {
1856
+ "name": "pydantic",
1857
+ "version": "2.10.6"
1858
+ },
1859
+ {
1860
+ "name": "trustcall",
1861
+ "version": "0.0.39"
1862
+ },
1863
+ {
1864
+ "name": "langflow",
1865
+ "version": null
1866
+ }
1867
+ ],
1868
+ "total_dependencies": 3
1869
+ },
1870
+ "module": "langflow.components.processing.structured_output.StructuredOutputComponent"
2073
1871
  },
2074
1872
  "minimized": false,
2075
1873
  "output_types": [],
@@ -2077,58 +1875,35 @@
2077
1875
  {
2078
1876
  "allows_loop": false,
2079
1877
  "cache": true,
2080
- "display_name": "Model Response",
1878
+ "display_name": "Structured Output",
2081
1879
  "group_outputs": false,
2082
- "method": "text_response",
2083
- "name": "text_output",
2084
- "options": null,
2085
- "required_inputs": null,
2086
- "selected": "Message",
1880
+ "method": "build_structured_output",
1881
+ "name": "structured_output",
1882
+ "selected": "Data",
2087
1883
  "tool_mode": true,
2088
1884
  "types": [
2089
- "Message"
1885
+ "Data"
2090
1886
  ],
2091
1887
  "value": "__UNDEFINED__"
2092
1888
  },
2093
1889
  {
2094
1890
  "allows_loop": false,
2095
1891
  "cache": true,
2096
- "display_name": "Language Model",
2097
- "group_outputs": false,
2098
- "method": "build_model",
2099
- "name": "model_output",
2100
- "options": null,
2101
- "required_inputs": null,
2102
- "selected": "LanguageModel",
2103
- "tool_mode": true,
2104
- "types": [
2105
- "LanguageModel"
2106
- ],
2107
- "value": "__UNDEFINED__"
2108
- }
2109
- ],
2110
- "pinned": false,
2111
- "priority": 0,
2112
- "template": {
2113
- "_type": "Component",
2114
- "api_key": {
2115
- "_input_type": "SecretStrInput",
2116
- "advanced": false,
2117
- "display_name": "OpenAI API Key",
2118
- "dynamic": false,
2119
- "info": "Model Provider API key",
2120
- "input_types": [],
2121
- "load_from_db": true,
2122
- "name": "api_key",
2123
- "password": true,
2124
- "placeholder": "",
2125
- "real_time_refresh": true,
2126
- "required": false,
2127
- "show": true,
2128
- "title_case": false,
2129
- "type": "str",
2130
- "value": "OPENAI_API_KEY"
2131
- },
1892
+ "display_name": "Structured Output",
1893
+ "group_outputs": false,
1894
+ "method": "build_structured_dataframe",
1895
+ "name": "dataframe_output",
1896
+ "selected": null,
1897
+ "tool_mode": true,
1898
+ "types": [
1899
+ "DataFrame"
1900
+ ],
1901
+ "value": "__UNDEFINED__"
1902
+ }
1903
+ ],
1904
+ "pinned": false,
1905
+ "template": {
1906
+ "_type": "Component",
2132
1907
  "code": {
2133
1908
  "advanced": true,
2134
1909
  "dynamic": true,
@@ -2145,14 +1920,14 @@
2145
1920
  "show": true,
2146
1921
  "title_case": false,
2147
1922
  "type": "code",
2148
- "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n"
1923
+ "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. Ideal for extraction and consistency.\"\n documentation: str = \"https://docs.langflow.org/components-processing#structured-output\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"dataframe_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) == 1:\n return Data(data=output[0])\n if len(output) > 1:\n # Multiple outputs - wrap them in a results container\n return Data(data={\"results\": output})\n return Data()\n\n def build_structured_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n data_list = [Data(data=output[0])] if len(output) == 1 else [Data(data=item) for item in output]\n\n return DataFrame(data_list)\n"
2149
1924
  },
2150
1925
  "input_value": {
2151
- "_input_type": "MessageInput",
1926
+ "_input_type": "MessageTextInput",
2152
1927
  "advanced": false,
2153
- "display_name": "Input",
1928
+ "display_name": "Input Message",
2154
1929
  "dynamic": false,
2155
- "info": "The input text to send to the model",
1930
+ "info": "The input message to the language model.",
2156
1931
  "input_types": [
2157
1932
  "Message"
2158
1933
  ],
@@ -2161,108 +1936,156 @@
2161
1936
  "load_from_db": false,
2162
1937
  "name": "input_value",
2163
1938
  "placeholder": "",
2164
- "required": false,
1939
+ "required": true,
2165
1940
  "show": true,
2166
1941
  "title_case": false,
2167
- "tool_mode": false,
1942
+ "tool_mode": true,
2168
1943
  "trace_as_input": true,
2169
1944
  "trace_as_metadata": true,
2170
1945
  "type": "str",
2171
- "value": "You are an AI system designed to extract structured information from unstructured text.Given the input_text, return a JSON object with predefined keys based on the expected structure.Extract values accurately and format them according to the specified type (e.g., string, integer, float, date).If a value is missing or cannot be determined, return a default (e.g., null, 0, or 'N/A').If multiple instances of the expected structure exist within the input_text, stream each as a separate JSON object."
1946
+ "value": ""
2172
1947
  },
2173
- "model_name": {
2174
- "_input_type": "DropdownInput",
1948
+ "llm": {
1949
+ "_input_type": "HandleInput",
2175
1950
  "advanced": false,
2176
- "combobox": false,
2177
- "dialog_inputs": {},
2178
- "display_name": "Model Name",
1951
+ "display_name": "Language Model",
2179
1952
  "dynamic": false,
2180
- "info": "Select the model to use",
2181
- "name": "model_name",
2182
- "options": [
2183
- "gpt-4o-mini",
2184
- "gpt-4o",
2185
- "gpt-4.1",
2186
- "gpt-4.1-mini",
2187
- "gpt-4.1-nano",
2188
- "gpt-4.5-preview",
2189
- "gpt-4-turbo",
2190
- "gpt-4-turbo-preview",
2191
- "gpt-4",
2192
- "gpt-3.5-turbo"
1953
+ "info": "The language model to use to generate the structured output.",
1954
+ "input_types": [
1955
+ "LanguageModel"
2193
1956
  ],
2194
- "options_metadata": [],
1957
+ "list": false,
1958
+ "list_add_label": "Add More",
1959
+ "name": "llm",
2195
1960
  "placeholder": "",
2196
- "required": false,
1961
+ "required": true,
2197
1962
  "show": true,
2198
1963
  "title_case": false,
2199
- "toggle": false,
2200
- "tool_mode": false,
2201
1964
  "trace_as_metadata": true,
2202
- "type": "str",
2203
- "value": "gpt-4o-mini"
1965
+ "type": "other",
1966
+ "value": ""
2204
1967
  },
2205
- "provider": {
2206
- "_input_type": "DropdownInput",
1968
+ "output_schema": {
1969
+ "_input_type": "TableInput",
2207
1970
  "advanced": false,
2208
- "combobox": false,
2209
- "dialog_inputs": {},
2210
- "display_name": "Model Provider",
1971
+ "display_name": "Output Schema",
2211
1972
  "dynamic": false,
2212
- "info": "Select the model provider",
2213
- "name": "provider",
2214
- "options": [
2215
- "OpenAI",
2216
- "Anthropic",
2217
- "Google"
2218
- ],
2219
- "options_metadata": [
2220
- {
2221
- "icon": "OpenAI"
2222
- },
2223
- {
2224
- "icon": "Anthropic"
2225
- },
2226
- {
2227
- "icon": "Google"
2228
- }
2229
- ],
1973
+ "info": "Define the structure and data types for the model's output.",
1974
+ "is_list": true,
1975
+ "list_add_label": "Add More",
1976
+ "name": "output_schema",
2230
1977
  "placeholder": "",
2231
- "real_time_refresh": true,
2232
- "required": false,
1978
+ "required": true,
2233
1979
  "show": true,
1980
+ "table_icon": "Table",
1981
+ "table_schema": {
1982
+ "columns": [
1983
+ {
1984
+ "default": "field",
1985
+ "description": "Specify the name of the output field.",
1986
+ "disable_edit": false,
1987
+ "display_name": "Name",
1988
+ "edit_mode": "inline",
1989
+ "filterable": true,
1990
+ "formatter": "text",
1991
+ "hidden": false,
1992
+ "name": "name",
1993
+ "sortable": true,
1994
+ "type": "str"
1995
+ },
1996
+ {
1997
+ "default": "description of field",
1998
+ "description": "Describe the purpose of the output field.",
1999
+ "disable_edit": false,
2000
+ "display_name": "Description",
2001
+ "edit_mode": "popover",
2002
+ "filterable": true,
2003
+ "formatter": "text",
2004
+ "hidden": false,
2005
+ "name": "description",
2006
+ "sortable": true,
2007
+ "type": "str"
2008
+ },
2009
+ {
2010
+ "default": "str",
2011
+ "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).",
2012
+ "disable_edit": false,
2013
+ "display_name": "Type",
2014
+ "edit_mode": "inline",
2015
+ "filterable": true,
2016
+ "formatter": "text",
2017
+ "hidden": false,
2018
+ "name": "type",
2019
+ "options": [
2020
+ "str",
2021
+ "int",
2022
+ "float",
2023
+ "bool",
2024
+ "dict"
2025
+ ],
2026
+ "sortable": true,
2027
+ "type": "str"
2028
+ },
2029
+ {
2030
+ "default": false,
2031
+ "description": "Set to True if this output field should be a list of the specified type.",
2032
+ "disable_edit": false,
2033
+ "display_name": "As List",
2034
+ "edit_mode": "inline",
2035
+ "filterable": true,
2036
+ "formatter": "boolean",
2037
+ "hidden": false,
2038
+ "name": "multiple",
2039
+ "sortable": true,
2040
+ "type": "boolean"
2041
+ }
2042
+ ]
2043
+ },
2234
2044
  "title_case": false,
2235
- "toggle": false,
2236
2045
  "tool_mode": false,
2237
2046
  "trace_as_metadata": true,
2238
- "type": "str",
2239
- "value": "OpenAI"
2047
+ "trigger_icon": "Table",
2048
+ "trigger_text": "Open table",
2049
+ "type": "table",
2050
+ "value": [
2051
+ {
2052
+ "description": "description of field",
2053
+ "multiple": "False",
2054
+ "name": "field",
2055
+ "type": "str"
2056
+ }
2057
+ ]
2240
2058
  },
2241
- "stream": {
2242
- "_input_type": "BoolInput",
2059
+ "schema_name": {
2060
+ "_input_type": "MessageTextInput",
2243
2061
  "advanced": true,
2244
- "display_name": "Stream",
2062
+ "display_name": "Schema Name",
2245
2063
  "dynamic": false,
2246
- "info": "Whether to stream the response",
2064
+ "info": "Provide a name for the output data schema.",
2065
+ "input_types": [
2066
+ "Message"
2067
+ ],
2247
2068
  "list": false,
2248
2069
  "list_add_label": "Add More",
2249
- "name": "stream",
2070
+ "load_from_db": false,
2071
+ "name": "schema_name",
2250
2072
  "placeholder": "",
2251
2073
  "required": false,
2252
2074
  "show": true,
2253
2075
  "title_case": false,
2254
2076
  "tool_mode": false,
2077
+ "trace_as_input": true,
2255
2078
  "trace_as_metadata": true,
2256
- "type": "bool",
2257
- "value": false
2079
+ "type": "str",
2080
+ "value": ""
2258
2081
  },
2259
- "system_message": {
2082
+ "system_prompt": {
2260
2083
  "_input_type": "MultilineInput",
2261
2084
  "advanced": true,
2262
2085
  "copy_field": false,
2263
- "display_name": "System Message",
2086
+ "display_name": "Format Instructions",
2264
2087
  "dynamic": false,
2265
- "info": "A system message that helps set the behavior of the assistant",
2088
+ "info": "The instructions to the language model for formatting the output.",
2266
2089
  "input_types": [
2267
2090
  "Message"
2268
2091
  ],
@@ -2270,99 +2093,130 @@
2270
2093
  "list_add_label": "Add More",
2271
2094
  "load_from_db": false,
2272
2095
  "multiline": true,
2273
- "name": "system_message",
2096
+ "name": "system_prompt",
2274
2097
  "placeholder": "",
2275
- "required": false,
2098
+ "required": true,
2276
2099
  "show": true,
2277
2100
  "title_case": false,
2278
2101
  "tool_mode": false,
2279
2102
  "trace_as_input": true,
2280
2103
  "trace_as_metadata": true,
2281
2104
  "type": "str",
2282
- "value": ""
2283
- },
2284
- "temperature": {
2285
- "_input_type": "SliderInput",
2286
- "advanced": true,
2287
- "display_name": "Temperature",
2288
- "dynamic": false,
2289
- "info": "Controls randomness in responses",
2290
- "max_label": "",
2291
- "max_label_icon": "",
2292
- "min_label": "",
2293
- "min_label_icon": "",
2294
- "name": "temperature",
2295
- "placeholder": "",
2296
- "range_spec": {
2297
- "max": 1,
2298
- "min": 0,
2299
- "step": 0.01,
2300
- "step_type": "float"
2301
- },
2302
- "required": false,
2303
- "show": true,
2304
- "slider_buttons": false,
2305
- "slider_buttons_options": [],
2306
- "slider_input": false,
2307
- "title_case": false,
2308
- "tool_mode": false,
2309
- "type": "slider",
2310
- "value": 0.1
2105
+ "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format."
2311
2106
  }
2312
2107
  },
2313
2108
  "tool_mode": false
2314
2109
  },
2315
- "selected_output": "text_output",
2316
- "showNode": true,
2317
- "type": "LanguageModelComponent"
2110
+ "selected_output": "structured_output",
2111
+ "showNode": true,
2112
+ "type": "StructuredOutput"
2113
+ },
2114
+ "dragging": false,
2115
+ "id": "StructuredOutput-AUzID",
2116
+ "measured": {
2117
+ "height": 349,
2118
+ "width": 320
2119
+ },
2120
+ "position": {
2121
+ "x": 735.3215653605321,
2122
+ "y": 423.7970360460631
2123
+ },
2124
+ "selected": false,
2125
+ "type": "genericNode"
2126
+ },
2127
+ {
2128
+ "data": {
2129
+ "id": "note-IkRDS",
2130
+ "node": {
2131
+ "description": "# Hybrid Search RAG\n\nHybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.\n\n## Prerequisites\n\n* An [OpenAI API key](https://platform.openai.com/)\n* An [Astra DB Application Token](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) for the Astra DB component.\n\n## Quickstart\n\n1. In the Astra DB component, add your Astra DB Application Token.\nThis connects Langflow to your Astra database.\n2. Select an Astra collection that is hybrid-enabled.\nFor more information, see the [Datastax documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html).\nThe connection appears between the Parser component and the Astra DB component when a vector database is connected.\n3. Ensure the **Lexical Terms** and **Parsed Text** ports are connected.\n4. Add your OpenAI API key in the **Language Model** model component.\n5. Open the Playground and ask a question, like \"What are the features of my data?\"",
2132
+ "display_name": "",
2133
+ "documentation": "",
2134
+ "template": {
2135
+ "backgroundColor": "blue"
2136
+ }
2137
+ },
2138
+ "type": "note"
2318
2139
  },
2319
2140
  "dragging": false,
2320
- "id": "LanguageModelComponent-NEQ8S",
2141
+ "id": "note-IkRDS",
2321
2142
  "measured": {
2322
- "height": 451,
2323
- "width": 320
2143
+ "height": 601,
2144
+ "width": 575
2324
2145
  },
2325
2146
  "position": {
2326
- "x": 320.756607335245,
2327
- "y": 486.0770655861057
2147
+ "x": 816.3801044575429,
2148
+ "y": -279.19595575780494
2328
2149
  },
2329
2150
  "selected": false,
2330
- "type": "genericNode"
2151
+ "type": "noteNode"
2331
2152
  },
2332
2153
  {
2333
2154
  "data": {
2334
- "id": "LanguageModelComponent-pB4iD",
2155
+ "id": "AstraDB-93cal",
2335
2156
  "node": {
2336
2157
  "base_classes": [
2337
- "LanguageModel",
2338
- "Message"
2158
+ "Data",
2159
+ "DataFrame",
2160
+ "VectorStore"
2339
2161
  ],
2340
2162
  "beta": false,
2341
2163
  "conditional_paths": [],
2342
2164
  "custom_fields": {},
2343
- "description": "Runs a language model given a specified provider. ",
2344
- "display_name": "Language Model",
2345
- "documentation": "",
2165
+ "description": "Ingest and search documents in Astra DB",
2166
+ "display_name": "Astra DB",
2167
+ "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
2346
2168
  "edited": false,
2347
2169
  "field_order": [
2348
- "provider",
2349
- "model_name",
2350
- "api_key",
2351
- "input_value",
2352
- "system_message",
2353
- "stream",
2354
- "temperature"
2170
+ "token",
2171
+ "environment",
2172
+ "database_name",
2173
+ "api_endpoint",
2174
+ "keyspace",
2175
+ "collection_name",
2176
+ "embedding_model",
2177
+ "ingest_data",
2178
+ "search_query",
2179
+ "should_cache_vector_store",
2180
+ "search_method",
2181
+ "reranker",
2182
+ "lexical_terms",
2183
+ "number_of_results",
2184
+ "search_type",
2185
+ "search_score_threshold",
2186
+ "advanced_search_filter",
2187
+ "autodetect_collection",
2188
+ "content_field",
2189
+ "deletion_field",
2190
+ "ignore_invalid_documents",
2191
+ "astradb_vectorstore_kwargs"
2355
2192
  ],
2356
2193
  "frozen": false,
2357
- "icon": "brain-circuit",
2194
+ "icon": "AstraDB",
2358
2195
  "legacy": false,
2359
2196
  "metadata": {
2360
- "keywords": [
2361
- "model",
2362
- "llm",
2363
- "language model",
2364
- "large language model"
2365
- ]
2197
+ "code_hash": "23fbe9daca09",
2198
+ "dependencies": {
2199
+ "dependencies": [
2200
+ {
2201
+ "name": "astrapy",
2202
+ "version": "2.0.1"
2203
+ },
2204
+ {
2205
+ "name": "langchain_astradb",
2206
+ "version": "0.6.0"
2207
+ },
2208
+ {
2209
+ "name": "langchain_core",
2210
+ "version": "0.3.75"
2211
+ },
2212
+ {
2213
+ "name": "langflow",
2214
+ "version": null
2215
+ }
2216
+ ],
2217
+ "total_dependencies": 4
2218
+ },
2219
+ "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
2366
2220
  },
2367
2221
  "minimized": false,
2368
2222
  "output_types": [],
@@ -2370,156 +2224,444 @@
2370
2224
  {
2371
2225
  "allows_loop": false,
2372
2226
  "cache": true,
2373
- "display_name": "Model Response",
2227
+ "display_name": "Search Results",
2374
2228
  "group_outputs": false,
2375
- "method": "text_response",
2376
- "name": "text_output",
2377
- "options": null,
2378
- "required_inputs": null,
2379
- "selected": "Message",
2229
+ "method": "search_documents",
2230
+ "name": "search_results",
2231
+ "selected": "Data",
2380
2232
  "tool_mode": true,
2381
2233
  "types": [
2382
- "Message"
2234
+ "Data"
2383
2235
  ],
2384
2236
  "value": "__UNDEFINED__"
2385
2237
  },
2386
2238
  {
2387
2239
  "allows_loop": false,
2388
2240
  "cache": true,
2389
- "display_name": "Language Model",
2241
+ "display_name": "DataFrame",
2390
2242
  "group_outputs": false,
2391
- "method": "build_model",
2392
- "name": "model_output",
2393
- "options": null,
2394
- "required_inputs": null,
2395
- "selected": "LanguageModel",
2243
+ "method": "as_dataframe",
2244
+ "name": "dataframe",
2245
+ "selected": "DataFrame",
2396
2246
  "tool_mode": true,
2397
2247
  "types": [
2398
- "LanguageModel"
2248
+ "DataFrame"
2249
+ ],
2250
+ "value": "__UNDEFINED__"
2251
+ },
2252
+ {
2253
+ "allows_loop": false,
2254
+ "cache": true,
2255
+ "display_name": "Vector Store Connection",
2256
+ "group_outputs": false,
2257
+ "hidden": true,
2258
+ "method": "as_vector_store",
2259
+ "name": "vectorstoreconnection",
2260
+ "selected": "VectorStore",
2261
+ "tool_mode": true,
2262
+ "types": [
2263
+ "VectorStore"
2399
2264
  ],
2400
2265
  "value": "__UNDEFINED__"
2401
2266
  }
2402
2267
  ],
2403
2268
  "pinned": false,
2404
- "priority": 0,
2405
2269
  "template": {
2406
2270
  "_type": "Component",
2407
- "api_key": {
2408
- "_input_type": "SecretStrInput",
2271
+ "advanced_search_filter": {
2272
+ "_input_type": "NestedDictInput",
2273
+ "advanced": true,
2274
+ "display_name": "Search Metadata Filter",
2275
+ "dynamic": false,
2276
+ "info": "Optional dictionary of filters to apply to the search query.",
2277
+ "list": false,
2278
+ "list_add_label": "Add More",
2279
+ "name": "advanced_search_filter",
2280
+ "placeholder": "",
2281
+ "required": false,
2282
+ "show": true,
2283
+ "title_case": false,
2284
+ "tool_mode": false,
2285
+ "trace_as_input": true,
2286
+ "trace_as_metadata": true,
2287
+ "type": "NestedDict",
2288
+ "value": {}
2289
+ },
2290
+ "api_endpoint": {
2291
+ "_input_type": "DropdownInput",
2292
+ "advanced": true,
2293
+ "combobox": false,
2294
+ "dialog_inputs": {},
2295
+ "display_name": "Astra DB API Endpoint",
2296
+ "dynamic": false,
2297
+ "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
2298
+ "name": "api_endpoint",
2299
+ "options": [],
2300
+ "options_metadata": [],
2301
+ "placeholder": "",
2302
+ "required": false,
2303
+ "show": true,
2304
+ "title_case": false,
2305
+ "toggle": false,
2306
+ "tool_mode": false,
2307
+ "trace_as_metadata": true,
2308
+ "type": "str",
2309
+ "value": ""
2310
+ },
2311
+ "astradb_vectorstore_kwargs": {
2312
+ "_input_type": "NestedDictInput",
2313
+ "advanced": true,
2314
+ "display_name": "AstraDBVectorStore Parameters",
2315
+ "dynamic": false,
2316
+ "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
2317
+ "list": false,
2318
+ "list_add_label": "Add More",
2319
+ "name": "astradb_vectorstore_kwargs",
2320
+ "placeholder": "",
2321
+ "required": false,
2322
+ "show": true,
2323
+ "title_case": false,
2324
+ "tool_mode": false,
2325
+ "trace_as_input": true,
2326
+ "trace_as_metadata": true,
2327
+ "type": "NestedDict",
2328
+ "value": {}
2329
+ },
2330
+ "autodetect_collection": {
2331
+ "_input_type": "BoolInput",
2332
+ "advanced": true,
2333
+ "display_name": "Autodetect Collection",
2334
+ "dynamic": false,
2335
+ "info": "Boolean flag to determine whether to autodetect the collection.",
2336
+ "list": false,
2337
+ "list_add_label": "Add More",
2338
+ "name": "autodetect_collection",
2339
+ "placeholder": "",
2340
+ "required": false,
2341
+ "show": true,
2342
+ "title_case": false,
2343
+ "tool_mode": false,
2344
+ "trace_as_metadata": true,
2345
+ "type": "bool",
2346
+ "value": true
2347
+ },
2348
+ "code": {
2349
+ "advanced": true,
2350
+ "dynamic": true,
2351
+ "fileTypes": [],
2352
+ "file_path": "",
2353
+ "info": "",
2354
+ "list": false,
2355
+ "load_from_db": false,
2356
+ "multiline": true,
2357
+ "name": "code",
2358
+ "password": false,
2359
+ "placeholder": "",
2360
+ "required": true,
2361
+ "show": true,
2362
+ "title_case": false,
2363
+ "type": "code",
2364
+ "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n '<a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" '\n 'rel=\"noopener noreferrer\">your database in Astra DB</a>'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f'<a class=\"underline\" target=\"_blank\" rel=\"noopener noreferrer\" '\n f'href=\"https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}\">'\n \"your database in Astra DB</a>.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
2365
+ },
2366
+ "collection_name": {
2367
+ "_input_type": "DropdownInput",
2409
2368
  "advanced": false,
2410
- "display_name": "OpenAI API Key",
2369
+ "combobox": true,
2370
+ "dialog_inputs": {
2371
+ "fields": {
2372
+ "data": {
2373
+ "node": {
2374
+ "description": "Please allow several seconds for creation to complete.",
2375
+ "display_name": "Create new collection",
2376
+ "field_order": [
2377
+ "01_new_collection_name",
2378
+ "02_embedding_generation_provider",
2379
+ "03_embedding_generation_model",
2380
+ "04_dimension"
2381
+ ],
2382
+ "name": "create_collection",
2383
+ "template": {
2384
+ "01_new_collection_name": {
2385
+ "_input_type": "StrInput",
2386
+ "advanced": false,
2387
+ "display_name": "Name",
2388
+ "dynamic": false,
2389
+ "info": "Name of the new collection to create in Astra DB.",
2390
+ "list": false,
2391
+ "list_add_label": "Add More",
2392
+ "load_from_db": false,
2393
+ "name": "new_collection_name",
2394
+ "placeholder": "",
2395
+ "required": true,
2396
+ "show": true,
2397
+ "title_case": false,
2398
+ "tool_mode": false,
2399
+ "trace_as_metadata": true,
2400
+ "type": "str",
2401
+ "value": ""
2402
+ },
2403
+ "02_embedding_generation_provider": {
2404
+ "_input_type": "DropdownInput",
2405
+ "advanced": false,
2406
+ "combobox": false,
2407
+ "dialog_inputs": {},
2408
+ "display_name": "Embedding generation method",
2409
+ "dynamic": false,
2410
+ "helper_text": "To create collections with more embedding provider options, go to <a class=\"underline\" href=\"https://astra.datastax.com/\" target=\" _blank\" rel=\"noopener noreferrer\">your database in Astra DB</a>",
2411
+ "info": "Provider to use for generating embeddings.",
2412
+ "name": "embedding_generation_provider",
2413
+ "options": [],
2414
+ "options_metadata": [],
2415
+ "placeholder": "",
2416
+ "real_time_refresh": true,
2417
+ "required": true,
2418
+ "show": true,
2419
+ "title_case": false,
2420
+ "toggle": false,
2421
+ "tool_mode": false,
2422
+ "trace_as_metadata": true,
2423
+ "type": "str",
2424
+ "value": ""
2425
+ },
2426
+ "03_embedding_generation_model": {
2427
+ "_input_type": "DropdownInput",
2428
+ "advanced": false,
2429
+ "combobox": false,
2430
+ "dialog_inputs": {},
2431
+ "display_name": "Embedding model",
2432
+ "dynamic": false,
2433
+ "info": "Model to use for generating embeddings.",
2434
+ "name": "embedding_generation_model",
2435
+ "options": [],
2436
+ "options_metadata": [],
2437
+ "placeholder": "",
2438
+ "real_time_refresh": true,
2439
+ "required": false,
2440
+ "show": true,
2441
+ "title_case": false,
2442
+ "toggle": false,
2443
+ "tool_mode": false,
2444
+ "trace_as_metadata": true,
2445
+ "type": "str",
2446
+ "value": ""
2447
+ },
2448
+ "04_dimension": {
2449
+ "_input_type": "IntInput",
2450
+ "advanced": false,
2451
+ "display_name": "Dimensions",
2452
+ "dynamic": false,
2453
+ "info": "Dimensions of the embeddings to generate.",
2454
+ "list": false,
2455
+ "list_add_label": "Add More",
2456
+ "name": "dimension",
2457
+ "placeholder": "",
2458
+ "required": false,
2459
+ "show": true,
2460
+ "title_case": false,
2461
+ "tool_mode": false,
2462
+ "trace_as_metadata": true,
2463
+ "type": "int"
2464
+ }
2465
+ }
2466
+ }
2467
+ }
2468
+ },
2469
+ "functionality": "create"
2470
+ },
2471
+ "display_name": "Collection",
2411
2472
  "dynamic": false,
2412
- "info": "Model Provider API key",
2413
- "input_types": [],
2414
- "load_from_db": true,
2415
- "name": "api_key",
2416
- "password": true,
2473
+ "info": "The name of the collection within Astra DB where the vectors will be stored.",
2474
+ "name": "collection_name",
2475
+ "options": [],
2476
+ "options_metadata": [],
2417
2477
  "placeholder": "",
2418
2478
  "real_time_refresh": true,
2419
- "required": false,
2420
- "show": true,
2479
+ "refresh_button": true,
2480
+ "required": true,
2481
+ "show": false,
2421
2482
  "title_case": false,
2483
+ "toggle": false,
2484
+ "tool_mode": false,
2485
+ "trace_as_metadata": true,
2422
2486
  "type": "str",
2423
- "value": "OPENAI_API_KEY"
2487
+ "value": ""
2424
2488
  },
2425
- "code": {
2489
+ "content_field": {
2490
+ "_input_type": "StrInput",
2426
2491
  "advanced": true,
2427
- "dynamic": true,
2428
- "fileTypes": [],
2429
- "file_path": "",
2430
- "info": "",
2492
+ "display_name": "Content Field",
2493
+ "dynamic": false,
2494
+ "info": "Field to use as the text content field for the vector store.",
2431
2495
  "list": false,
2496
+ "list_add_label": "Add More",
2432
2497
  "load_from_db": false,
2433
- "multiline": true,
2434
- "name": "code",
2435
- "password": false,
2498
+ "name": "content_field",
2436
2499
  "placeholder": "",
2437
- "required": true,
2500
+ "required": false,
2438
2501
  "show": true,
2439
2502
  "title_case": false,
2440
- "type": "code",
2441
- "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n"
2503
+ "tool_mode": false,
2504
+ "trace_as_metadata": true,
2505
+ "type": "str",
2506
+ "value": ""
2442
2507
  },
2443
- "input_value": {
2444
- "_input_type": "MessageInput",
2508
+ "database_name": {
2509
+ "_input_type": "DropdownInput",
2445
2510
  "advanced": false,
2446
- "display_name": "Input",
2511
+ "combobox": true,
2512
+ "dialog_inputs": {
2513
+ "fields": {
2514
+ "data": {
2515
+ "node": {
2516
+ "description": "Please allow several minutes for creation to complete.",
2517
+ "display_name": "Create new database",
2518
+ "field_order": [
2519
+ "01_new_database_name",
2520
+ "02_cloud_provider",
2521
+ "03_region"
2522
+ ],
2523
+ "name": "create_database",
2524
+ "template": {
2525
+ "01_new_database_name": {
2526
+ "_input_type": "StrInput",
2527
+ "advanced": false,
2528
+ "display_name": "Name",
2529
+ "dynamic": false,
2530
+ "info": "Name of the new database to create in Astra DB.",
2531
+ "list": false,
2532
+ "list_add_label": "Add More",
2533
+ "load_from_db": false,
2534
+ "name": "new_database_name",
2535
+ "placeholder": "",
2536
+ "required": true,
2537
+ "show": true,
2538
+ "title_case": false,
2539
+ "tool_mode": false,
2540
+ "trace_as_metadata": true,
2541
+ "type": "str",
2542
+ "value": ""
2543
+ },
2544
+ "02_cloud_provider": {
2545
+ "_input_type": "DropdownInput",
2546
+ "advanced": false,
2547
+ "combobox": false,
2548
+ "dialog_inputs": {},
2549
+ "display_name": "Cloud provider",
2550
+ "dynamic": false,
2551
+ "info": "Cloud provider for the new database.",
2552
+ "name": "cloud_provider",
2553
+ "options": [],
2554
+ "options_metadata": [],
2555
+ "placeholder": "",
2556
+ "real_time_refresh": true,
2557
+ "required": true,
2558
+ "show": true,
2559
+ "title_case": false,
2560
+ "toggle": false,
2561
+ "tool_mode": false,
2562
+ "trace_as_metadata": true,
2563
+ "type": "str",
2564
+ "value": ""
2565
+ },
2566
+ "03_region": {
2567
+ "_input_type": "DropdownInput",
2568
+ "advanced": false,
2569
+ "combobox": false,
2570
+ "dialog_inputs": {},
2571
+ "display_name": "Region",
2572
+ "dynamic": false,
2573
+ "info": "Region for the new database.",
2574
+ "name": "region",
2575
+ "options": [],
2576
+ "options_metadata": [],
2577
+ "placeholder": "",
2578
+ "required": true,
2579
+ "show": true,
2580
+ "title_case": false,
2581
+ "toggle": false,
2582
+ "tool_mode": false,
2583
+ "trace_as_metadata": true,
2584
+ "type": "str",
2585
+ "value": ""
2586
+ }
2587
+ }
2588
+ }
2589
+ }
2590
+ },
2591
+ "functionality": "create"
2592
+ },
2593
+ "display_name": "Database",
2447
2594
  "dynamic": false,
2448
- "info": "The input text to send to the model",
2449
- "input_types": [
2450
- "Message"
2451
- ],
2595
+ "info": "The Database name for the Astra DB instance.",
2596
+ "name": "database_name",
2597
+ "options": [],
2598
+ "options_metadata": [],
2599
+ "placeholder": "",
2600
+ "real_time_refresh": true,
2601
+ "refresh_button": true,
2602
+ "required": true,
2603
+ "show": true,
2604
+ "title_case": false,
2605
+ "toggle": false,
2606
+ "tool_mode": false,
2607
+ "trace_as_metadata": true,
2608
+ "type": "str",
2609
+ "value": ""
2610
+ },
2611
+ "deletion_field": {
2612
+ "_input_type": "StrInput",
2613
+ "advanced": true,
2614
+ "display_name": "Deletion Based On Field",
2615
+ "dynamic": false,
2616
+ "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
2452
2617
  "list": false,
2453
2618
  "list_add_label": "Add More",
2454
2619
  "load_from_db": false,
2455
- "name": "input_value",
2620
+ "name": "deletion_field",
2456
2621
  "placeholder": "",
2457
2622
  "required": false,
2458
2623
  "show": true,
2459
2624
  "title_case": false,
2460
2625
  "tool_mode": false,
2461
- "trace_as_input": true,
2462
2626
  "trace_as_metadata": true,
2463
2627
  "type": "str",
2464
2628
  "value": ""
2465
2629
  },
2466
- "model_name": {
2467
- "_input_type": "DropdownInput",
2630
+ "embedding_model": {
2631
+ "_input_type": "HandleInput",
2468
2632
  "advanced": false,
2469
- "combobox": false,
2470
- "dialog_inputs": {},
2471
- "display_name": "Model Name",
2633
+ "display_name": "Embedding Model",
2472
2634
  "dynamic": false,
2473
- "info": "Select the model to use",
2474
- "name": "model_name",
2475
- "options": [
2476
- "gpt-4o-mini",
2477
- "gpt-4o",
2478
- "gpt-4.1",
2479
- "gpt-4.1-mini",
2480
- "gpt-4.1-nano",
2481
- "gpt-4.5-preview",
2482
- "gpt-4-turbo",
2483
- "gpt-4-turbo-preview",
2484
- "gpt-4",
2485
- "gpt-3.5-turbo"
2635
+ "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
2636
+ "input_types": [
2637
+ "Embeddings"
2486
2638
  ],
2487
- "options_metadata": [],
2639
+ "list": false,
2640
+ "list_add_label": "Add More",
2641
+ "name": "embedding_model",
2488
2642
  "placeholder": "",
2489
2643
  "required": false,
2490
- "show": true,
2644
+ "show": false,
2491
2645
  "title_case": false,
2492
- "toggle": false,
2493
- "tool_mode": false,
2494
2646
  "trace_as_metadata": true,
2495
- "type": "str",
2496
- "value": "gpt-4o-mini"
2647
+ "type": "other",
2648
+ "value": ""
2497
2649
  },
2498
- "provider": {
2650
+ "environment": {
2499
2651
  "_input_type": "DropdownInput",
2500
- "advanced": false,
2501
- "combobox": false,
2652
+ "advanced": true,
2653
+ "combobox": true,
2502
2654
  "dialog_inputs": {},
2503
- "display_name": "Model Provider",
2655
+ "display_name": "Environment",
2504
2656
  "dynamic": false,
2505
- "info": "Select the model provider",
2506
- "name": "provider",
2657
+ "info": "The environment for the Astra DB API Endpoint.",
2658
+ "name": "environment",
2507
2659
  "options": [
2508
- "OpenAI",
2509
- "Anthropic",
2510
- "Google"
2511
- ],
2512
- "options_metadata": [
2513
- {
2514
- "icon": "OpenAI"
2515
- },
2516
- {
2517
- "icon": "Anthropic"
2518
- },
2519
- {
2520
- "icon": "Google"
2521
- }
2660
+ "prod",
2661
+ "test",
2662
+ "dev"
2522
2663
  ],
2664
+ "options_metadata": [],
2523
2665
  "placeholder": "",
2524
2666
  "real_time_refresh": true,
2525
2667
  "required": false,
@@ -2529,17 +2671,17 @@
2529
2671
  "tool_mode": false,
2530
2672
  "trace_as_metadata": true,
2531
2673
  "type": "str",
2532
- "value": "OpenAI"
2674
+ "value": "prod"
2533
2675
  },
2534
- "stream": {
2676
+ "ignore_invalid_documents": {
2535
2677
  "_input_type": "BoolInput",
2536
2678
  "advanced": true,
2537
- "display_name": "Stream",
2679
+ "display_name": "Ignore Invalid Documents",
2538
2680
  "dynamic": false,
2539
- "info": "Whether to stream the response",
2681
+ "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
2540
2682
  "list": false,
2541
2683
  "list_add_label": "Add More",
2542
- "name": "stream",
2684
+ "name": "ignore_invalid_documents",
2543
2685
  "placeholder": "",
2544
2686
  "required": false,
2545
2687
  "show": true,
@@ -2549,416 +2691,278 @@
2549
2691
  "type": "bool",
2550
2692
  "value": false
2551
2693
  },
2552
- "system_message": {
2553
- "_input_type": "MultilineInput",
2694
+ "ingest_data": {
2695
+ "_input_type": "HandleInput",
2696
+ "advanced": false,
2697
+ "display_name": "Ingest Data",
2698
+ "dynamic": false,
2699
+ "info": "",
2700
+ "input_types": [
2701
+ "Data",
2702
+ "DataFrame"
2703
+ ],
2704
+ "list": true,
2705
+ "list_add_label": "Add More",
2706
+ "name": "ingest_data",
2707
+ "placeholder": "",
2708
+ "required": false,
2709
+ "show": true,
2710
+ "title_case": false,
2711
+ "trace_as_metadata": true,
2712
+ "type": "other",
2713
+ "value": ""
2714
+ },
2715
+ "keyspace": {
2716
+ "_input_type": "DropdownInput",
2554
2717
  "advanced": true,
2555
- "copy_field": false,
2556
- "display_name": "System Message",
2718
+ "combobox": false,
2719
+ "dialog_inputs": {},
2720
+ "display_name": "Keyspace",
2557
2721
  "dynamic": false,
2558
- "info": "A system message that helps set the behavior of the assistant",
2722
+ "info": "Optional keyspace within Astra DB to use for the collection.",
2723
+ "name": "keyspace",
2724
+ "options": [],
2725
+ "options_metadata": [],
2726
+ "placeholder": "",
2727
+ "real_time_refresh": true,
2728
+ "required": false,
2729
+ "show": true,
2730
+ "title_case": false,
2731
+ "toggle": false,
2732
+ "tool_mode": false,
2733
+ "trace_as_metadata": true,
2734
+ "type": "str",
2735
+ "value": ""
2736
+ },
2737
+ "lexical_terms": {
2738
+ "_input_type": "QueryInput",
2739
+ "advanced": false,
2740
+ "display_name": "Lexical Terms",
2741
+ "dynamic": false,
2742
+ "info": "Add additional terms/keywords to augment search precision.",
2559
2743
  "input_types": [
2560
2744
  "Message"
2561
2745
  ],
2562
2746
  "list": false,
2563
2747
  "list_add_label": "Add More",
2564
2748
  "load_from_db": false,
2565
- "multiline": true,
2566
- "name": "system_message",
2567
- "placeholder": "",
2749
+ "name": "lexical_terms",
2750
+ "placeholder": "Enter terms to search...",
2568
2751
  "required": false,
2752
+ "separator": " ",
2569
2753
  "show": true,
2570
2754
  "title_case": false,
2571
2755
  "tool_mode": false,
2572
2756
  "trace_as_input": true,
2573
2757
  "trace_as_metadata": true,
2574
- "type": "str",
2758
+ "type": "query",
2575
2759
  "value": ""
2576
2760
  },
2577
- "temperature": {
2578
- "_input_type": "SliderInput",
2761
+ "number_of_results": {
2762
+ "_input_type": "IntInput",
2579
2763
  "advanced": true,
2580
- "display_name": "Temperature",
2764
+ "display_name": "Number of Search Results",
2581
2765
  "dynamic": false,
2582
- "info": "Controls randomness in responses",
2583
- "max_label": "",
2584
- "max_label_icon": "",
2585
- "min_label": "",
2586
- "min_label_icon": "",
2587
- "name": "temperature",
2766
+ "info": "Number of search results to return.",
2767
+ "list": false,
2768
+ "list_add_label": "Add More",
2769
+ "name": "number_of_results",
2588
2770
  "placeholder": "",
2589
- "range_spec": {
2590
- "max": 1,
2591
- "min": 0,
2592
- "step": 0.01,
2593
- "step_type": "float"
2594
- },
2595
2771
  "required": false,
2596
2772
  "show": true,
2597
- "slider_buttons": false,
2598
- "slider_buttons_options": [],
2599
- "slider_input": false,
2600
2773
  "title_case": false,
2601
2774
  "tool_mode": false,
2602
- "type": "slider",
2603
- "value": 0.1
2604
- }
2605
- },
2606
- "tool_mode": false
2607
- },
2608
- "selected_output": "model_output",
2609
- "showNode": true,
2610
- "type": "LanguageModelComponent"
2611
- },
2612
- "dragging": false,
2613
- "id": "LanguageModelComponent-pB4iD",
2614
- "measured": {
2615
- "height": 451,
2616
- "width": 320
2617
- },
2618
- "position": {
2619
- "x": 322.5971643968167,
2620
- "y": -36.64113990031162
2621
- },
2622
- "selected": false,
2623
- "type": "genericNode"
2624
- },
2625
- {
2626
- "data": {
2627
- "id": "StructuredOutput-n8Y3t",
2628
- "node": {
2629
- "base_classes": [
2630
- "Data"
2631
- ],
2632
- "beta": false,
2633
- "conditional_paths": [],
2634
- "custom_fields": {},
2635
- "description": "Uses an LLM to generate structured data. Ideal for extraction and consistency.",
2636
- "display_name": "Structured Output",
2637
- "documentation": "",
2638
- "edited": false,
2639
- "field_order": [
2640
- "llm",
2641
- "input_value",
2642
- "system_prompt",
2643
- "schema_name",
2644
- "output_schema"
2645
- ],
2646
- "frozen": false,
2647
- "icon": "braces",
2648
- "legacy": false,
2649
- "metadata": {
2650
- "code_hash": "ad2a6f4552c0",
2651
- "dependencies": {
2652
- "dependencies": [
2653
- {
2654
- "name": "pydantic",
2655
- "version": "2.10.6"
2656
- },
2657
- {
2658
- "name": "trustcall",
2659
- "version": "0.0.39"
2660
- },
2661
- {
2662
- "name": "langflow",
2663
- "version": null
2664
- }
2665
- ],
2666
- "total_dependencies": 3
2775
+ "trace_as_metadata": true,
2776
+ "type": "int",
2777
+ "value": 4
2667
2778
  },
2668
- "module": "langflow.components.processing.structured_output.StructuredOutputComponent"
2669
- },
2670
- "minimized": false,
2671
- "output_types": [],
2672
- "outputs": [
2673
- {
2674
- "allows_loop": false,
2675
- "cache": true,
2676
- "display_name": "Structured Output",
2677
- "group_outputs": false,
2678
- "method": "build_structured_output",
2679
- "name": "structured_output",
2680
- "selected": "Data",
2681
- "tool_mode": true,
2682
- "types": [
2683
- "Data"
2684
- ],
2685
- "value": "__UNDEFINED__"
2779
+ "reranker": {
2780
+ "_input_type": "DropdownInput",
2781
+ "advanced": false,
2782
+ "combobox": false,
2783
+ "dialog_inputs": {},
2784
+ "display_name": "Reranker",
2785
+ "dynamic": false,
2786
+ "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
2787
+ "name": "reranker",
2788
+ "options": [],
2789
+ "options_metadata": [],
2790
+ "placeholder": "",
2791
+ "required": false,
2792
+ "show": false,
2793
+ "title_case": false,
2794
+ "toggle": true,
2795
+ "tool_mode": false,
2796
+ "trace_as_metadata": true,
2797
+ "type": "str",
2798
+ "value": ""
2686
2799
  },
2687
- {
2688
- "allows_loop": false,
2689
- "cache": true,
2690
- "display_name": "Structured Output",
2691
- "group_outputs": false,
2692
- "method": "build_structured_dataframe",
2693
- "name": "dataframe_output",
2694
- "selected": "DataFrame",
2695
- "tool_mode": true,
2696
- "types": [
2697
- "DataFrame"
2698
- ],
2699
- "value": "__UNDEFINED__"
2700
- }
2701
- ],
2702
- "pinned": false,
2703
- "template": {
2704
- "_type": "Component",
2705
- "code": {
2800
+ "search_method": {
2801
+ "_input_type": "DropdownInput",
2706
2802
  "advanced": true,
2707
- "dynamic": true,
2708
- "fileTypes": [],
2709
- "file_path": "",
2710
- "info": "",
2711
- "list": false,
2712
- "load_from_db": false,
2713
- "multiline": true,
2714
- "name": "code",
2715
- "password": false,
2803
+ "combobox": false,
2804
+ "dialog_inputs": {},
2805
+ "display_name": "Search Method",
2806
+ "dynamic": false,
2807
+ "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
2808
+ "name": "search_method",
2809
+ "options": [
2810
+ "Hybrid Search",
2811
+ "Vector Search"
2812
+ ],
2813
+ "options_metadata": [
2814
+ {
2815
+ "icon": "SearchHybrid"
2816
+ },
2817
+ {
2818
+ "icon": "SearchVector"
2819
+ }
2820
+ ],
2716
2821
  "placeholder": "",
2717
- "required": true,
2822
+ "real_time_refresh": true,
2823
+ "required": false,
2718
2824
  "show": true,
2719
2825
  "title_case": false,
2720
- "type": "code",
2721
- "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. Ideal for extraction and consistency.\"\n documentation: str = \"https://docs.langflow.org/components-processing#structured-output\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"dataframe_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) == 1:\n return Data(data=output[0])\n if len(output) > 1:\n # Multiple outputs - wrap them in a results container\n return Data(data={\"results\": output})\n return Data()\n\n def build_structured_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n data_list = [Data(data=output[0])] if len(output) == 1 else [Data(data=item) for item in output]\n\n return DataFrame(data_list)\n"
2826
+ "toggle": false,
2827
+ "tool_mode": false,
2828
+ "trace_as_metadata": true,
2829
+ "type": "str",
2830
+ "value": "Vector Search"
2722
2831
  },
2723
- "input_value": {
2724
- "_input_type": "MessageTextInput",
2832
+ "search_query": {
2833
+ "_input_type": "QueryInput",
2725
2834
  "advanced": false,
2726
- "display_name": "Input Message",
2835
+ "display_name": "Search Query",
2727
2836
  "dynamic": false,
2728
- "info": "The input message to the language model.",
2837
+ "info": "Enter a query to run a similarity search.",
2729
2838
  "input_types": [
2730
2839
  "Message"
2731
2840
  ],
2732
2841
  "list": false,
2733
2842
  "list_add_label": "Add More",
2734
2843
  "load_from_db": false,
2735
- "name": "input_value",
2736
- "placeholder": "",
2737
- "required": true,
2844
+ "name": "search_query",
2845
+ "placeholder": "Enter a query...",
2846
+ "required": false,
2738
2847
  "show": true,
2739
2848
  "title_case": false,
2740
2849
  "tool_mode": true,
2741
2850
  "trace_as_input": true,
2742
2851
  "trace_as_metadata": true,
2743
- "type": "str",
2852
+ "type": "query",
2744
2853
  "value": ""
2745
2854
  },
2746
- "llm": {
2747
- "_input_type": "HandleInput",
2748
- "advanced": false,
2749
- "display_name": "Language Model",
2855
+ "search_score_threshold": {
2856
+ "_input_type": "FloatInput",
2857
+ "advanced": true,
2858
+ "display_name": "Search Score Threshold",
2750
2859
  "dynamic": false,
2751
- "info": "The language model to use to generate the structured output.",
2752
- "input_types": [
2753
- "LanguageModel"
2754
- ],
2860
+ "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
2755
2861
  "list": false,
2756
2862
  "list_add_label": "Add More",
2757
- "name": "llm",
2863
+ "name": "search_score_threshold",
2758
2864
  "placeholder": "",
2759
- "required": true,
2865
+ "required": false,
2760
2866
  "show": true,
2761
2867
  "title_case": false,
2868
+ "tool_mode": false,
2762
2869
  "trace_as_metadata": true,
2763
- "type": "other",
2764
- "value": ""
2870
+ "type": "float",
2871
+ "value": 0
2765
2872
  },
2766
- "output_schema": {
2767
- "_input_type": "TableInput",
2768
- "advanced": false,
2769
- "display_name": "Output Schema",
2873
+ "search_type": {
2874
+ "_input_type": "DropdownInput",
2875
+ "advanced": true,
2876
+ "combobox": false,
2877
+ "dialog_inputs": {},
2878
+ "display_name": "Search Type",
2770
2879
  "dynamic": false,
2771
- "info": "Define the structure and data types for the model's output.",
2772
- "is_list": true,
2773
- "list_add_label": "Add More",
2774
- "name": "output_schema",
2880
+ "info": "Search type to use",
2881
+ "name": "search_type",
2882
+ "options": [
2883
+ "Similarity",
2884
+ "Similarity with score threshold",
2885
+ "MMR (Max Marginal Relevance)"
2886
+ ],
2887
+ "options_metadata": [],
2775
2888
  "placeholder": "",
2776
- "required": true,
2889
+ "required": false,
2777
2890
  "show": true,
2778
- "table_icon": "Table",
2779
- "table_schema": {
2780
- "columns": [
2781
- {
2782
- "default": "field",
2783
- "description": "Specify the name of the output field.",
2784
- "disable_edit": false,
2785
- "display_name": "Name",
2786
- "edit_mode": "inline",
2787
- "filterable": true,
2788
- "formatter": "text",
2789
- "hidden": false,
2790
- "name": "name",
2791
- "sortable": true,
2792
- "type": "str"
2793
- },
2794
- {
2795
- "default": "description of field",
2796
- "description": "Describe the purpose of the output field.",
2797
- "disable_edit": false,
2798
- "display_name": "Description",
2799
- "edit_mode": "popover",
2800
- "filterable": true,
2801
- "formatter": "text",
2802
- "hidden": false,
2803
- "name": "description",
2804
- "sortable": true,
2805
- "type": "str"
2806
- },
2807
- {
2808
- "default": "str",
2809
- "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).",
2810
- "disable_edit": false,
2811
- "display_name": "Type",
2812
- "edit_mode": "inline",
2813
- "filterable": true,
2814
- "formatter": "text",
2815
- "hidden": false,
2816
- "name": "type",
2817
- "options": [
2818
- "str",
2819
- "int",
2820
- "float",
2821
- "bool",
2822
- "dict"
2823
- ],
2824
- "sortable": true,
2825
- "type": "str"
2826
- },
2827
- {
2828
- "default": false,
2829
- "description": "Set to True if this output field should be a list of the specified type.",
2830
- "disable_edit": false,
2831
- "display_name": "As List",
2832
- "edit_mode": "inline",
2833
- "filterable": true,
2834
- "formatter": "boolean",
2835
- "hidden": false,
2836
- "name": "multiple",
2837
- "sortable": true,
2838
- "type": "boolean"
2839
- }
2840
- ]
2841
- },
2842
2891
  "title_case": false,
2892
+ "toggle": false,
2843
2893
  "tool_mode": false,
2844
2894
  "trace_as_metadata": true,
2845
- "trigger_icon": "Table",
2846
- "trigger_text": "Open table",
2847
- "type": "table",
2848
- "value": [
2849
- {
2850
- "description": "description of field",
2851
- "multiple": "False",
2852
- "name": "field",
2853
- "type": "str"
2854
- }
2855
- ]
2895
+ "type": "str",
2896
+ "value": "Similarity"
2856
2897
  },
2857
- "schema_name": {
2858
- "_input_type": "MessageTextInput",
2898
+ "should_cache_vector_store": {
2899
+ "_input_type": "BoolInput",
2859
2900
  "advanced": true,
2860
- "display_name": "Schema Name",
2901
+ "display_name": "Cache Vector Store",
2861
2902
  "dynamic": false,
2862
- "info": "Provide a name for the output data schema.",
2863
- "input_types": [
2864
- "Message"
2865
- ],
2903
+ "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
2866
2904
  "list": false,
2867
2905
  "list_add_label": "Add More",
2868
- "load_from_db": false,
2869
- "name": "schema_name",
2906
+ "name": "should_cache_vector_store",
2870
2907
  "placeholder": "",
2871
2908
  "required": false,
2872
2909
  "show": true,
2873
2910
  "title_case": false,
2874
2911
  "tool_mode": false,
2875
- "trace_as_input": true,
2876
2912
  "trace_as_metadata": true,
2877
- "type": "str",
2878
- "value": ""
2913
+ "type": "bool",
2914
+ "value": true
2879
2915
  },
2880
- "system_prompt": {
2881
- "_input_type": "MultilineInput",
2882
- "advanced": true,
2883
- "copy_field": false,
2884
- "display_name": "Format Instructions",
2916
+ "token": {
2917
+ "_input_type": "SecretStrInput",
2918
+ "advanced": false,
2919
+ "display_name": "Astra DB Application Token",
2885
2920
  "dynamic": false,
2886
- "info": "The instructions to the language model for formatting the output.",
2887
- "input_types": [
2888
- "Message"
2889
- ],
2890
- "list": false,
2891
- "list_add_label": "Add More",
2892
- "load_from_db": false,
2893
- "multiline": true,
2894
- "name": "system_prompt",
2921
+ "info": "Authentication token for accessing Astra DB.",
2922
+ "input_types": [],
2923
+ "load_from_db": true,
2924
+ "name": "token",
2925
+ "password": true,
2895
2926
  "placeholder": "",
2927
+ "real_time_refresh": true,
2896
2928
  "required": true,
2897
2929
  "show": true,
2898
2930
  "title_case": false,
2899
- "tool_mode": false,
2900
- "trace_as_input": true,
2901
- "trace_as_metadata": true,
2902
2931
  "type": "str",
2903
- "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format."
2932
+ "value": "ASTRA_DB_APPLICATION_TOKEN"
2904
2933
  }
2905
2934
  },
2906
2935
  "tool_mode": false
2907
2936
  },
2937
+ "selected_output": "search_results",
2908
2938
  "showNode": true,
2909
- "type": "StructuredOutput"
2939
+ "type": "AstraDB"
2910
2940
  },
2911
2941
  "dragging": false,
2912
- "id": "StructuredOutput-n8Y3t",
2942
+ "id": "AstraDB-93cal",
2913
2943
  "measured": {
2914
- "height": 349,
2944
+ "height": 540,
2915
2945
  "width": 320
2916
2946
  },
2917
2947
  "position": {
2918
- "x": 735.3215653605321,
2919
- "y": 423.7970360460631
2948
+ "x": 1552.5270288197573,
2949
+ "y": 310.92605536703144
2920
2950
  },
2921
2951
  "selected": false,
2922
2952
  "type": "genericNode"
2923
- },
2924
- {
2925
- "data": {
2926
- "id": "note-AJ1HC",
2927
- "node": {
2928
- "description": "# Hybrid Search RAG\n\nHybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.\n\n## Prerequisites\n\n* An [OpenAI API key](https://platform.openai.com/)\n* An [Astra DB Application Token](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) for the Astra DB component.\n\n## Quickstart\n\n1. In the Astra DB component, add your Astra DB Application Token.\nThis connects Langflow to your Astra database.\n2. Select an Astra collection that is hybrid-enabled.\nFor more information, see the [Datastax documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html).\nThe connection appears between the Parser component and the Astra DB component when a vector database is connected.\n3. Ensure the **Lexical Terms** and **Parsed Text** ports are connected.\n4. Add your OpenAI API key in the **Language Model** model component.\n5. Open the Playground and ask a question, like \"What are the features of my data?\"",
2929
- "display_name": "",
2930
- "documentation": "",
2931
- "template": {
2932
- "backgroundColor": "blue"
2933
- }
2934
- },
2935
- "type": "note"
2936
- },
2937
- "dragging": false,
2938
- "id": "note-AJ1HC",
2939
- "measured": {
2940
- "height": 601,
2941
- "width": 575
2942
- },
2943
- "position": {
2944
- "x": 816.3801044575429,
2945
- "y": -279.19595575780494
2946
- },
2947
- "selected": false,
2948
- "type": "noteNode"
2949
2953
  }
2950
2954
  ],
2951
2955
  "viewport": {
2952
- "x": 28.84866644052977,
2953
- "y": 276.30129659855504,
2954
- "zoom": 0.5265349644912217
2956
+ "x": -29.911832824936937,
2957
+ "y": 88.77245200098008,
2958
+ "zoom": 0.582863818810844
2955
2959
  }
2956
2960
  },
2957
2961
  "description": "Explore Hybrid Search with a vector database.",
2958
2962
  "endpoint_name": null,
2959
- "id": "cd58d400-fe60-47c6-b2b5-4a7d3eada7b1",
2963
+ "id": "be9c7480-a8a2-4a12-ab32-67c1432e1504",
2960
2964
  "is_component": false,
2961
- "last_tested_version": "1.4.3",
2965
+ "last_tested_version": "1.5.0.post2",
2962
2966
  "name": "Hybrid Search RAG",
2963
2967
  "tags": [
2964
2968
  "openai",