lfx-nightly 0.2.0.dev0__py3-none-any.whl → 0.2.0.dev41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +21 -4
  3. lfx/base/agents/altk_base_agent.py +393 -0
  4. lfx/base/agents/altk_tool_wrappers.py +565 -0
  5. lfx/base/agents/events.py +2 -1
  6. lfx/base/composio/composio_base.py +159 -224
  7. lfx/base/data/base_file.py +97 -20
  8. lfx/base/data/docling_utils.py +61 -10
  9. lfx/base/data/storage_utils.py +301 -0
  10. lfx/base/data/utils.py +178 -14
  11. lfx/base/mcp/util.py +2 -2
  12. lfx/base/models/anthropic_constants.py +21 -12
  13. lfx/base/models/groq_constants.py +74 -58
  14. lfx/base/models/groq_model_discovery.py +265 -0
  15. lfx/base/models/model.py +1 -1
  16. lfx/base/models/model_utils.py +100 -0
  17. lfx/base/models/openai_constants.py +7 -0
  18. lfx/base/models/watsonx_constants.py +32 -8
  19. lfx/base/tools/run_flow.py +601 -129
  20. lfx/cli/commands.py +9 -4
  21. lfx/cli/common.py +2 -2
  22. lfx/cli/run.py +1 -1
  23. lfx/cli/script_loader.py +53 -11
  24. lfx/components/Notion/create_page.py +1 -1
  25. lfx/components/Notion/list_database_properties.py +1 -1
  26. lfx/components/Notion/list_pages.py +1 -1
  27. lfx/components/Notion/list_users.py +1 -1
  28. lfx/components/Notion/page_content_viewer.py +1 -1
  29. lfx/components/Notion/search.py +1 -1
  30. lfx/components/Notion/update_page_property.py +1 -1
  31. lfx/components/__init__.py +19 -5
  32. lfx/components/{agents → altk}/__init__.py +5 -9
  33. lfx/components/altk/altk_agent.py +193 -0
  34. lfx/components/apify/apify_actor.py +1 -1
  35. lfx/components/composio/__init__.py +70 -18
  36. lfx/components/composio/apollo_composio.py +11 -0
  37. lfx/components/composio/bitbucket_composio.py +11 -0
  38. lfx/components/composio/canva_composio.py +11 -0
  39. lfx/components/composio/coda_composio.py +11 -0
  40. lfx/components/composio/composio_api.py +10 -0
  41. lfx/components/composio/discord_composio.py +1 -1
  42. lfx/components/composio/elevenlabs_composio.py +11 -0
  43. lfx/components/composio/exa_composio.py +11 -0
  44. lfx/components/composio/firecrawl_composio.py +11 -0
  45. lfx/components/composio/fireflies_composio.py +11 -0
  46. lfx/components/composio/gmail_composio.py +1 -1
  47. lfx/components/composio/googlebigquery_composio.py +11 -0
  48. lfx/components/composio/googlecalendar_composio.py +1 -1
  49. lfx/components/composio/googledocs_composio.py +1 -1
  50. lfx/components/composio/googlemeet_composio.py +1 -1
  51. lfx/components/composio/googlesheets_composio.py +1 -1
  52. lfx/components/composio/googletasks_composio.py +1 -1
  53. lfx/components/composio/heygen_composio.py +11 -0
  54. lfx/components/composio/mem0_composio.py +11 -0
  55. lfx/components/composio/peopledatalabs_composio.py +11 -0
  56. lfx/components/composio/perplexityai_composio.py +11 -0
  57. lfx/components/composio/serpapi_composio.py +11 -0
  58. lfx/components/composio/slack_composio.py +3 -574
  59. lfx/components/composio/slackbot_composio.py +1 -1
  60. lfx/components/composio/snowflake_composio.py +11 -0
  61. lfx/components/composio/tavily_composio.py +11 -0
  62. lfx/components/composio/youtube_composio.py +2 -2
  63. lfx/components/cuga/__init__.py +34 -0
  64. lfx/components/cuga/cuga_agent.py +730 -0
  65. lfx/components/data/__init__.py +78 -28
  66. lfx/components/data_source/__init__.py +58 -0
  67. lfx/components/{data → data_source}/api_request.py +26 -3
  68. lfx/components/{data → data_source}/csv_to_data.py +15 -10
  69. lfx/components/{data → data_source}/json_to_data.py +15 -8
  70. lfx/components/{data → data_source}/news_search.py +1 -1
  71. lfx/components/{data → data_source}/rss.py +1 -1
  72. lfx/components/{data → data_source}/sql_executor.py +1 -1
  73. lfx/components/{data → data_source}/url.py +1 -1
  74. lfx/components/{data → data_source}/web_search.py +1 -1
  75. lfx/components/datastax/astradb_cql.py +1 -1
  76. lfx/components/datastax/astradb_graph.py +1 -1
  77. lfx/components/datastax/astradb_tool.py +1 -1
  78. lfx/components/datastax/astradb_vectorstore.py +1 -1
  79. lfx/components/datastax/hcd.py +1 -1
  80. lfx/components/deactivated/json_document_builder.py +1 -1
  81. lfx/components/docling/__init__.py +0 -3
  82. lfx/components/docling/chunk_docling_document.py +3 -1
  83. lfx/components/docling/export_docling_document.py +3 -1
  84. lfx/components/elastic/elasticsearch.py +1 -1
  85. lfx/components/files_and_knowledge/__init__.py +47 -0
  86. lfx/components/{data → files_and_knowledge}/directory.py +1 -1
  87. lfx/components/{data → files_and_knowledge}/file.py +304 -24
  88. lfx/components/{knowledge_bases → files_and_knowledge}/retrieval.py +2 -2
  89. lfx/components/{data → files_and_knowledge}/save_file.py +218 -31
  90. lfx/components/flow_controls/__init__.py +58 -0
  91. lfx/components/{logic → flow_controls}/conditional_router.py +1 -1
  92. lfx/components/{logic → flow_controls}/loop.py +43 -9
  93. lfx/components/flow_controls/run_flow.py +108 -0
  94. lfx/components/glean/glean_search_api.py +1 -1
  95. lfx/components/groq/groq.py +35 -28
  96. lfx/components/helpers/__init__.py +102 -0
  97. lfx/components/ibm/watsonx.py +7 -1
  98. lfx/components/input_output/__init__.py +3 -1
  99. lfx/components/input_output/chat.py +4 -3
  100. lfx/components/input_output/chat_output.py +10 -4
  101. lfx/components/input_output/text.py +1 -1
  102. lfx/components/input_output/text_output.py +1 -1
  103. lfx/components/{data → input_output}/webhook.py +1 -1
  104. lfx/components/knowledge_bases/__init__.py +59 -4
  105. lfx/components/langchain_utilities/character.py +1 -1
  106. lfx/components/langchain_utilities/csv_agent.py +84 -16
  107. lfx/components/langchain_utilities/json_agent.py +67 -12
  108. lfx/components/langchain_utilities/language_recursive.py +1 -1
  109. lfx/components/llm_operations/__init__.py +46 -0
  110. lfx/components/{processing → llm_operations}/batch_run.py +17 -8
  111. lfx/components/{processing → llm_operations}/lambda_filter.py +1 -1
  112. lfx/components/{logic → llm_operations}/llm_conditional_router.py +1 -1
  113. lfx/components/{processing/llm_router.py → llm_operations/llm_selector.py} +3 -3
  114. lfx/components/{processing → llm_operations}/structured_output.py +1 -1
  115. lfx/components/logic/__init__.py +126 -0
  116. lfx/components/mem0/mem0_chat_memory.py +11 -0
  117. lfx/components/models/__init__.py +64 -9
  118. lfx/components/models_and_agents/__init__.py +49 -0
  119. lfx/components/{agents → models_and_agents}/agent.py +6 -4
  120. lfx/components/models_and_agents/embedding_model.py +353 -0
  121. lfx/components/models_and_agents/language_model.py +398 -0
  122. lfx/components/{agents → models_and_agents}/mcp_component.py +53 -44
  123. lfx/components/{helpers → models_and_agents}/memory.py +1 -1
  124. lfx/components/nvidia/system_assist.py +1 -1
  125. lfx/components/olivya/olivya.py +1 -1
  126. lfx/components/ollama/ollama.py +24 -5
  127. lfx/components/processing/__init__.py +9 -60
  128. lfx/components/processing/converter.py +1 -1
  129. lfx/components/processing/dataframe_operations.py +1 -1
  130. lfx/components/processing/parse_json_data.py +2 -2
  131. lfx/components/processing/parser.py +1 -1
  132. lfx/components/processing/split_text.py +1 -1
  133. lfx/components/qdrant/qdrant.py +1 -1
  134. lfx/components/redis/redis.py +1 -1
  135. lfx/components/twelvelabs/split_video.py +10 -0
  136. lfx/components/twelvelabs/video_file.py +12 -0
  137. lfx/components/utilities/__init__.py +43 -0
  138. lfx/components/{helpers → utilities}/calculator_core.py +1 -1
  139. lfx/components/{helpers → utilities}/current_date.py +1 -1
  140. lfx/components/{processing → utilities}/python_repl_core.py +1 -1
  141. lfx/components/vectorstores/local_db.py +9 -0
  142. lfx/components/youtube/youtube_transcripts.py +118 -30
  143. lfx/custom/custom_component/component.py +57 -1
  144. lfx/custom/custom_component/custom_component.py +68 -6
  145. lfx/custom/directory_reader/directory_reader.py +5 -2
  146. lfx/graph/edge/base.py +43 -20
  147. lfx/graph/state/model.py +15 -2
  148. lfx/graph/utils.py +6 -0
  149. lfx/graph/vertex/param_handler.py +10 -7
  150. lfx/helpers/__init__.py +12 -0
  151. lfx/helpers/flow.py +117 -0
  152. lfx/inputs/input_mixin.py +24 -1
  153. lfx/inputs/inputs.py +13 -1
  154. lfx/interface/components.py +161 -83
  155. lfx/log/logger.py +5 -3
  156. lfx/schema/image.py +2 -12
  157. lfx/services/database/__init__.py +5 -0
  158. lfx/services/database/service.py +25 -0
  159. lfx/services/deps.py +87 -22
  160. lfx/services/interfaces.py +5 -0
  161. lfx/services/manager.py +24 -10
  162. lfx/services/mcp_composer/service.py +1029 -162
  163. lfx/services/session.py +5 -0
  164. lfx/services/settings/auth.py +18 -11
  165. lfx/services/settings/base.py +56 -30
  166. lfx/services/settings/constants.py +8 -0
  167. lfx/services/storage/local.py +108 -46
  168. lfx/services/storage/service.py +171 -29
  169. lfx/template/field/base.py +3 -0
  170. lfx/utils/image.py +29 -11
  171. lfx/utils/ssrf_protection.py +384 -0
  172. lfx/utils/validate_cloud.py +26 -0
  173. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/METADATA +38 -22
  174. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/RECORD +189 -160
  175. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/WHEEL +1 -1
  176. lfx/components/agents/altk_agent.py +0 -366
  177. lfx/components/agents/cuga_agent.py +0 -1013
  178. lfx/components/docling/docling_remote_vlm.py +0 -284
  179. lfx/components/logic/run_flow.py +0 -71
  180. lfx/components/models/embedding_model.py +0 -195
  181. lfx/components/models/language_model.py +0 -144
  182. lfx/components/processing/dataframe_to_toolset.py +0 -259
  183. /lfx/components/{data → data_source}/mock_data.py +0 -0
  184. /lfx/components/{knowledge_bases → files_and_knowledge}/ingestion.py +0 -0
  185. /lfx/components/{logic → flow_controls}/data_conditional_router.py +0 -0
  186. /lfx/components/{logic → flow_controls}/flow_tool.py +0 -0
  187. /lfx/components/{logic → flow_controls}/listen.py +0 -0
  188. /lfx/components/{logic → flow_controls}/notify.py +0 -0
  189. /lfx/components/{logic → flow_controls}/pass_message.py +0 -0
  190. /lfx/components/{logic → flow_controls}/sub_flow.py +0 -0
  191. /lfx/components/{processing → models_and_agents}/prompt.py +0 -0
  192. /lfx/components/{helpers → processing}/create_list.py +0 -0
  193. /lfx/components/{helpers → processing}/output_parser.py +0 -0
  194. /lfx/components/{helpers → processing}/store_message.py +0 -0
  195. /lfx/components/{helpers → utilities}/id_generator.py +0 -0
  196. {lfx_nightly-0.2.0.dev0.dist-info → lfx_nightly-0.2.0.dev41.dist-info}/entry_points.txt +0 -0
lfx/components/ollama/ollama.py
@@ -101,6 +101,7 @@ class ChatOllamaComponent(LCModelComponent):
             info="Refer to https://ollama.com/library for more models.",
             refresh_button=True,
             real_time_refresh=True,
+            required=True,
         ),
         SecretStrInput(
             name="api_key",
@@ -122,9 +123,9 @@ class ChatOllamaComponent(LCModelComponent):
             name="format",
             display_name="Format",
             info="Specify the format of the output.",
-            advanced=False,
             table_schema=TABLE_SCHEMA,
             value=default_table_row,
+            show=False,
         ),
         DictInput(name="metadata", display_name="Metadata", info="Metadata to add to the run trace.", advanced=True),
         DropdownInput(
@@ -215,6 +216,14 @@ class ChatOllamaComponent(LCModelComponent):
         MessageTextInput(
             name="template", display_name="Template", info="Template to use for generating text.", advanced=True
         ),
+        BoolInput(
+            name="enable_structured_output",
+            display_name="Enable Structured Output",
+            info="Whether to enable structured output in the model.",
+            value=False,
+            advanced=False,
+            real_time_refresh=True,
+        ),
         *LCModelComponent.get_base_inputs(),
     ]

@@ -254,7 +263,7 @@ class ChatOllamaComponent(LCModelComponent):
         )

         try:
-            output_format = self._parse_format_field(self.format)
+            output_format = self._parse_format_field(self.format) if self.enable_structured_output else None
         except Exception as e:
             msg = f"Failed to parse the format field: {e}"
             raise ValueError(msg) from e
@@ -264,7 +273,7 @@ class ChatOllamaComponent(LCModelComponent):
             "base_url": transformed_base_url,
             "model": self.model_name,
             "mirostat": mirostat_value,
-            "format": output_format,
+            "format": output_format or None,
             "metadata": self.metadata,
             "tags": self.tags.split(",") if self.tags else None,
             "mirostat_eta": mirostat_eta,
@@ -319,6 +328,9 @@ class ChatOllamaComponent(LCModelComponent):
             return False

     async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):
+        if field_name == "enable_structured_output":  # bind enable_structured_output boolean to format show value
+            build_config["format"]["show"] = field_value
+
         if field_name == "mirostat":
             if field_value == "Disabled":
                 build_config["mirostat_eta"]["advanced"] = True
@@ -338,10 +350,17 @@ class ChatOllamaComponent(LCModelComponent):
                 build_config["mirostat_tau"]["value"] = 5

         if field_name in {"model_name", "base_url", "tool_model_enabled"}:
-            if await self.is_valid_ollama_url(self.base_url):
+            # Use field_value if base_url is being updated, otherwise use self.base_url
+            base_url_to_check = field_value if field_name == "base_url" else self.base_url
+            # Fallback to self.base_url if field_value is None or empty
+            if not base_url_to_check and field_name == "base_url":
+                base_url_to_check = self.base_url
+            logger.warning(f"Fetching Ollama models from updated URL: {base_url_to_check}")
+
+            if base_url_to_check and await self.is_valid_ollama_url(base_url_to_check):
                 tool_model_enabled = build_config["tool_model_enabled"].get("value", False) or self.tool_model_enabled
                 build_config["model_name"]["options"] = await self.get_models(
-                    self.base_url, tool_model_enabled=tool_model_enabled
+                    base_url_to_check, tool_model_enabled=tool_model_enabled
                 )
             else:
                 build_config["model_name"]["options"] = []
lfx/components/processing/__init__.py
@@ -7,98 +7,47 @@ from typing import TYPE_CHECKING, Any
 from lfx.components._importing import import_mod

 if TYPE_CHECKING:
-    from lfx.components.processing.alter_metadata import AlterMetadataComponent
-    from lfx.components.processing.batch_run import BatchRunComponent
     from lfx.components.processing.combine_text import CombineTextComponent
     from lfx.components.processing.converter import TypeConverterComponent
-    from lfx.components.processing.create_data import CreateDataComponent
+    from lfx.components.processing.create_list import CreateListComponent
     from lfx.components.processing.data_operations import DataOperationsComponent
-    from lfx.components.processing.data_to_dataframe import DataToDataFrameComponent
     from lfx.components.processing.dataframe_operations import DataFrameOperationsComponent
-    from lfx.components.processing.dataframe_to_toolset import DataFrameToToolsetComponent
-    from lfx.components.processing.dynamic_create_data import DynamicCreateDataComponent
-    from lfx.components.processing.extract_key import ExtractDataKeyComponent
-    from lfx.components.processing.filter_data import FilterDataComponent
-    from lfx.components.processing.filter_data_values import DataFilterComponent
     from lfx.components.processing.json_cleaner import JSONCleaner
-    from lfx.components.processing.lambda_filter import LambdaFilterComponent
-    from lfx.components.processing.llm_router import LLMRouterComponent
-    from lfx.components.processing.merge_data import MergeDataComponent
-    from lfx.components.processing.message_to_data import MessageToDataComponent
+    from lfx.components.processing.output_parser import OutputParserComponent
     from lfx.components.processing.parse_data import ParseDataComponent
-    from lfx.components.processing.parse_dataframe import ParseDataFrameComponent
-    from lfx.components.processing.parse_json_data import ParseJSONDataComponent
     from lfx.components.processing.parser import ParserComponent
-    from lfx.components.processing.prompt import PromptComponent
-    from lfx.components.processing.python_repl_core import PythonREPLComponent
     from lfx.components.processing.regex import RegexExtractorComponent
-    from lfx.components.processing.select_data import SelectDataComponent
     from lfx.components.processing.split_text import SplitTextComponent
-    from lfx.components.processing.structured_output import StructuredOutputComponent
-    from lfx.components.processing.update_data import UpdateDataComponent
+    from lfx.components.processing.store_message import MessageStoreComponent

 _dynamic_imports = {
-    "AlterMetadataComponent": "alter_metadata",
-    "BatchRunComponent": "batch_run",
     "CombineTextComponent": "combine_text",
     "TypeConverterComponent": "converter",
-    "CreateDataComponent": "create_data",
+    "CreateListComponent": "create_list",
     "DataOperationsComponent": "data_operations",
-    "DataToDataFrameComponent": "data_to_dataframe",
     "DataFrameOperationsComponent": "dataframe_operations",
-    "DataFrameToToolsetComponent": "dataframe_to_toolset",
-    "DynamicCreateDataComponent": "dynamic_create_data",
-    "ExtractDataKeyComponent": "extract_key",
-    "FilterDataComponent": "filter_data",
-    "DataFilterComponent": "filter_data_values",
     "JSONCleaner": "json_cleaner",
-    "LambdaFilterComponent": "lambda_filter",
-    "LLMRouterComponent": "llm_router",
-    "MergeDataComponent": "merge_data",
-    "MessageToDataComponent": "message_to_data",
+    "OutputParserComponent": "output_parser",
     "ParseDataComponent": "parse_data",
-    "ParseDataFrameComponent": "parse_dataframe",
-    "ParseJSONDataComponent": "parse_json_data",
     "ParserComponent": "parser",
-    "PromptComponent": "prompt",
-    "PythonREPLComponent": "python_repl_core",
     "RegexExtractorComponent": "regex",
-    "SelectDataComponent": "select_data",
     "SplitTextComponent": "split_text",
-    "StructuredOutputComponent": "structured_output",
-    "UpdateDataComponent": "update_data",
+    "MessageStoreComponent": "store_message",
 }

 __all__ = [
-    "AlterMetadataComponent",
-    "BatchRunComponent",
     "CombineTextComponent",
-    "CreateDataComponent",
-    "DataFilterComponent",
+    "CreateListComponent",
     "DataFrameOperationsComponent",
-    "DataFrameToToolsetComponent",
     "DataOperationsComponent",
-    "DataToDataFrameComponent",
-    "DynamicCreateDataComponent",
-    "ExtractDataKeyComponent",
-    "FilterDataComponent",
     "JSONCleaner",
-    "LLMRouterComponent",
-    "LambdaFilterComponent",
-    "MergeDataComponent",
-    "MessageToDataComponent",
+    "MessageStoreComponent",
+    "OutputParserComponent",
     "ParseDataComponent",
-    "ParseDataFrameComponent",
-    "ParseJSONDataComponent",
     "ParserComponent",
-    "PromptComponent",
-    "PythonREPLComponent",
     "RegexExtractorComponent",
-    "SelectDataComponent",
     "SplitTextComponent",
-    "StructuredOutputComponent",
     "TypeConverterComponent",
-    "UpdateDataComponent",
 ]
lfx/components/processing/converter.py
@@ -140,7 +140,7 @@ def _parse_csv_to_data(text: str) -> Data:
 class TypeConverterComponent(Component):
     display_name = "Type Convert"
     description = "Convert between different types (Message, Data, DataFrame)"
-    documentation: str = "https://docs.langflow.org/components-processing#type-convert"
+    documentation: str = "https://docs.langflow.org/type-convert"
     icon = "repeat"

     inputs = [
lfx/components/processing/dataframe_operations.py
@@ -10,7 +10,7 @@ from lfx.schema.dataframe import DataFrame
 class DataFrameOperationsComponent(Component):
     display_name = "DataFrame Operations"
     description = "Perform various operations on a DataFrame."
-    documentation: str = "https://docs.langflow.org/components-processing#dataframe-operations"
+    documentation: str = "https://docs.langflow.org/dataframe-operations"
     icon = "table"
     name = "DataFrameOperations"
lfx/components/processing/parse_json_data.py
@@ -84,8 +84,8 @@ class ParseJSONDataComponent(Component):

         full_filter_str = json.dumps(to_filter_as_dict)

-        logger.info("to_filter: ", to_filter)
+        logger.info("to_filter: %s", to_filter)

         results = jq.compile(self.query).input_text(full_filter_str).all()
-        logger.info("results: ", results)
+        logger.info("results: %s", results)
         return [Data(data=value) if isinstance(value, dict) else Data(text=str(value)) for value in results]
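
The two-line parse_json_data.py fix is more than cosmetic: with %-style loggers, arguments after the message are treated as format args, so `logger.info("to_filter: ", to_filter)` never interpolates the value and triggers a string-formatting error when the record is emitted. A stdlib demonstration of the difference (assuming lfx's logger is %-style compatible, as the fix implies):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("demo")

    to_filter = {"a": 1}
    # Broken: no %s placeholder, so the argument is never interpolated and the
    # logging machinery reports a formatting error at emit time instead of the value.
    log.info("to_filter: ", to_filter)
    # Fixed: lazy %-style interpolation, evaluated only if the record is emitted.
    log.info("to_filter: %s", to_filter)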
lfx/components/processing/parser.py
@@ -10,7 +10,7 @@ from lfx.template.field.base import Output
 class ParserComponent(Component):
     display_name = "Parser"
     description = "Extracts text using a template."
-    documentation: str = "https://docs.langflow.org/components-processing#parser"
+    documentation: str = "https://docs.langflow.org/parser"
     icon = "braces"

     inputs = [
lfx/components/processing/split_text.py
@@ -11,7 +11,7 @@ from lfx.utils.util import unescape_string
 class SplitTextComponent(Component):
     display_name: str = "Split Text"
     description: str = "Split text into chunks based on specified criteria."
-    documentation: str = "https://docs.langflow.org/components-processing#split-text"
+    documentation: str = "https://docs.langflow.org/split-text"
     icon = "scissors-line-dashed"
     name = "SplitText"
lfx/components/qdrant/qdrant.py
@@ -1,5 +1,5 @@
-from langchain.embeddings.base import Embeddings
 from langchain_community.vectorstores import Qdrant
+from langchain_core.embeddings import Embeddings

 from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
 from lfx.helpers.data import docs_to_data
lfx/components/redis/redis.py
@@ -1,7 +1,7 @@
 from pathlib import Path

-from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.vectorstores.redis import Redis
+from langchain_text_splitters import CharacterTextSplitter

 from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
 from lfx.helpers.data import docs_to_data
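
Both vector-store hunks swap imports from the old langchain.* shim modules to the packages where these classes now live: langchain_core for the Embeddings interface, langchain_text_splitters for CharacterTextSplitter. A quick sanity check of the new paths, assuming recent langchain-core and langchain-text-splitters releases are installed:

    from langchain_core.embeddings import Embeddings
    from langchain_text_splitters import CharacterTextSplitter

    # CharacterTextSplitter splits on a separator ("\n\n" by default),
    # then merges pieces back together up to chunk_size characters.
    splitter = CharacterTextSplitter(separator=" ", chunk_size=10, chunk_overlap=0)
    print(splitter.split_text("one two three four five"))
    # e.g. ['one two', 'three four', 'five']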
lfx/components/twelvelabs/split_video.py
@@ -9,6 +9,13 @@ from lfx.custom import Component
 from lfx.inputs import BoolInput, DropdownInput, HandleInput, IntInput
 from lfx.schema import Data
 from lfx.template import Output
+from lfx.utils.validate_cloud import raise_error_if_astra_cloud_disable_component
+
+disable_component_in_astra_cloud_msg = (
+    "Video processing is not supported in Astra cloud environment. "
+    "Video components require local file system access for processing. "
+    "Please use local storage mode or process videos locally before uploading."
+)


 class SplitVideoComponent(Component):
@@ -267,6 +274,9 @@ class SplitVideoComponent(Component):

     def process(self) -> list[Data]:
         """Process the input video and return a list of Data objects containing the clips."""
+        # Check if we're in Astra cloud environment and raise an error if we are.
+        raise_error_if_astra_cloud_disable_component(disable_component_in_astra_cloud_msg)
+
         try:
             # Get the input video path from the previous component
             if not hasattr(self, "videodata") or not isinstance(self.videodata, list) or len(self.videodata) != 1:
lfx/components/twelvelabs/video_file.py
@@ -3,6 +3,13 @@ from pathlib import Path
 from lfx.base.data import BaseFileComponent
 from lfx.io import FileInput
 from lfx.schema import Data, DataFrame
+from lfx.utils.validate_cloud import raise_error_if_astra_cloud_disable_component
+
+disable_component_in_astra_cloud_msg = (
+    "Video processing is not supported in Astra cloud environment. "
+    "Video components require local file system access for processing. "
+    "Please use local storage mode or process videos locally before uploading."
+)


 class VideoFileComponent(BaseFileComponent):
@@ -97,6 +104,8 @@ class VideoFileComponent(BaseFileComponent):

     def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
         """Process video files."""
+        # Check if we're in Astra cloud environment and raise an error if we are.
+        raise_error_if_astra_cloud_disable_component(disable_component_in_astra_cloud_msg)
         self.log(f"DEBUG: Processing video files: {len(file_list)}")

         if not file_list:
@@ -137,6 +146,9 @@ class VideoFileComponent(BaseFileComponent):

     def load_files(self) -> DataFrame:
         """Load video files and return a list of Data objects."""
+        # Check if we're in Astra cloud environment and raise an error if we are.
+        raise_error_if_astra_cloud_disable_component(disable_component_in_astra_cloud_msg)
+
         try:
             self.log("DEBUG: Starting video file load")
             if not hasattr(self, "file_path") or not self.file_path:
lfx/components/utilities/__init__.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from lfx.components._importing import import_mod
+
+if TYPE_CHECKING:
+    from lfx.components.utilities.calculator_core import CalculatorComponent
+    from lfx.components.utilities.current_date import CurrentDateComponent
+    from lfx.components.utilities.id_generator import IDGeneratorComponent
+    from lfx.components.utilities.python_repl_core import PythonREPLComponent
+
+_dynamic_imports = {
+    "CalculatorComponent": "calculator_core",
+    "CurrentDateComponent": "current_date",
+    "IDGeneratorComponent": "id_generator",
+    "PythonREPLComponent": "python_repl_core",
+}
+
+__all__ = [
+    "CalculatorComponent",
+    "CurrentDateComponent",
+    "IDGeneratorComponent",
+    "PythonREPLComponent",
+]
+
+
+def __getattr__(attr_name: str) -> Any:
+    """Lazily import utility components on attribute access."""
+    if attr_name not in _dynamic_imports:
+        msg = f"module '{__name__}' has no attribute '{attr_name}'"
+        raise AttributeError(msg)
+    try:
+        result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+    except (ModuleNotFoundError, ImportError, AttributeError) as e:
+        msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+        raise AttributeError(msg) from e
+    globals()[attr_name] = result
+    return result
+
+
+def __dir__() -> list[str]:
+    return list(__all__)
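
The new utilities/__init__.py follows the package's lazy-import convention: PEP 562's module-level __getattr__ defers importing each component module until the name is first accessed, then caches the result in globals() so the hook runs once per name. A self-contained illustration of the same mechanism, using importlib instead of lfx's import_mod helper:

    # demo_pkg/__init__.py - standalone illustration of the lazy-import pattern
    from importlib import import_module
    from typing import Any

    _dynamic_imports = {"sqrt": "math"}  # attribute name -> module that provides it

    def __getattr__(attr_name: str) -> Any:
        if attr_name not in _dynamic_imports:
            msg = f"module '{__name__}' has no attribute '{attr_name}'"
            raise AttributeError(msg)
        result = getattr(import_module(_dynamic_imports[attr_name]), attr_name)
        globals()[attr_name] = result  # cache so __getattr__ runs once per name
        return result

With this in place, `import demo_pkg; demo_pkg.sqrt(4.0)` only imports math on first access; the TYPE_CHECKING block and __dir__ override in the real file keep type checkers and tab completion aware of the lazy names.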
lfx/components/utilities/calculator_core.py
@@ -11,7 +11,7 @@ from lfx.schema.data import Data
 class CalculatorComponent(Component):
     display_name = "Calculator"
     description = "Perform basic arithmetic operations on a given expression."
-    documentation: str = "https://docs.langflow.org/components-helpers#calculator"
+    documentation: str = "https://docs.langflow.org/calculator"
     icon = "calculator"

     # Cache operators dictionary as a class variable
lfx/components/utilities/current_date.py
@@ -10,7 +10,7 @@ from lfx.schema.message import Message
 class CurrentDateComponent(Component):
     display_name = "Current Date"
     description = "Returns the current date and time in the selected timezone."
-    documentation: str = "https://docs.langflow.org/components-helpers#current-date"
+    documentation: str = "https://docs.langflow.org/current-date"
     icon = "clock"
     name = "CurrentDate"
lfx/components/utilities/python_repl_core.py
@@ -10,7 +10,7 @@ from lfx.schema.data import Data
 class PythonREPLComponent(Component):
     display_name = "Python Interpreter"
     description = "Run Python code with optional imports. Use print() to see the output."
-    documentation: str = "https://docs.langflow.org/components-processing#python-interpreter"
+    documentation: str = "https://docs.langflow.org/python-interpreter"
     icon = "square-terminal"

     inputs = [
lfx/components/vectorstores/local_db.py
@@ -12,6 +12,13 @@ from lfx.log.logger import logger
 from lfx.schema.data import Data
 from lfx.schema.dataframe import DataFrame
 from lfx.template.field.base import Output
+from lfx.utils.validate_cloud import raise_error_if_astra_cloud_disable_component
+
+disable_component_in_astra_cloud_msg = (
+    "Local vector stores are not supported in S3/cloud mode. "
+    "Local vector stores require local file system access for persistence. "
+    "Please use cloud-based vector stores (Pinecone, Weaviate, etc.) or local storage mode."
+)


 class LocalDBComponent(LCVectorStoreComponent):
@@ -193,6 +200,8 @@ class LocalDBComponent(LCVectorStoreComponent):
     @check_cached_vector_store
     def build_vector_store(self) -> Chroma:
         """Builds the Chroma object."""
+        raise_error_if_astra_cloud_disable_component(disable_component_in_astra_cloud_msg)
+
         try:
             from langchain_chroma import Chroma
         except ImportError as e:
lfx/components/youtube/youtube_transcripts.py
@@ -1,7 +1,7 @@
+import re
+
 import pandas as pd
-import youtube_transcript_api
-from langchain_community.document_loaders import YoutubeLoader
-from langchain_community.document_loaders.youtube import TranscriptFormat
+from youtube_transcript_api import NoTranscriptFound, TranscriptsDisabled, YouTubeTranscriptApi

 from lfx.custom.custom_component.component import Component
 from lfx.inputs.inputs import DropdownInput, IntInput, MultilineInput
@@ -48,43 +48,133 @@ class YouTubeTranscriptsComponent(Component):
         Output(name="data_output", display_name="Transcript + Source", method="get_data_output"),
     ]

+    def _extract_video_id(self, url: str) -> str:
+        """Extract video ID from YouTube URL."""
+        patterns = [
+            r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)",
+            r"youtube\.com\/watch\?.*?v=([^&\n?#]+)",
+        ]
+        for pattern in patterns:
+            match = re.search(pattern, url)
+            if match:
+                return match.group(1)
+        msg = f"Could not extract video ID from URL: {url}"
+        raise ValueError(msg)
+
     def _load_transcripts(self, *, as_chunks: bool = True):
         """Internal method to load transcripts from YouTube."""
-        loader = YoutubeLoader.from_youtube_url(
-            self.url,
-            transcript_format=TranscriptFormat.CHUNKS if as_chunks else TranscriptFormat.TEXT,
-            chunk_size_seconds=self.chunk_size_seconds,
-            translation=self.translation or None,
-        )
-        return loader.load()
+        try:
+            video_id = self._extract_video_id(self.url)
+        except ValueError as e:
+            msg = f"Invalid YouTube URL: {e}"
+            raise ValueError(msg) from e
+
+        try:
+            # Use new v1.0+ API - create instance
+            api = YouTubeTranscriptApi()
+            transcript_list = api.list(video_id)
+
+            # Get transcript in specified language or default to English
+            if self.translation:
+                # Get any available transcript and translate it
+                transcript = transcript_list.find_transcript(["en"])
+                transcript = transcript.translate(self.translation)
+            else:
+                # Try to get transcript in available languages
+                try:
+                    transcript = transcript_list.find_transcript(["en"])
+                except NoTranscriptFound:
+                    # Try auto-generated English
+                    transcript = transcript_list.find_generated_transcript(["en"])
+
+            # Fetch the transcript data
+            transcript_data = api.fetch(transcript.video_id, [transcript.language_code])
+
+        except (TranscriptsDisabled, NoTranscriptFound) as e:
+            error_type = type(e).__name__
+            msg = (
+                f"Could not retrieve transcripts for video '{video_id}'. "
+                "Possible reasons:\n"
+                "1. This video does not have captions/transcripts enabled\n"
+                "2. The video is private, restricted, or deleted\n"
+                f"\nTechnical error ({error_type}): {e}"
+            )
+            raise RuntimeError(msg) from e
+        except Exception as e:
+            error_type = type(e).__name__
+            msg = (
+                f"Could not retrieve transcripts for video '{video_id}'. "
+                "Possible reasons:\n"
+                "1. This video does not have captions/transcripts enabled\n"
+                "2. The video is private, restricted, or deleted\n"
+                "3. YouTube is blocking automated requests\n"
+                f"\nTechnical error ({error_type}): {e}"
+            )
+            raise RuntimeError(msg) from e
+
+        if as_chunks:
+            # Group into chunks based on chunk_size_seconds
+            return self._chunk_transcript(transcript_data)
+        # Return as continuous text
+        return transcript_data
+
+    def _chunk_transcript(self, transcript_data):
+        """Group transcript segments into time-based chunks."""
+        chunks = []
+        current_chunk = []
+        chunk_start = 0
+
+        for segment in transcript_data:
+            # Handle both dict (old API) and object (new API) formats
+            segment_start = segment.start if hasattr(segment, "start") else segment["start"]
+
+            # If this segment starts beyond the current chunk window, start a new chunk
+            if segment_start - chunk_start >= self.chunk_size_seconds and current_chunk:
+                chunk_text = " ".join(s.text if hasattr(s, "text") else s["text"] for s in current_chunk)
+                chunks.append({"start": chunk_start, "text": chunk_text})
+                current_chunk = []
+                chunk_start = segment_start
+
+            current_chunk.append(segment)
+
+        # Add the last chunk
+        if current_chunk:
+            chunk_text = " ".join(s.text if hasattr(s, "text") else s["text"] for s in current_chunk)
+            chunks.append({"start": chunk_start, "text": chunk_text})
+
+        return chunks

     def get_dataframe_output(self) -> DataFrame:
         """Provides transcript output as a DataFrame with timestamp and text columns."""
         try:
-            transcripts = self._load_transcripts(as_chunks=True)
+            chunks = self._load_transcripts(as_chunks=True)

             # Create DataFrame with timestamp and text columns
             data = []
-            for doc in transcripts:
-                start_seconds = int(doc.metadata["start_seconds"])
+            for chunk in chunks:
+                start_seconds = int(chunk["start"])
                 start_minutes = start_seconds // 60
-                start_seconds %= 60
-                timestamp = f"{start_minutes:02d}:{start_seconds:02d}"
-                data.append({"timestamp": timestamp, "text": doc.page_content})
+                start_seconds_remainder = start_seconds % 60
+                timestamp = f"{start_minutes:02d}:{start_seconds_remainder:02d}"
+                data.append({"timestamp": timestamp, "text": chunk["text"]})

             return DataFrame(pd.DataFrame(data))

-        except (youtube_transcript_api.TranscriptsDisabled, youtube_transcript_api.NoTranscriptFound) as exc:
-            return DataFrame(pd.DataFrame({"error": [f"Failed to get YouTube transcripts: {exc!s}"]}))
+        except (TranscriptsDisabled, NoTranscriptFound, RuntimeError, ValueError) as exc:
+            error_msg = f"Failed to get YouTube transcripts: {exc!s}"
+            return DataFrame(pd.DataFrame({"error": [error_msg]}))

     def get_message_output(self) -> Message:
         """Provides transcript output as continuous text."""
         try:
-            transcripts = self._load_transcripts(as_chunks=False)
-            result = transcripts[0].page_content
+            transcript_data = self._load_transcripts(as_chunks=False)
+            # Handle both dict (old API) and object (new API) formats
+            result = " ".join(
+                segment.text if hasattr(segment, "text") else segment["text"] for segment in transcript_data
+            )
             return Message(text=result)

-        except (youtube_transcript_api.TranscriptsDisabled, youtube_transcript_api.NoTranscriptFound) as exc:
+        except (TranscriptsDisabled, NoTranscriptFound, RuntimeError, ValueError) as exc:
             error_msg = f"Failed to get YouTube transcripts: {exc!s}"
             return Message(text=error_msg)
@@ -100,19 +190,17 @@ class YouTubeTranscriptsComponent(Component):
         default_data = {"transcript": "", "video_url": self.url, "error": None}

         try:
-            transcripts = self._load_transcripts(as_chunks=False)
-            if not transcripts:
+            transcript_data = self._load_transcripts(as_chunks=False)
+            if not transcript_data:
                 default_data["error"] = "No transcripts found."
                 return Data(data=default_data)

-            # Combine all transcript parts
-            full_transcript = " ".join(doc.page_content for doc in transcripts)
+            # Combine all transcript segments - handle both dict and object formats
+            full_transcript = " ".join(
+                segment.text if hasattr(segment, "text") else segment["text"] for segment in transcript_data
+            )
             return Data(data={"transcript": full_transcript, "video_url": self.url})

-        except (
-            youtube_transcript_api.TranscriptsDisabled,
-            youtube_transcript_api.NoTranscriptFound,
-            youtube_transcript_api.CouldNotRetrieveTranscript,
-        ) as exc:
+        except (TranscriptsDisabled, NoTranscriptFound, RuntimeError, ValueError) as exc:
             default_data["error"] = str(exc)
             return Data(data=default_data)
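
With YoutubeLoader gone, chunking moves in-house: _chunk_transcript opens a new window whenever a segment starts chunk_size_seconds or more after the current window's start. A worked example of that windowing, using plain dict segments and a 60-second window (standalone function mirroring the method above):

    segments = [
        {"start": 0.0, "text": "intro"},
        {"start": 40.0, "text": "setup"},
        {"start": 65.0, "text": "main point"},  # 65 - 0 >= 60 -> closes the first chunk
        {"start": 130.0, "text": "wrap up"},    # 130 - 65 >= 60 -> closes the second chunk
    ]

    def chunk_transcript(transcript_data, chunk_size_seconds=60):
        chunks, current_chunk, chunk_start = [], [], 0
        for segment in transcript_data:
            if segment["start"] - chunk_start >= chunk_size_seconds and current_chunk:
                chunks.append({"start": chunk_start, "text": " ".join(s["text"] for s in current_chunk)})
                current_chunk, chunk_start = [], segment["start"]
            current_chunk.append(segment)
        if current_chunk:
            chunks.append({"start": chunk_start, "text": " ".join(s["text"] for s in current_chunk)})
        return chunks

    print(chunk_transcript(segments))
    # e.g. [{'start': 0, 'text': 'intro setup'}, {'start': 65.0, 'text': 'main point'},
    #       {'start': 130.0, 'text': 'wrap up'}]

Note the window is anchored to each chunk's first segment rather than a fixed grid, so chunk boundaries land on segment starts and no segment is ever split.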