lfx-nightly 0.2.0.dev41__py3-none-any.whl → 0.3.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. lfx/__main__.py +137 -6
  2. lfx/_assets/component_index.json +1 -1
  3. lfx/base/agents/agent.py +10 -6
  4. lfx/base/agents/altk_base_agent.py +5 -3
  5. lfx/base/agents/altk_tool_wrappers.py +1 -1
  6. lfx/base/agents/events.py +1 -1
  7. lfx/base/agents/utils.py +4 -0
  8. lfx/base/composio/composio_base.py +78 -41
  9. lfx/base/data/cloud_storage_utils.py +156 -0
  10. lfx/base/data/docling_utils.py +130 -55
  11. lfx/base/datastax/astradb_base.py +75 -64
  12. lfx/base/embeddings/embeddings_class.py +113 -0
  13. lfx/base/models/__init__.py +11 -1
  14. lfx/base/models/google_generative_ai_constants.py +33 -9
  15. lfx/base/models/model_metadata.py +6 -0
  16. lfx/base/models/ollama_constants.py +196 -30
  17. lfx/base/models/openai_constants.py +37 -10
  18. lfx/base/models/unified_models.py +1123 -0
  19. lfx/base/models/watsonx_constants.py +43 -4
  20. lfx/base/prompts/api_utils.py +40 -5
  21. lfx/base/tools/component_tool.py +2 -9
  22. lfx/cli/__init__.py +10 -2
  23. lfx/cli/commands.py +3 -0
  24. lfx/cli/run.py +65 -409
  25. lfx/cli/script_loader.py +18 -7
  26. lfx/cli/validation.py +6 -3
  27. lfx/components/__init__.py +0 -3
  28. lfx/components/composio/github_composio.py +1 -1
  29. lfx/components/cuga/cuga_agent.py +39 -27
  30. lfx/components/data_source/api_request.py +4 -2
  31. lfx/components/datastax/astradb_assistant_manager.py +4 -2
  32. lfx/components/docling/__init__.py +45 -11
  33. lfx/components/docling/docling_inline.py +39 -49
  34. lfx/components/docling/docling_remote.py +1 -0
  35. lfx/components/elastic/opensearch_multimodal.py +1733 -0
  36. lfx/components/files_and_knowledge/file.py +384 -36
  37. lfx/components/files_and_knowledge/ingestion.py +8 -0
  38. lfx/components/files_and_knowledge/retrieval.py +10 -0
  39. lfx/components/files_and_knowledge/save_file.py +91 -88
  40. lfx/components/langchain_utilities/ibm_granite_handler.py +211 -0
  41. lfx/components/langchain_utilities/tool_calling.py +37 -6
  42. lfx/components/llm_operations/batch_run.py +64 -18
  43. lfx/components/llm_operations/lambda_filter.py +213 -101
  44. lfx/components/llm_operations/llm_conditional_router.py +39 -7
  45. lfx/components/llm_operations/structured_output.py +38 -12
  46. lfx/components/models/__init__.py +16 -74
  47. lfx/components/models_and_agents/agent.py +51 -203
  48. lfx/components/models_and_agents/embedding_model.py +171 -255
  49. lfx/components/models_and_agents/language_model.py +54 -318
  50. lfx/components/models_and_agents/mcp_component.py +96 -10
  51. lfx/components/models_and_agents/prompt.py +105 -18
  52. lfx/components/ollama/ollama_embeddings.py +111 -29
  53. lfx/components/openai/openai_chat_model.py +1 -1
  54. lfx/components/processing/text_operations.py +580 -0
  55. lfx/components/vllm/__init__.py +37 -0
  56. lfx/components/vllm/vllm.py +141 -0
  57. lfx/components/vllm/vllm_embeddings.py +110 -0
  58. lfx/custom/custom_component/component.py +65 -10
  59. lfx/custom/custom_component/custom_component.py +8 -6
  60. lfx/events/observability/__init__.py +0 -0
  61. lfx/events/observability/lifecycle_events.py +111 -0
  62. lfx/field_typing/__init__.py +57 -58
  63. lfx/graph/graph/base.py +40 -1
  64. lfx/graph/utils.py +109 -30
  65. lfx/graph/vertex/base.py +75 -23
  66. lfx/graph/vertex/vertex_types.py +0 -5
  67. lfx/inputs/__init__.py +2 -0
  68. lfx/inputs/input_mixin.py +55 -0
  69. lfx/inputs/inputs.py +120 -0
  70. lfx/interface/components.py +24 -7
  71. lfx/interface/initialize/loading.py +42 -12
  72. lfx/io/__init__.py +2 -0
  73. lfx/run/__init__.py +5 -0
  74. lfx/run/base.py +464 -0
  75. lfx/schema/__init__.py +50 -0
  76. lfx/schema/data.py +1 -1
  77. lfx/schema/image.py +26 -7
  78. lfx/schema/message.py +104 -11
  79. lfx/schema/workflow.py +171 -0
  80. lfx/services/deps.py +12 -0
  81. lfx/services/interfaces.py +43 -1
  82. lfx/services/mcp_composer/service.py +7 -1
  83. lfx/services/schema.py +1 -0
  84. lfx/services/settings/auth.py +95 -4
  85. lfx/services/settings/base.py +11 -1
  86. lfx/services/settings/constants.py +2 -0
  87. lfx/services/settings/utils.py +82 -0
  88. lfx/services/storage/local.py +13 -8
  89. lfx/services/transaction/__init__.py +5 -0
  90. lfx/services/transaction/service.py +35 -0
  91. lfx/tests/unit/components/__init__.py +0 -0
  92. lfx/utils/constants.py +2 -0
  93. lfx/utils/mustache_security.py +79 -0
  94. lfx/utils/validate_cloud.py +81 -3
  95. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/METADATA +7 -2
  96. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/RECORD +98 -80
  97. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/WHEEL +0 -0
  98. {lfx_nightly-0.2.0.dev41.dist-info → lfx_nightly-0.3.0.dev3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,580 @@
1
+ import contextlib
2
+ import re
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+
7
+ from lfx.custom import Component
8
+ from lfx.field_typing import RangeSpec
9
+ from lfx.inputs import (
10
+ BoolInput,
11
+ DropdownInput,
12
+ IntInput,
13
+ SortableListInput,
14
+ StrInput,
15
+ )
16
+ from lfx.inputs.inputs import MultilineInput
17
+ from lfx.io import Output
18
+ from lfx.schema.data import Data
19
+ from lfx.schema.dataframe import DataFrame
20
+ from lfx.schema.message import Message
21
+
22
+
23
class TextOperations(Component):
    """Multi-purpose text processing component.

    A single ``operation`` selector chooses which transformation runs.
    ``update_build_config`` reveals only the inputs that operation needs, and
    ``update_outputs`` swaps the output sockets to the matching return type
    (Message, Data, or DataFrame).
    """

    display_name = "Text Operations"
    description = "Perform various text processing operations including text-to-DataFrame conversion."
    icon = "type"
    name = "TextOperations"

    # Configuration for operation-specific input fields: maps each operation
    # to the dynamic inputs that become visible when it is selected.
    OPERATION_FIELDS: dict[str, list[str]] = {
        "Text to DataFrame": ["table_separator", "has_header"],
        "Word Count": ["count_words", "count_characters", "count_lines"],
        "Case Conversion": ["case_type"],
        "Text Replace": ["search_pattern", "replacement_text", "use_regex"],
        "Text Extract": ["extract_pattern", "max_matches"],
        "Text Head": ["head_characters"],
        "Text Tail": ["tail_characters"],
        "Text Strip": ["strip_mode", "strip_characters"],
        "Text Join": ["text_input_2"],
        "Text Clean": ["remove_extra_spaces", "remove_special_chars", "remove_empty_lines"],
    }

    # Flat list of every dynamic field; update_build_config hides all of
    # these before re-showing the subset for the selected operation.
    ALL_DYNAMIC_FIELDS: list[str] = [
        "table_separator",
        "has_header",
        "count_words",
        "count_characters",
        "count_lines",
        "case_type",
        "search_pattern",
        "replacement_text",
        "use_regex",
        "extract_pattern",
        "max_matches",
        "head_characters",
        "tail_characters",
        "strip_mode",
        "strip_characters",
        "text_input_2",
        "remove_extra_spaces",
        "remove_special_chars",
        "remove_empty_lines",
    ]

    # Dispatch table for "Case Conversion"; keys mirror the case_type dropdown.
    CASE_CONVERTERS: dict[str, Any] = {
        "uppercase": str.upper,
        "lowercase": str.lower,
        "title": str.title,
        "capitalize": str.capitalize,
        "swapcase": str.swapcase,
    }

    inputs = [
        MultilineInput(
            name="text_input",
            display_name="Text Input",
            info="The input text to process.",
            required=True,
        ),
        # Single-select operation picker; real_time_refresh triggers
        # update_build_config / update_outputs on change.
        SortableListInput(
            name="operation",
            display_name="Operation",
            placeholder="Select Operation",
            info="Select the text operation to perform.",
            options=[
                {"name": "Word Count", "icon": "hash"},
                {"name": "Case Conversion", "icon": "type"},
                {"name": "Text Replace", "icon": "replace"},
                {"name": "Text Extract", "icon": "search"},
                {"name": "Text Head", "icon": "chevron-left"},
                {"name": "Text Tail", "icon": "chevron-right"},
                {"name": "Text Strip", "icon": "minus"},
                {"name": "Text Join", "icon": "link"},
                {"name": "Text Clean", "icon": "sparkles"},
                {"name": "Text to DataFrame", "icon": "table"},
            ],
            real_time_refresh=True,
            limit=1,
        ),
        # --- dynamic, operation-specific inputs (hidden by default) ---
        StrInput(
            name="table_separator",
            display_name="Table Separator",
            info="Separator used in the table (default: '|').",
            value="|",
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="has_header",
            display_name="Has Header",
            info="Whether the table has a header row.",
            value=True,
            dynamic=True,
            advanced=True,
            show=False,
        ),
        BoolInput(
            name="count_words",
            display_name="Count Words",
            info="Include word count in analysis.",
            value=True,
            dynamic=True,
            advanced=True,
            show=False,
        ),
        BoolInput(
            name="count_characters",
            display_name="Count Characters",
            info="Include character count in analysis.",
            value=True,
            dynamic=True,
            advanced=True,
            show=False,
        ),
        BoolInput(
            name="count_lines",
            display_name="Count Lines",
            info="Include line count in analysis.",
            value=True,
            dynamic=True,
            advanced=True,
            show=False,
        ),
        DropdownInput(
            name="case_type",
            display_name="Case Type",
            options=["uppercase", "lowercase", "title", "capitalize", "swapcase"],
            value="lowercase",
            info="Type of case conversion to apply.",
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="use_regex",
            display_name="Use Regex",
            info="Whether to treat search pattern as regex.",
            value=False,
            dynamic=True,
            show=False,
        ),
        StrInput(
            name="search_pattern",
            display_name="Search Pattern",
            info="Text pattern to search for (supports regex).",
            dynamic=True,
            show=False,
        ),
        StrInput(
            name="replacement_text",
            display_name="Replacement Text",
            info="Text to replace the search pattern with.",
            dynamic=True,
            show=False,
        ),
        StrInput(
            name="extract_pattern",
            display_name="Extract Pattern",
            info="Regex pattern to extract from text.",
            dynamic=True,
            show=False,
        ),
        IntInput(
            name="max_matches",
            display_name="Max Matches",
            info="Maximum number of matches to extract.",
            value=10,
            dynamic=True,
            show=False,
        ),
        IntInput(
            name="head_characters",
            display_name="Characters from Start",
            info="Number of characters to extract from the beginning of text. Must be non-negative.",
            value=100,
            dynamic=True,
            show=False,
            range_spec=RangeSpec(min=0, max=1000000, step=1, step_type="int"),
        ),
        IntInput(
            name="tail_characters",
            display_name="Characters from End",
            info="Number of characters to extract from the end of text. Must be non-negative.",
            value=100,
            dynamic=True,
            show=False,
            range_spec=RangeSpec(min=0, max=1000000, step=1, step_type="int"),
        ),
        DropdownInput(
            name="strip_mode",
            display_name="Strip Mode",
            options=["both", "left", "right"],
            value="both",
            info="Which sides to strip whitespace from.",
            dynamic=True,
            show=False,
        ),
        StrInput(
            name="strip_characters",
            display_name="Characters to Strip",
            info="Specific characters to remove (leave empty for whitespace).",
            value="",
            dynamic=True,
            show=False,
        ),
        MultilineInput(
            name="text_input_2",
            display_name="Second Text Input",
            info="Second text to join with the first text.",
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="remove_extra_spaces",
            display_name="Remove Extra Spaces",
            info="Remove multiple consecutive spaces.",
            value=True,
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="remove_special_chars",
            display_name="Remove Special Characters",
            info="Remove special characters except alphanumeric and spaces.",
            value=False,
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="remove_empty_lines",
            display_name="Remove Empty Lines",
            info="Remove empty lines from text.",
            value=False,
            dynamic=True,
            show=False,
        ),
    ]

    # Outputs are assigned dynamically by update_outputs based on the
    # selected operation; the static list stays empty.
    outputs = []
259
+
260
+ def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
261
+ """Update build configuration to show/hide relevant inputs based on operation."""
262
+ for field in self.ALL_DYNAMIC_FIELDS:
263
+ if field in build_config:
264
+ build_config[field]["show"] = False
265
+
266
+ if field_name != "operation":
267
+ return build_config
268
+
269
+ operation_name = self._extract_operation_name(field_value)
270
+ if not operation_name:
271
+ return build_config
272
+
273
+ fields_to_show = self.OPERATION_FIELDS.get(operation_name, [])
274
+ for field in fields_to_show:
275
+ if field in build_config:
276
+ build_config[field]["show"] = True
277
+
278
+ return build_config
279
+
280
+ def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:
281
+ """Create dynamic outputs based on selected operation."""
282
+ if field_name != "operation":
283
+ return frontend_node
284
+
285
+ frontend_node["outputs"] = []
286
+ operation_name = self._extract_operation_name(field_value)
287
+
288
+ if operation_name == "Word Count":
289
+ frontend_node["outputs"].append(Output(display_name="Data", name="data", method="get_data"))
290
+ elif operation_name == "Text to DataFrame":
291
+ frontend_node["outputs"].append(Output(display_name="DataFrame", name="dataframe", method="get_dataframe"))
292
+ elif operation_name == "Text Join":
293
+ frontend_node["outputs"].append(Output(display_name="Text", name="text", method="get_text"))
294
+ frontend_node["outputs"].append(Output(display_name="Message", name="message", method="get_message"))
295
+ elif operation_name:
296
+ frontend_node["outputs"].append(Output(display_name="Message", name="message", method="get_message"))
297
+
298
+ return frontend_node
299
+
300
+ def _extract_operation_name(self, field_value: Any) -> str:
301
+ """Extract operation name from SortableListInput value."""
302
+ if isinstance(field_value, list) and len(field_value) > 0:
303
+ return field_value[0].get("name", "")
304
+ return ""
305
+
306
+ def get_operation_name(self) -> str:
307
+ """Get the selected operation name."""
308
+ operation_input = getattr(self, "operation", [])
309
+ return self._extract_operation_name(operation_input)
310
+
311
+ def process_text(self) -> Any:
312
+ """Process text based on selected operation."""
313
+ text = getattr(self, "text_input", "")
314
+ operation = self.get_operation_name()
315
+
316
+ # Allow empty text for Text Join (second input might have content)
317
+ # and Word Count (should return zeros for empty text)
318
+ if not text and operation not in ("Text Join", "Word Count"):
319
+ return None
320
+ operation_handlers = {
321
+ "Text to DataFrame": self._text_to_dataframe,
322
+ "Word Count": self._word_count,
323
+ "Case Conversion": self._case_conversion,
324
+ "Text Replace": self._text_replace,
325
+ "Text Extract": self._text_extract,
326
+ "Text Head": self._text_head,
327
+ "Text Tail": self._text_tail,
328
+ "Text Strip": self._text_strip,
329
+ "Text Join": self._text_join,
330
+ "Text Clean": self._text_clean,
331
+ }
332
+
333
+ handler = operation_handlers.get(operation)
334
+ if handler:
335
+ return handler(text)
336
+ return text
337
+
338
+ def _text_to_dataframe(self, text: str) -> DataFrame:
339
+ """Convert markdown-style table text to DataFrame."""
340
+ lines = [line.strip() for line in text.strip().split("\n") if line.strip()]
341
+ if not lines:
342
+ return DataFrame(pd.DataFrame())
343
+
344
+ separator = getattr(self, "table_separator", "|")
345
+ has_header = getattr(self, "has_header", True)
346
+
347
+ rows = self._parse_table_rows(lines, separator)
348
+ if not rows:
349
+ return DataFrame(pd.DataFrame())
350
+
351
+ df = self._create_dataframe(rows, has_header=has_header)
352
+ self._convert_numeric_columns(df)
353
+
354
+ self.log(f"Converted text to DataFrame: {len(df)} rows, {len(df.columns)} columns")
355
+ return DataFrame(df)
356
+
357
+ def _parse_table_rows(self, lines: list[str], separator: str) -> list[list[str]]:
358
+ """Parse table lines into rows of cells."""
359
+ rows = []
360
+ for line in lines:
361
+ cleaned_line = line.strip(separator)
362
+ cells = [cell.strip() for cell in cleaned_line.split(separator)]
363
+ rows.append(cells)
364
+ return rows
365
+
366
+ def _create_dataframe(self, rows: list[list[str]], *, has_header: bool) -> pd.DataFrame:
367
+ """Create DataFrame from parsed rows."""
368
+ if has_header and len(rows) > 1:
369
+ header = rows[0]
370
+ data_rows = rows[1:]
371
+ header_col_count = len(header)
372
+
373
+ # Validate that all data rows have the same number of columns as header
374
+ for i, row in enumerate(data_rows):
375
+ row_col_count = len(row)
376
+ if row_col_count != header_col_count:
377
+ msg = (
378
+ f"Header mismatch: {header_col_count} column(s) in header vs "
379
+ f"{row_col_count} column(s) in data row {i + 1}. "
380
+ "Please ensure the header has the same number of columns as your data."
381
+ )
382
+ raise ValueError(msg)
383
+
384
+ return pd.DataFrame(data_rows, columns=header)
385
+
386
+ max_cols = max(len(row) for row in rows) if rows else 0
387
+ columns = [f"col_{i}" for i in range(max_cols)]
388
+ return pd.DataFrame(rows, columns=columns)
389
+
390
+ def _convert_numeric_columns(self, df: pd.DataFrame) -> None:
391
+ """Attempt to convert string columns to numeric where possible."""
392
+ for col in df.columns:
393
+ with contextlib.suppress(ValueError, TypeError):
394
+ df[col] = pd.to_numeric(df[col])
395
+
396
+ def _word_count(self, text: str) -> dict[str, Any]:
397
+ """Count words, characters, and lines in text."""
398
+ result: dict[str, Any] = {}
399
+
400
+ # Handle empty or whitespace-only text - return zeros
401
+ text_str = str(text) if text else ""
402
+ is_empty = not text_str or not text_str.strip()
403
+
404
+ if getattr(self, "count_words", True):
405
+ if is_empty:
406
+ result["word_count"] = 0
407
+ result["unique_words"] = 0
408
+ else:
409
+ words = text_str.split()
410
+ result["word_count"] = len(words)
411
+ result["unique_words"] = len(set(words))
412
+
413
+ if getattr(self, "count_characters", True):
414
+ if is_empty:
415
+ result["character_count"] = 0
416
+ result["character_count_no_spaces"] = 0
417
+ else:
418
+ result["character_count"] = len(text_str)
419
+ result["character_count_no_spaces"] = len(text_str.replace(" ", ""))
420
+
421
+ if getattr(self, "count_lines", True):
422
+ if is_empty:
423
+ result["line_count"] = 0
424
+ result["non_empty_lines"] = 0
425
+ else:
426
+ lines = text_str.split("\n")
427
+ result["line_count"] = len(lines)
428
+ result["non_empty_lines"] = len([line for line in lines if line.strip()])
429
+
430
+ return result
431
+
432
+ def _case_conversion(self, text: str) -> str:
433
+ """Convert text case."""
434
+ case_type = getattr(self, "case_type", "lowercase")
435
+ converter = self.CASE_CONVERTERS.get(case_type)
436
+ return converter(text) if converter else text
437
+
438
+ def _text_replace(self, text: str) -> str:
439
+ """Replace text patterns."""
440
+ search_pattern = getattr(self, "search_pattern", "")
441
+ if not search_pattern:
442
+ return text
443
+
444
+ replacement_text = getattr(self, "replacement_text", "")
445
+ use_regex = getattr(self, "use_regex", False)
446
+
447
+ if use_regex:
448
+ try:
449
+ return re.sub(search_pattern, replacement_text, text)
450
+ except re.error as e:
451
+ self.log(f"Invalid regex pattern: {e}")
452
+ return text
453
+
454
+ return text.replace(search_pattern, replacement_text)
455
+
456
+ def _text_extract(self, text: str) -> list[str]:
457
+ """Extract text matching patterns."""
458
+ extract_pattern = getattr(self, "extract_pattern", "")
459
+ if not extract_pattern:
460
+ return []
461
+
462
+ max_matches = getattr(self, "max_matches", 10)
463
+
464
+ try:
465
+ matches = re.findall(extract_pattern, text)
466
+ except re.error as e:
467
+ msg = f"Invalid regex pattern '{extract_pattern}': {e}"
468
+ raise ValueError(msg) from e
469
+
470
+ return matches[:max_matches] if max_matches > 0 else matches
471
+
472
+ def _text_head(self, text: str) -> str:
473
+ """Extract characters from the beginning of text."""
474
+ head_characters = getattr(self, "head_characters", 100)
475
+ if head_characters < 0:
476
+ msg = f"Characters from Start must be a non-negative integer, got {head_characters}"
477
+ raise ValueError(msg)
478
+ if head_characters == 0:
479
+ return ""
480
+ return text[:head_characters]
481
+
482
+ def _text_tail(self, text: str) -> str:
483
+ """Extract characters from the end of text."""
484
+ tail_characters = getattr(self, "tail_characters", 100)
485
+ if tail_characters < 0:
486
+ msg = f"Characters from End must be a non-negative integer, got {tail_characters}"
487
+ raise ValueError(msg)
488
+ if tail_characters == 0:
489
+ return ""
490
+ return text[-tail_characters:]
491
+
492
+ def _text_strip(self, text: str) -> str:
493
+ """Remove whitespace or specific characters from text edges."""
494
+ strip_mode = getattr(self, "strip_mode", "both")
495
+ strip_characters = getattr(self, "strip_characters", "")
496
+
497
+ # Convert to string to ensure proper handling
498
+ text_str = str(text) if text else ""
499
+
500
+ # None means strip all whitespace (spaces, tabs, newlines, etc.)
501
+ chars_to_strip = strip_characters if strip_characters else None
502
+
503
+ if strip_mode == "left":
504
+ return text_str.lstrip(chars_to_strip)
505
+ if strip_mode == "right":
506
+ return text_str.rstrip(chars_to_strip)
507
+ # Default: "both"
508
+ return text_str.strip(chars_to_strip)
509
+
510
+ def _text_join(self, text: str) -> str:
511
+ """Join two texts with line break separator."""
512
+ text_input_2 = getattr(self, "text_input_2", "")
513
+
514
+ text1 = str(text) if text else ""
515
+ text2 = str(text_input_2) if text_input_2 else ""
516
+
517
+ if text1 and text2:
518
+ return f"{text1}\n{text2}"
519
+ return text1 or text2
520
+
521
+ def _text_clean(self, text: str) -> str:
522
+ """Clean text by removing extra spaces, special chars, etc."""
523
+ result = text
524
+
525
+ if getattr(self, "remove_extra_spaces", True):
526
+ result = re.sub(r"\s+", " ", result)
527
+
528
+ if getattr(self, "remove_special_chars", False):
529
+ # Remove ALL special characters except alphanumeric and spaces
530
+ result = re.sub(r"[^\w\s]", "", result)
531
+
532
+ if getattr(self, "remove_empty_lines", False):
533
+ lines = [line for line in result.split("\n") if line.strip()]
534
+ result = "\n".join(lines)
535
+
536
+ return result
537
+
538
+ def _format_result_as_text(self, result: Any) -> str:
539
+ """Format result as text string."""
540
+ if result is None:
541
+ return ""
542
+ if isinstance(result, list):
543
+ return "\n".join(str(item) for item in result)
544
+ return str(result)
545
+
546
+ def get_dataframe(self) -> DataFrame:
547
+ """Return result as DataFrame - only for Text to DataFrame operation."""
548
+ if self.get_operation_name() != "Text to DataFrame":
549
+ return DataFrame(pd.DataFrame())
550
+
551
+ text = getattr(self, "text_input", "")
552
+ if not text:
553
+ return DataFrame(pd.DataFrame())
554
+
555
+ return self._text_to_dataframe(text)
556
+
557
+ def get_text(self) -> Message:
558
+ """Return result as Message - for text operations only."""
559
+ result = self.process_text()
560
+ return Message(text=self._format_result_as_text(result))
561
+
562
+ def get_data(self) -> Data:
563
+ """Return result as Data object - only for Word Count operation."""
564
+ if self.get_operation_name() != "Word Count":
565
+ return Data(data={})
566
+
567
+ result = self.process_text()
568
+ if result is None:
569
+ return Data(data={})
570
+
571
+ if isinstance(result, dict):
572
+ return Data(data=result)
573
+ if isinstance(result, list):
574
+ return Data(data={"items": result})
575
+ return Data(data={"result": str(result)})
576
+
577
+ def get_message(self) -> Message:
578
+ """Return result as simple message with the processed text."""
579
+ result = self.process_text()
580
+ return Message(text=self._format_result_as_text(result))
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from lfx.components._importing import import_mod
6
+
7
+ if TYPE_CHECKING:
8
+ from lfx.components.vllm.vllm import VllmComponent
9
+ from lfx.components.vllm.vllm_embeddings import VllmEmbeddingsComponent
10
+
11
# Maps each public attribute name to the submodule (within this package)
# that defines it; consumed by the lazy-import hook below.
_dynamic_imports = {
    "VllmComponent": "vllm",
    "VllmEmbeddingsComponent": "vllm_embeddings",
}

# Public API of the package; kept in sync with _dynamic_imports.
__all__ = [
    "VllmComponent",
    "VllmEmbeddingsComponent",
]
20
+
21
+
22
def __getattr__(attr_name: str) -> Any:
    """Lazily import vLLM components on attribute access (PEP 562 hook).

    Raises:
        AttributeError: for unknown attributes, or when the underlying
        import fails (with the original error chained).
    """
    submodule = _dynamic_imports.get(attr_name)
    if submodule is None:
        msg = f"module '{__name__}' has no attribute '{attr_name}'"
        raise AttributeError(msg)
    try:
        component = import_mod(attr_name, submodule, __spec__.parent)
    except (ImportError, AttributeError) as e:
        msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
        raise AttributeError(msg) from e
    # Cache on the module so subsequent lookups bypass this hook.
    globals()[attr_name] = component
    return component
34
+
35
+
36
def __dir__() -> list[str]:
    """Expose the lazily-imported component names to dir()."""
    return [*__all__]