lfx-nightly 0.1.12.dev23__py3-none-any.whl → 0.1.12.dev24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lfx-nightly might be problematic. Click here for more details.
- lfx/base/data/base_file.py +50 -9
- lfx/utils/constants.py +28 -0
- lfx/utils/helpers.py +6 -0
- {lfx_nightly-0.1.12.dev23.dist-info → lfx_nightly-0.1.12.dev24.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev23.dist-info → lfx_nightly-0.1.12.dev24.dist-info}/RECORD +7 -7
- {lfx_nightly-0.1.12.dev23.dist-info → lfx_nightly-0.1.12.dev24.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev23.dist-info → lfx_nightly-0.1.12.dev24.dist-info}/entry_points.txt +0 -0
lfx/base/data/base_file.py
CHANGED
|
@@ -8,6 +8,7 @@ from tempfile import TemporaryDirectory
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
from zipfile import ZipFile, is_zipfile
|
|
10
10
|
|
|
11
|
+
import anyio
|
|
11
12
|
import pandas as pd
|
|
12
13
|
|
|
13
14
|
from lfx.custom.custom_component.component import Component
|
|
@@ -15,6 +16,7 @@ from lfx.io import BoolInput, FileInput, HandleInput, Output, StrInput
|
|
|
15
16
|
from lfx.schema.data import Data
|
|
16
17
|
from lfx.schema.dataframe import DataFrame
|
|
17
18
|
from lfx.schema.message import Message
|
|
19
|
+
from lfx.utils.helpers import build_content_type_from_extension
|
|
18
20
|
|
|
19
21
|
if TYPE_CHECKING:
|
|
20
22
|
from collections.abc import Callable
|
|
@@ -242,25 +244,64 @@ class BaseFileComponent(Component, ABC):
|
|
|
242
244
|
return [Data()]
|
|
243
245
|
return data_list
|
|
244
246
|
|
|
245
|
-
def load_files_message(self) -> Message:
|
|
247
|
+
async def _extract_file_metadata(self, data_item) -> dict:
    """Build a metadata dict describing the file behind ``data_item``.

    Args:
        data_item: Object that may expose ``file_path`` (location of the
            source file) and ``data`` (a dict of already-known fields).

    Returns:
        dict: Empty when ``data_item`` has no usable ``file_path``.
        Otherwise contains ``filename``, ``file_size`` and ``mimetype``,
        plus any of ``mimetype``, ``file_size``, ``created_time`` and
        ``modified_time`` found in ``data_item.data``; values from
        ``data_item.data`` take precedence over the ones computed here.

    Raises:
        OSError: Propagated from ``stat()`` when the file cannot be read.
    """
    metadata = {}

    # An attribute that exists but is unset (None or "") is handled like a
    # missing one: anyio.Path(None) raises TypeError and anyio.Path("")
    # silently resolves to the current directory.
    file_path = getattr(data_item, "file_path", None)
    if not file_path:
        return metadata

    file_path_obj = anyio.Path(file_path)
    file_size_stat = await file_path_obj.stat()

    # Basic file metadata
    metadata["filename"] = file_path_obj.name
    metadata["file_size"] = file_size_stat.st_size

    # Use the real suffix rather than splitting the name on ".": a name
    # without any dot (e.g. "json" or "README") has no extension and must
    # not be looked up as if the whole name were one. An empty extension
    # resolves to the helper's generic fallback type.
    extension = file_path_obj.suffix.lstrip(".")
    metadata["mimetype"] = build_content_type_from_extension(extension)

    # Fields already recorded on the item override the computed values.
    item_data = getattr(data_item, "data", None)
    if isinstance(item_data, dict):
        for field in ("mimetype", "file_size", "created_time", "modified_time"):
            if field in item_data:
                metadata[field] = item_data[field]

    return metadata
|
|
275
|
+
|
|
276
|
+
def _extract_text(self, data_item) -> str:
    """Return the text content of a data item as a string.

    Args:
        data_item: Object that may expose a ``data`` dict and a
            ``get_text()`` method.

    Returns:
        str: ``get_text()`` when it yields a truthy value, otherwise
        ``data["text"]``; ``str(data_item)`` when neither provides a value
        or when the item has no dict-valued ``data`` attribute.
    """
    # Guard the attribute access so items without ``data`` fall back to their
    # string form instead of raising AttributeError.
    payload = getattr(data_item, "data", None)
    if not isinstance(payload, dict):
        return str(data_item)

    get_text = getattr(data_item, "get_text", None)
    text = (get_text() if callable(get_text) else None) or payload.get("text")
    if text is None:
        return str(data_item)
    # The caller joins the results with a string separator, so a non-string
    # payload (e.g. a number stored under "text") must be coerced here.
    return str(text)
|
|
282
|
+
|
|
283
|
+
async def load_files_message(self) -> Message:
    """Combine the text of every loaded file into a single Message.

    Returns:
        Message: An empty Message when nothing was loaded; otherwise the
        items' texts joined by the separator, carrying the metadata of the
        first item that produces any.
    """
    loaded = self.load_files_core()
    if not loaded:
        return Message()

    # A missing or empty ``separator`` attribute falls back to a blank line.
    joiner: str = getattr(self, "separator", "\n\n") or "\n\n"

    texts: list[str] = []
    file_metadata = {}
    for item in loaded:
        texts.append(self._extract_text(item))
        if file_metadata:
            continue
        # Keep probing until an item yields non-empty metadata; that result
        # is then used for the whole message.
        file_metadata = await self._extract_file_metadata(item)

    return Message(text=joiner.join(texts), metadata=file_metadata)
|
|
264
305
|
|
|
265
306
|
def load_files_path(self) -> Message:
|
|
266
307
|
"""Returns a Message containing file paths from loaded files.
|
lfx/utils/constants.py
CHANGED
|
@@ -203,3 +203,31 @@ MESSAGE_SENDER_AI = "Machine"
|
|
|
203
203
|
MESSAGE_SENDER_USER = "User"
|
|
204
204
|
MESSAGE_SENDER_NAME_AI = "AI"
|
|
205
205
|
MESSAGE_SENDER_NAME_USER = "User"
|
|
206
|
+
# Lookup table from a lower-case file extension (no leading dot) to the MIME
# content type reported for it.
EXTENSION_TO_CONTENT_TYPE: dict[str, str] = {
    # Text and documents
    "json": "application/json",
    "txt": "text/plain",
    "csv": "text/csv",
    "html": "text/html",
    "pdf": "application/pdf",
    # Images
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "svg": "image/svg+xml",
    # Audio and video
    "mp3": "audio/mpeg",
    "wav": "audio/wav",
    "mp4": "video/mp4",
    "webm": "video/webm",
    # Archives
    "zip": "application/zip",
    "tar": "application/x-tar",
    "gz": "application/gzip",
    # Office formats
    "doc": "application/msword",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    # Structured markup
    "xml": "application/xml",
    "yaml": "application/x-yaml",
    "yml": "application/x-yaml",
}
|
lfx/utils/helpers.py
CHANGED
|
@@ -5,6 +5,8 @@ from __future__ import annotations
|
|
|
5
5
|
import mimetypes
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
|
+
from lfx.utils.constants import EXTENSION_TO_CONTENT_TYPE
|
|
9
|
+
|
|
8
10
|
if TYPE_CHECKING:
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
|
|
@@ -26,3 +28,7 @@ def get_mime_type(file_path: str | Path) -> str:
|
|
|
26
28
|
msg = f"Could not determine MIME type for: {file_path}"
|
|
27
29
|
raise ValueError(msg)
|
|
28
30
|
return mime_type
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def build_content_type_from_extension(extension: str) -> str:
    """Return the MIME content type registered for a file extension.

    Args:
        extension: File extension in any letter case, with or without a
            leading dot (``"pdf"``, ``"PDF"`` and ``".pdf"`` are equivalent).

    Returns:
        The matching entry of ``EXTENSION_TO_CONTENT_TYPE``, or
        ``"application/octet-stream"`` when the extension is not listed.
    """
    # Path suffixes carry a leading dot (".pdf"); normalise so both spellings
    # resolve to the same table key instead of falling through to the default.
    normalized = extension.strip().lstrip(".").lower()
    return EXTENSION_TO_CONTENT_TYPE.get(normalized, "application/octet-stream")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lfx-nightly
|
|
3
|
-
Version: 0.1.12.dev23
|
|
3
|
+
Version: 0.1.12.dev24
|
|
4
4
|
Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
|
|
5
5
|
Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
|
|
6
6
|
Requires-Python: <3.14,>=3.10
|
|
@@ -28,7 +28,7 @@ lfx/base/compressors/model.py,sha256=-FFBAPAy9bAgvklIo7x_uwShZR5NoMHakF6f_hNnLHg
|
|
|
28
28
|
lfx/base/curl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
lfx/base/curl/parse.py,sha256=Yw6mMbGg7e-ffrBItEUJeTiljneCXlNyt5afzEP9eUI,6094
|
|
30
30
|
lfx/base/data/__init__.py,sha256=lQsYYMyAg_jA9ZF7oc-LNZsRE2uMGT6g16WzsUByHqs,81
|
|
31
|
-
lfx/base/data/base_file.py,sha256=
|
|
31
|
+
lfx/base/data/base_file.py,sha256=lHTwlupvcxm9eZLHBB_zIu1tL4VC0gqfJMfbHAMeTGc,26980
|
|
32
32
|
lfx/base/data/docling_utils.py,sha256=gVDxOZghSJEo5n-UNkVGBQYqkvfNqkNkltBhAnoaJd4,13048
|
|
33
33
|
lfx/base/data/utils.py,sha256=dGqEO4zE5s_V2Cs4j0EEeyLjYLX6Zex-EGzIOznK76o,5960
|
|
34
34
|
lfx/base/document_transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -707,10 +707,10 @@ lfx/utils/async_helpers.py,sha256=py1koriS60Y0DAcX8FY0HLSWP7o7cWiYN3T0avermhs,13
|
|
|
707
707
|
lfx/utils/component_utils.py,sha256=Zq2_HvXGd5V6ERMulY0slo-piKzKiXRK7QCOWeTnlqM,5734
|
|
708
708
|
lfx/utils/concurrency.py,sha256=2k6hwDvGejH1Zr1yLylziG9LDePoQ18eIM2vkpyb6lo,1636
|
|
709
709
|
lfx/utils/connection_string_parser.py,sha256=NmqhphFRNbhh7jvyNywDvUFgA4hPr8ikL-Sn11riizY,453
|
|
710
|
-
lfx/utils/constants.py,sha256=
|
|
710
|
+
lfx/utils/constants.py,sha256=4M8i93bROuQ7zmeKgfdNW85Znw7JFrK8KiagcDBpMRc,7036
|
|
711
711
|
lfx/utils/data_structure.py,sha256=xU3JNa_4jcGOVa_ctfMxiImEj6dKQQPE_zZsTAyy2T4,6888
|
|
712
712
|
lfx/utils/exceptions.py,sha256=RgIkI4uBssJsJUnuhluNGDSzdcuW5fnxPLhGfXYU9Uc,973
|
|
713
|
-
lfx/utils/helpers.py,sha256=
|
|
713
|
+
lfx/utils/helpers.py,sha256=0LE0barnVp-8Y5cCoDRzhDzesvXqgiT7IXP6vtTSyGE,889
|
|
714
714
|
lfx/utils/image.py,sha256=wMWBEI1gW3cFlQcio3mWgfHBaOw1uoAnqNmEacE_8xo,2133
|
|
715
715
|
lfx/utils/lazy_load.py,sha256=UDtXi8N7NT9r-FRGxsLUfDtGU_X8yqt-RQqgpc9TqAw,394
|
|
716
716
|
lfx/utils/request_utils.py,sha256=A6vmwpr7f3ZUxHg6Sz2-BdUUsyAwg84-7N_DNoPC8_Q,518
|
|
@@ -718,7 +718,7 @@ lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
|
|
|
718
718
|
lfx/utils/util.py,sha256=xGR32XDRr_TtruhjnXfI7lEWmk-vgywHAy3kz5SBowc,15725
|
|
719
719
|
lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
|
|
720
720
|
lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
|
|
721
|
-
lfx_nightly-0.1.12.
|
|
722
|
-
lfx_nightly-0.1.12.
|
|
723
|
-
lfx_nightly-0.1.12.
|
|
724
|
-
lfx_nightly-0.1.12.
|
|
721
|
+
lfx_nightly-0.1.12.dev24.dist-info/METADATA,sha256=Hqqfow1Lo7YnVrJcIr-FYvOCGqASWggQeTT0CwZ1q3I,8068
|
|
722
|
+
lfx_nightly-0.1.12.dev24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
723
|
+
lfx_nightly-0.1.12.dev24.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
|
|
724
|
+
lfx_nightly-0.1.12.dev24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|