Flowfile 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +4 -3
- flowfile/api.py +1 -1
- flowfile/web/static/assets/{CloudConnectionManager-c20a740f.js → CloudConnectionManager-d7c2c028.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-960b400a.js → CloudStorageReader-d467329f.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-e3decbdd.js → CloudStorageWriter-071b8b00.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
- flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-d67e2405.js → CrossJoin-cf68ec7a.js} +14 -84
- flowfile/web/static/assets/{DatabaseConnectionSettings-a81e0f7e.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-9ea35e84.js → DatabaseManager-349e33a8.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-9578bfa5.js → DatabaseReader-8075bd28.js} +14 -114
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseWriter-19531098.js → DatabaseWriter-3e2dda89.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
- flowfile/web/static/assets/{ExternalSource-2297ef96.js → ExternalSource-609a265c.js} +8 -79
- flowfile/web/static/assets/{Filter-f211c03a.js → Filter-97cff793.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-4207ea31.js → Formula-09de0ec9.js} +18 -85
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-bf120df0.js → FuzzyMatch-bdf70248.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-5bb7497a.js → GraphSolver-0b5a0e05.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{GroupBy-92c81b65.js → GroupBy-eaddadde.js} +12 -75
- flowfile/web/static/assets/{Join-4e49a274.js → Join-3313371b.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-90998ae8.js → ManualInput-e8bfc0be.js} +11 -82
- flowfile/web/static/assets/{Output-81e3e917.js → Output-7303bb09.js} +13 -243
- flowfile/web/static/assets/Output-ddc9079f.css +37 -0
- flowfile/web/static/assets/{Pivot-a3419842.js → Pivot-3b1c54ef.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
- flowfile/web/static/assets/{PolarsCode-72710deb.js → PolarsCode-aa12e25d.js} +13 -80
- flowfile/web/static/assets/Read-6b17491f.css +62 -0
- flowfile/web/static/assets/Read-a2bfc618.js +243 -0
- flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
- flowfile/web/static/assets/{RecordId-10baf191.js → RecordId-48ee1a3b.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
- flowfile/web/static/assets/{Sample-3ed9a0ae.js → Sample-f06cb97a.js} +8 -77
- flowfile/web/static/assets/{SecretManager-0d49c0e8.js → SecretManager-37f34886.js} +2 -2
- flowfile/web/static/assets/{Select-8a02a0b3.js → Select-b60e6c47.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/{SettingsSection-4c0f45f5.js → SettingsSection-75b6cf4f.js} +2 -40
- flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-f55c9f9d.js → Sort-51b1ee4d.js} +12 -97
- flowfile/web/static/assets/{TextToRows-5dbc2145.js → TextToRows-26835f8f.js} +14 -83
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{UnavailableFields-a1768e52.js → UnavailableFields-88a4cd0c.js} +2 -2
- flowfile/web/static/assets/Union-4d0088eb.js +77 -0
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/{Unique-46b250da.js → Unique-7d554a62.js} +22 -91
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-25ac84cc.js → Unpivot-4668595c.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
- flowfile/web/static/assets/{ExploreData-40476474.js → VueGraphicWalker-5324d566.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-6ef0dcef.js → api-271ed117.js} +1 -1
- flowfile/web/static/assets/{api-a0abbdc7.js → api-31e4fea6.js} +1 -1
- flowfile/web/static/assets/{designer-186f2e71.css → designer-091bdc3f.css} +819 -184
- flowfile/web/static/assets/{designer-13eabd83.js → designer-bf3d9487.js} +2214 -680
- flowfile/web/static/assets/{documentation-b87e7f6f.js → documentation-4d0a1cea.js} +1 -1
- flowfile/web/static/assets/{dropDown-13564764.js → dropDown-025888df.js} +1 -1
- flowfile/web/static/assets/{fullEditor-fd2cd6f9.js → fullEditor-1df991ec.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-71e11604.js → genericNodeSettings-d3b2b2ac.js} +3 -3
- flowfile/web/static/assets/{index-f6c15e76.js → index-d0518598.js} +210 -31
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
- flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
- flowfile/web/static/assets/readCsv-e8359522.js +178 -0
- flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
- flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
- flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
- flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
- flowfile/web/static/assets/{secretApi-dd636aa2.js → secretApi-ce823eee.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-af36165e.js → selectDynamic-5476546e.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/{vue-codemirror.esm-2847001e.js → vue-codemirror.esm-9ed00d50.js} +29 -33
- flowfile/web/static/assets/{vue-content-loader.es-0371da73.js → vue-content-loader.es-7bca2d9b.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/METADATA +2 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/RECORD +147 -117
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/nodes.py +303 -44
- flowfile_core/configs/settings.py +6 -3
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/code_generator/code_generator.py +36 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +60 -80
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +61 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +44 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +3 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +33 -10
- flowfile_core/flowfile/flow_graph.py +223 -118
- flowfile_core/flowfile/flow_node/flow_node.py +56 -19
- flowfile_core/flowfile/flow_node/models.py +0 -2
- flowfile_core/flowfile/flow_node/schema_callback.py +138 -43
- flowfile_core/flowfile/graph_tree/graph_tree.py +250 -0
- flowfile_core/flowfile/graph_tree/models.py +15 -0
- flowfile_core/flowfile/handler.py +22 -3
- flowfile_core/flowfile/manage/compatibility_enhancements.py +1 -1
- flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py} +72 -16
- flowfile_core/flowfile/setting_generator/settings.py +2 -2
- flowfile_core/flowfile/util/execution_orderer.py +9 -0
- flowfile_core/flowfile/util/node_skipper.py +8 -0
- flowfile_core/main.py +4 -1
- flowfile_core/routes/routes.py +59 -10
- flowfile_core/schemas/input_schema.py +0 -1
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +48 -3
- flowfile_core/schemas/transform_schema.py +28 -38
- flowfile_frame/__init__.py +1 -4
- flowfile_frame/flow_frame.py +33 -4
- flowfile_frame/flow_frame.pyi +2 -0
- flowfile_worker/__init__.py +6 -35
- flowfile_worker/funcs.py +7 -3
- flowfile_worker/main.py +5 -2
- flowfile_worker/models.py +3 -1
- flowfile_worker/routes.py +47 -5
- shared/__init__.py +15 -0
- shared/storage_config.py +243 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c4059daf.js +0 -701
- flowfile/web/static/assets/RecordCount-c2b5e095.js +0 -122
- flowfile/web/static/assets/Union-f2aefdc9.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-988d9efe.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_worker/polars_fuzzy_match/matcher.py +0 -435
- flowfile_worker/polars_fuzzy_match/models.py +0 -36
- flowfile_worker/polars_fuzzy_match/pre_process.py +0 -213
- flowfile_worker/polars_fuzzy_match/process.py +0 -86
- flowfile_worker/polars_fuzzy_match/utils.py +0 -50
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
- {flowfile_worker/polars_fuzzy_match → flowfile_core/flowfile/graph_tree}/__init__.py +0 -0
|
@@ -19,13 +19,30 @@ class FileInfo(BaseModel):
|
|
|
19
19
|
exists: bool = True
|
|
20
20
|
|
|
21
21
|
@classmethod
|
|
22
|
-
def from_path(cls, path: Path
|
|
23
|
-
|
|
22
|
+
def from_path(cls, path: Path, sandbox_root: Optional[Path] = None,
|
|
23
|
+
use_relative_paths: bool = False) -> 'FileInfo':
|
|
24
|
+
"""Create FileInfo instance from a path.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
path: The path to create FileInfo from
|
|
28
|
+
sandbox_root: The root directory for sandboxing (for relative path calculation)
|
|
29
|
+
use_relative_paths: If True, store relative paths; if False, store absolute paths
|
|
30
|
+
"""
|
|
24
31
|
try:
|
|
25
32
|
stats = path.stat()
|
|
33
|
+
|
|
34
|
+
# Decide whether to use relative or absolute path
|
|
35
|
+
if use_relative_paths and sandbox_root:
|
|
36
|
+
try:
|
|
37
|
+
display_path = str(path.relative_to(sandbox_root))
|
|
38
|
+
except ValueError:
|
|
39
|
+
display_path = str(path.absolute())
|
|
40
|
+
else:
|
|
41
|
+
display_path = str(path.absolute())
|
|
42
|
+
|
|
26
43
|
return cls(
|
|
27
44
|
name=path.name,
|
|
28
|
-
path=
|
|
45
|
+
path=display_path,
|
|
29
46
|
is_directory=path.is_dir(),
|
|
30
47
|
size=stats.st_size,
|
|
31
48
|
file_type=path.suffix[1:] if path.suffix else "",
|
|
@@ -37,9 +54,18 @@ class FileInfo(BaseModel):
|
|
|
37
54
|
exists=True
|
|
38
55
|
)
|
|
39
56
|
except (PermissionError, OSError):
|
|
57
|
+
# Handle error case
|
|
58
|
+
if use_relative_paths and sandbox_root:
|
|
59
|
+
try:
|
|
60
|
+
display_path = str(path.relative_to(sandbox_root))
|
|
61
|
+
except ValueError:
|
|
62
|
+
display_path = str(path.absolute())
|
|
63
|
+
else:
|
|
64
|
+
display_path = str(path.absolute())
|
|
65
|
+
|
|
40
66
|
return cls(
|
|
41
67
|
name=path.name,
|
|
42
|
-
path=
|
|
68
|
+
path=display_path,
|
|
43
69
|
is_directory=False,
|
|
44
70
|
size=0,
|
|
45
71
|
file_type="",
|
|
@@ -50,30 +76,108 @@ class FileInfo(BaseModel):
|
|
|
50
76
|
)
|
|
51
77
|
|
|
52
78
|
|
|
53
|
-
class
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
79
|
+
class SecureFileExplorer:
|
|
80
|
+
"""File explorer with sandbox enforcement to prevent directory traversal."""
|
|
81
|
+
|
|
82
|
+
def __init__(self, start_path: Union[str, Path],
|
|
83
|
+
sandbox_root: Optional[Union[str, Path]] = None,
|
|
84
|
+
use_relative_paths: bool = False):
|
|
85
|
+
"""Initialize SecureFileExplorer with sandboxing.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
start_path: Initial directory to start in
|
|
89
|
+
sandbox_root: Root directory that user cannot escape from.
|
|
90
|
+
If None, no sandbox enforcement.
|
|
91
|
+
use_relative_paths: If True, FileInfo will contain relative paths;
|
|
92
|
+
if False (default), contains absolute paths
|
|
93
|
+
"""
|
|
94
|
+
self.use_relative_paths = use_relative_paths
|
|
95
|
+
|
|
96
|
+
# Set up the sandbox root
|
|
97
|
+
if sandbox_root is not None:
|
|
98
|
+
self.sandbox_root = Path(sandbox_root).expanduser().resolve()
|
|
99
|
+
else:
|
|
100
|
+
self.sandbox_root = None
|
|
101
|
+
|
|
102
|
+
# Set initial current path
|
|
103
|
+
initial_path = Path(start_path).expanduser().resolve()
|
|
104
|
+
|
|
105
|
+
# If sandbox is set and initial path is outside it, use sandbox root
|
|
106
|
+
if self.sandbox_root and not self._is_path_safe(initial_path):
|
|
107
|
+
self.current_path = self.sandbox_root
|
|
58
108
|
else:
|
|
59
|
-
self.current_path =
|
|
109
|
+
self.current_path = initial_path
|
|
60
110
|
|
|
61
|
-
|
|
62
|
-
|
|
111
|
+
def _is_path_safe(self, path: Path) -> bool:
|
|
112
|
+
"""Check if a path is within the sandbox root.
|
|
63
113
|
|
|
64
|
-
|
|
65
|
-
|
|
114
|
+
Uses resolve() to handle symlinks and relative paths securely.
|
|
115
|
+
Returns True if no sandbox is set (no restrictions).
|
|
116
|
+
"""
|
|
117
|
+
if self.sandbox_root is None:
|
|
118
|
+
return True # No sandbox = no restrictions
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
resolved_path = path.resolve()
|
|
122
|
+
resolved_sandbox = self.sandbox_root.resolve()
|
|
123
|
+
# Check if the resolved path is within sandbox
|
|
124
|
+
resolved_path.relative_to(resolved_sandbox)
|
|
125
|
+
return True
|
|
126
|
+
except (ValueError, RuntimeError):
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
def _sanitize_path(self, path: Union[str, Path]) -> Optional[Path]:
|
|
130
|
+
"""Sanitize and validate a path, ensuring it stays within sandbox.
|
|
131
|
+
|
|
132
|
+
Returns None if path would escape sandbox.
|
|
133
|
+
"""
|
|
134
|
+
try:
|
|
135
|
+
# Handle relative paths from current directory
|
|
136
|
+
if isinstance(path, str):
|
|
137
|
+
# Remove any suspicious patterns
|
|
138
|
+
if '..' in Path(path).parts or path.startswith('/'):
|
|
139
|
+
# For absolute paths or parent references, resolve from sandbox root
|
|
140
|
+
test_path = Path(path).expanduser()
|
|
141
|
+
else:
|
|
142
|
+
# For simple relative paths, resolve from current directory
|
|
143
|
+
test_path = self.current_path / path
|
|
144
|
+
else:
|
|
145
|
+
test_path = path
|
|
146
|
+
|
|
147
|
+
# Resolve to absolute path
|
|
148
|
+
resolved = test_path.resolve()
|
|
149
|
+
|
|
150
|
+
# Check if within sandbox
|
|
151
|
+
if self._is_path_safe(resolved):
|
|
152
|
+
return resolved
|
|
153
|
+
else:
|
|
154
|
+
return None
|
|
155
|
+
except (ValueError, RuntimeError, OSError):
|
|
156
|
+
return None
|
|
66
157
|
|
|
67
158
|
@property
|
|
68
159
|
def current_directory(self) -> str:
|
|
69
|
-
"""Get the current directory path."""
|
|
70
|
-
|
|
160
|
+
"""Get the current directory path relative to sandbox root."""
|
|
161
|
+
if self.sandbox_root is None:
|
|
162
|
+
return str(self.current_path)
|
|
163
|
+
|
|
164
|
+
try:
|
|
165
|
+
relative = self.current_path.relative_to(self.sandbox_root)
|
|
166
|
+
return str(relative) if str(relative) != "." else "/"
|
|
167
|
+
except ValueError:
|
|
168
|
+
return "/"
|
|
71
169
|
|
|
72
170
|
@property
|
|
73
171
|
def parent_directory(self) -> Optional[str]:
|
|
74
|
-
"""Get the parent directory path if it exists."""
|
|
172
|
+
"""Get the parent directory path if it exists and is within sandbox."""
|
|
75
173
|
parent = self.current_path.parent
|
|
76
|
-
|
|
174
|
+
if self._is_path_safe(parent) and parent != self.current_path:
|
|
175
|
+
try:
|
|
176
|
+
relative = parent.relative_to(self.sandbox_root)
|
|
177
|
+
return str(relative) if str(relative) != "." else "/"
|
|
178
|
+
except ValueError:
|
|
179
|
+
return None
|
|
180
|
+
return None
|
|
77
181
|
|
|
78
182
|
def list_contents(
|
|
79
183
|
self,
|
|
@@ -85,34 +189,23 @@ class FileExplorer:
|
|
|
85
189
|
max_size: Optional[int] = None,
|
|
86
190
|
sort_by: Literal['name', 'date', 'size', 'type'] = 'name',
|
|
87
191
|
reverse: bool = False,
|
|
88
|
-
exclude_patterns: Optional[List[str]] = None
|
|
192
|
+
exclude_patterns: Optional[List[str]] = None,
|
|
193
|
+
max_depth: int = 5 # Add depth limit for recursive operations
|
|
89
194
|
) -> List[FileInfo]:
|
|
90
|
-
"""
|
|
91
|
-
List contents of the current directory with advanced filtering and sorting.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
show_hidden: Whether to show hidden files and directories
|
|
95
|
-
file_types: List of file extensions to include (without dots)
|
|
96
|
-
recursive: Whether to scan subdirectories
|
|
97
|
-
min_size: Minimum file size in bytes
|
|
98
|
-
max_size: Maximum file size in bytes
|
|
99
|
-
sort_by: Field to sort results by
|
|
100
|
-
reverse: Whether to reverse sort order
|
|
101
|
-
exclude_patterns: Glob patterns to exclude
|
|
102
|
-
|
|
103
|
-
Returns:
|
|
104
|
-
List of FileInfo objects sorted according to parameters
|
|
105
|
-
"""
|
|
195
|
+
"""List contents with security-conscious filtering."""
|
|
106
196
|
contents: List[FileInfo] = []
|
|
107
197
|
excluded_paths: Set[str] = set()
|
|
108
198
|
|
|
109
199
|
if exclude_patterns:
|
|
110
200
|
for pattern in exclude_patterns:
|
|
111
|
-
|
|
201
|
+
# Ensure patterns don't escape sandbox
|
|
202
|
+
safe_pattern = pattern.replace('../', '').replace('..\\', '')
|
|
203
|
+
excluded_paths.update(str(p) for p in self.current_path.glob(safe_pattern))
|
|
112
204
|
|
|
113
205
|
def should_include(info: FileInfo) -> bool:
|
|
114
206
|
"""Determine if a file should be included based on filters."""
|
|
115
|
-
|
|
207
|
+
full_path = self.current_path / info.path
|
|
208
|
+
if str(full_path) in excluded_paths:
|
|
116
209
|
return False
|
|
117
210
|
if not show_hidden and info.is_hidden:
|
|
118
211
|
return False
|
|
@@ -125,20 +218,55 @@ class FileExplorer:
|
|
|
125
218
|
return True
|
|
126
219
|
|
|
127
220
|
try:
|
|
128
|
-
|
|
129
|
-
|
|
221
|
+
if recursive:
|
|
222
|
+
# Use iterative approach with depth limit for safety
|
|
223
|
+
dirs_to_process = [(self.current_path, 0)]
|
|
224
|
+
processed = set()
|
|
130
225
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
226
|
+
while dirs_to_process:
|
|
227
|
+
current_dir, depth = dirs_to_process.pop(0)
|
|
228
|
+
|
|
229
|
+
# Skip if already processed or exceeds depth
|
|
230
|
+
if current_dir in processed or depth > max_depth:
|
|
231
|
+
continue
|
|
232
|
+
|
|
233
|
+
processed.add(current_dir)
|
|
234
|
+
|
|
235
|
+
try:
|
|
236
|
+
for item in current_dir.iterdir():
|
|
237
|
+
# Security check for each item
|
|
238
|
+
if not self._is_path_safe(item):
|
|
239
|
+
continue
|
|
240
|
+
|
|
241
|
+
try:
|
|
242
|
+
file_info = FileInfo.from_path(item, self.sandbox_root,
|
|
243
|
+
self.use_relative_paths)
|
|
244
|
+
if should_include(file_info):
|
|
245
|
+
contents.append(file_info)
|
|
246
|
+
|
|
247
|
+
if item.is_dir() and depth < max_depth:
|
|
248
|
+
dirs_to_process.append((item, depth + 1))
|
|
249
|
+
except (PermissionError, OSError):
|
|
250
|
+
continue
|
|
251
|
+
except (PermissionError, OSError):
|
|
252
|
+
continue
|
|
253
|
+
else:
|
|
254
|
+
# Non-recursive listing
|
|
255
|
+
for item in self.current_path.iterdir():
|
|
256
|
+
# Security check
|
|
257
|
+
if not self._is_path_safe(item):
|
|
258
|
+
continue
|
|
259
|
+
|
|
260
|
+
try:
|
|
261
|
+
file_info = FileInfo.from_path(item, self.sandbox_root,
|
|
262
|
+
self.use_relative_paths)
|
|
263
|
+
if should_include(file_info):
|
|
264
|
+
contents.append(file_info)
|
|
265
|
+
except (PermissionError, OSError):
|
|
266
|
+
continue
|
|
139
267
|
|
|
140
268
|
except PermissionError:
|
|
141
|
-
raise PermissionError(f"Permission denied to access directory: {self.
|
|
269
|
+
raise PermissionError(f"Permission denied to access directory: {self.current_directory}")
|
|
142
270
|
|
|
143
271
|
# Sort results
|
|
144
272
|
sort_key = {
|
|
@@ -151,109 +279,99 @@ class FileExplorer:
|
|
|
151
279
|
return sorted(contents, key=sort_key, reverse=reverse)
|
|
152
280
|
|
|
153
281
|
def navigate_to(self, path: str) -> bool:
|
|
154
|
-
"""
|
|
155
|
-
|
|
156
|
-
Returns True if navigation was successful, False otherwise.
|
|
157
|
-
"""
|
|
158
|
-
new_path = None
|
|
159
|
-
try:
|
|
160
|
-
new_path = Path(path).expanduser().resolve()
|
|
282
|
+
"""Navigate to a new directory path within sandbox."""
|
|
283
|
+
sanitized = self._sanitize_path(path)
|
|
161
284
|
|
|
162
|
-
|
|
163
|
-
|
|
285
|
+
if sanitized is None:
|
|
286
|
+
return False
|
|
164
287
|
|
|
288
|
+
if not sanitized.exists() or not sanitized.is_dir():
|
|
289
|
+
return False
|
|
290
|
+
|
|
291
|
+
try:
|
|
165
292
|
# Test if we can actually read the directory
|
|
166
|
-
next(
|
|
167
|
-
self.current_path =
|
|
293
|
+
next(sanitized.iterdir(), None)
|
|
294
|
+
self.current_path = sanitized
|
|
168
295
|
return True
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
self.current_path = new_path
|
|
296
|
+
except (PermissionError, OSError):
|
|
297
|
+
# Still navigate if we have permission issues (user will see empty dir)
|
|
298
|
+
self.current_path = sanitized
|
|
173
299
|
return True
|
|
174
|
-
except OSError:
|
|
175
|
-
return False
|
|
176
300
|
|
|
177
301
|
def navigate_up(self) -> bool:
|
|
178
|
-
"""
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
if parent is None:
|
|
302
|
+
"""Navigate up to the parent directory, respecting sandbox."""
|
|
303
|
+
parent = self.current_path.parent
|
|
304
|
+
|
|
305
|
+
# Check if parent is within sandbox
|
|
306
|
+
if not self._is_path_safe(parent):
|
|
184
307
|
return False
|
|
185
|
-
|
|
308
|
+
|
|
309
|
+
# Don't navigate if we're already at sandbox root
|
|
310
|
+
if parent == self.current_path:
|
|
311
|
+
return False
|
|
312
|
+
|
|
313
|
+
self.current_path = parent
|
|
314
|
+
return True
|
|
186
315
|
|
|
187
316
|
def navigate_into(self, directory_name: str) -> bool:
|
|
188
|
-
"""
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
317
|
+
"""Navigate into a subdirectory, with path sanitization."""
|
|
318
|
+
# Sanitize directory name
|
|
319
|
+
if '/' in directory_name or '\\' in directory_name or '..' in directory_name:
|
|
320
|
+
return False
|
|
321
|
+
|
|
192
322
|
new_path = self.current_path / directory_name
|
|
193
323
|
return self.navigate_to(str(new_path))
|
|
194
324
|
|
|
325
|
+
def get_absolute_path(self, relative_path: str) -> Optional[Path]:
|
|
326
|
+
"""Get absolute path for a file within sandbox.
|
|
327
|
+
|
|
328
|
+
Returns None if the path would escape sandbox.
|
|
329
|
+
"""
|
|
330
|
+
sanitized = self._sanitize_path(relative_path)
|
|
331
|
+
return sanitized if sanitized else None
|
|
332
|
+
|
|
195
333
|
|
|
196
334
|
def get_files_from_directory(
|
|
197
335
|
dir_name: Union[str, Path],
|
|
198
336
|
types: Optional[List[str]] = None,
|
|
199
337
|
*,
|
|
200
338
|
include_hidden: bool = False,
|
|
201
|
-
recursive: bool = False
|
|
339
|
+
recursive: bool = False,
|
|
340
|
+
sandbox_root: Optional[Union[str, Path]] = None
|
|
202
341
|
) -> Optional[List[FileInfo]]:
|
|
203
342
|
"""
|
|
204
|
-
Get list of files from a directory with
|
|
343
|
+
Get list of files from a directory with sandbox enforcement.
|
|
205
344
|
|
|
206
345
|
Args:
|
|
207
346
|
dir_name: Directory path to scan
|
|
208
|
-
types: List of file extensions to include
|
|
347
|
+
types: List of file extensions to include
|
|
209
348
|
include_hidden: Whether to include hidden files
|
|
210
349
|
recursive: Whether to scan subdirectories
|
|
350
|
+
sandbox_root: Root directory to enforce as sandbox boundary
|
|
211
351
|
|
|
212
352
|
Returns:
|
|
213
|
-
List of FileInfo objects or None if directory doesn't exist
|
|
214
|
-
|
|
215
|
-
Example:
|
|
216
|
-
>>> files = get_files_from_directory("/path/to/dir", types=["pdf", "txt"])
|
|
217
|
-
>>> for file in files:
|
|
218
|
-
... print(f"{file.name} - {file.size} bytes")
|
|
353
|
+
List of FileInfo objects or None if directory doesn't exist or is outside sandbox
|
|
219
354
|
"""
|
|
220
355
|
try:
|
|
221
|
-
|
|
222
|
-
if
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
if not include_hidden and item.name.startswith('.'):
|
|
238
|
-
continue
|
|
239
|
-
|
|
240
|
-
# Skip directories unless recursive is True
|
|
241
|
-
if item.is_dir() and not recursive:
|
|
242
|
-
continue
|
|
243
|
-
|
|
244
|
-
# Check file type if types are specified
|
|
245
|
-
if types and not item.is_dir():
|
|
246
|
-
if item.suffix[1:].lower() not in types:
|
|
247
|
-
continue
|
|
248
|
-
|
|
249
|
-
file_info = FileInfo.from_path(item)
|
|
250
|
-
files.append(file_info)
|
|
251
|
-
|
|
252
|
-
except (PermissionError, OSError):
|
|
253
|
-
continue
|
|
254
|
-
|
|
255
|
-
return sorted(files, key=lambda x: (not x.is_directory, x.name.lower()))
|
|
256
|
-
|
|
356
|
+
# Create a secure explorer with sandbox
|
|
357
|
+
if sandbox_root:
|
|
358
|
+
explorer = SecureFileExplorer(start_path=dir_name, sandbox_root=sandbox_root)
|
|
359
|
+
else:
|
|
360
|
+
explorer = SecureFileExplorer(start_path=dir_name)
|
|
361
|
+
|
|
362
|
+
# Use the explorer's list_contents method
|
|
363
|
+
return explorer.list_contents(
|
|
364
|
+
show_hidden=include_hidden,
|
|
365
|
+
file_types=types,
|
|
366
|
+
recursive=recursive
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
except (ValueError, PermissionError) as e:
|
|
370
|
+
# Return None for invalid/inaccessible directories
|
|
371
|
+
return None
|
|
257
372
|
except Exception as e:
|
|
258
373
|
raise type(e)(f"Error scanning directory {dir_name}: {str(e)}") from e
|
|
259
374
|
|
|
375
|
+
|
|
376
|
+
# Alias for backward compatibility
|
|
377
|
+
FileExplorer = SecureFileExplorer
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from typing import List, Dict, Optional, Set, Tuple
|
|
2
2
|
import polars as pl
|
|
3
3
|
|
|
4
|
+
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
5
|
+
|
|
4
6
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
5
7
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
|
|
6
8
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
@@ -825,6 +827,40 @@ class FlowGraphToPolarsConverter:
|
|
|
825
827
|
self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
|
|
826
828
|
self._add_code("")
|
|
827
829
|
|
|
830
|
+
@staticmethod
|
|
831
|
+
def _transform_fuzzy_mappings_to_string(fuzzy_mappings: List[FuzzyMapping]) -> str:
|
|
832
|
+
output_str = "["
|
|
833
|
+
for i, fuzzy_mapping in enumerate(fuzzy_mappings):
|
|
834
|
+
|
|
835
|
+
output_str += (f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
|
|
836
|
+
f" right_col='{fuzzy_mapping.right_col}', "
|
|
837
|
+
f"threshold_score={fuzzy_mapping.threshold_score}, "
|
|
838
|
+
f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')")
|
|
839
|
+
if i < len(fuzzy_mappings) - 1:
|
|
840
|
+
output_str += ",\n"
|
|
841
|
+
output_str += "]"
|
|
842
|
+
return output_str
|
|
843
|
+
|
|
844
|
+
def _handle_fuzzy_match(self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: Dict[str, str]) -> None:
|
|
845
|
+
"""Handle fuzzy match nodes."""
|
|
846
|
+
self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
|
|
847
|
+
left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
|
|
848
|
+
right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
|
|
849
|
+
if left_df == right_df:
|
|
850
|
+
right_df = "df_right"
|
|
851
|
+
self._add_code(f"{right_df} = {left_df}")
|
|
852
|
+
|
|
853
|
+
if settings.join_input.left_select.has_drop_cols():
|
|
854
|
+
self._add_code(f"{left_df} = {left_df}.drop({[c.old_name for c in settings.join_input.left_select.non_jk_drop_columns]})")
|
|
855
|
+
if settings.join_input.right_select.has_drop_cols():
|
|
856
|
+
self._add_code(f"{right_df} = {right_df}.drop({[c.old_name for c in settings.join_input.right_select.non_jk_drop_columns]})")
|
|
857
|
+
|
|
858
|
+
fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(settings.join_input.join_mapping)
|
|
859
|
+
self._add_code(f"{var_name} = fuzzy_match_dfs(\n"
|
|
860
|
+
f" left_df={left_df}, right_df={right_df},\n"
|
|
861
|
+
f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
|
|
862
|
+
f" ).lazy()")
|
|
863
|
+
|
|
828
864
|
def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: Dict[str, str]) -> None:
|
|
829
865
|
"""Handle unique/distinct nodes."""
|
|
830
866
|
input_df = input_vars.get('main', 'df')
|