Flowfile 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (138)
  1. flowfile/__init__.py +1 -1
  2. flowfile/api.py +0 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-d7c2c028.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d467329f.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-071b8b00.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
  8. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  9. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  10. flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
  11. flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
  12. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  13. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  14. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-cf68ec7a.js} +14 -84
  15. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
  16. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-349e33a8.js} +2 -2
  17. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-8075bd28.js} +14 -114
  18. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  19. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-3e2dda89.js} +13 -74
  20. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  21. flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
  22. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-609a265c.js} +8 -79
  23. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-97cff793.js} +12 -85
  24. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  25. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-09de0ec9.js} +18 -85
  26. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  27. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  28. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-bdf70248.js} +16 -87
  29. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-0b5a0e05.js} +13 -159
  30. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  31. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  32. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-eaddadde.js} +12 -75
  33. flowfile/web/static/assets/{Join-5a78a203.js → Join-3313371b.js} +15 -85
  34. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  35. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  36. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-e8bfc0be.js} +11 -82
  37. flowfile/web/static/assets/{Output-411ecaee.js → Output-7303bb09.js} +13 -243
  38. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  39. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-3b1c54ef.js} +14 -138
  40. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  41. flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
  42. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  43. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  44. flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
  45. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-aa12e25d.js} +13 -80
  46. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  47. flowfile/web/static/assets/Read-a2bfc618.js +243 -0
  48. flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
  49. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-48ee1a3b.js} +8 -80
  50. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  51. flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
  52. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-f06cb97a.js} +8 -77
  53. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-37f34886.js} +2 -2
  54. flowfile/web/static/assets/{Select-727688dc.js → Select-b60e6c47.js} +11 -85
  55. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  56. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  57. flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
  58. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  59. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-75b6cf4f.js} +2 -40
  60. flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
  61. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  62. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-51b1ee4d.js} +12 -97
  63. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-26835f8f.js} +14 -83
  64. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  65. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-88a4cd0c.js} +2 -2
  66. flowfile/web/static/assets/Union-4d0088eb.js +77 -0
  67. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  68. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-7d554a62.js} +22 -91
  69. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  70. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  71. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-4668595c.js} +12 -166
  72. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  73. flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
  74. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-5324d566.js} +4 -264
  75. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  76. flowfile/web/static/assets/{api-cb00cce6.js → api-271ed117.js} +1 -1
  77. flowfile/web/static/assets/{api-023d1733.js → api-31e4fea6.js} +1 -1
  78. flowfile/web/static/assets/{designer-2197d782.css → designer-091bdc3f.css} +819 -184
  79. flowfile/web/static/assets/{designer-6c322d8e.js → designer-bf3d9487.js} +2191 -703
  80. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-4d0a1cea.js} +1 -1
  81. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-025888df.js} +1 -1
  82. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-1df991ec.js} +2 -2
  83. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-d3b2b2ac.js} +3 -3
  84. flowfile/web/static/assets/{index-683fc198.js → index-d0518598.js} +210 -31
  85. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  86. flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
  87. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  88. flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
  89. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  90. flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
  91. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  92. flowfile/web/static/assets/readCsv-e8359522.js +178 -0
  93. flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
  94. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  95. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  96. flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
  97. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-ce823eee.js} +1 -1
  98. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-5476546e.js} +7 -7
  99. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  100. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-9ed00d50.js} +29 -33
  101. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-7bca2d9b.js} +1 -1
  102. flowfile/web/static/index.html +1 -1
  103. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/METADATA +1 -1
  104. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/RECORD +129 -97
  105. flowfile_core/configs/flow_logger.py +5 -13
  106. flowfile_core/configs/node_store/nodes.py +303 -44
  107. flowfile_core/configs/settings.py +2 -1
  108. flowfile_core/database/connection.py +5 -21
  109. flowfile_core/fileExplorer/funcs.py +239 -121
  110. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +28 -8
  111. flowfile_core/flowfile/flow_graph.py +97 -33
  112. flowfile_core/flowfile/flow_node/flow_node.py +41 -9
  113. flowfile_core/flowfile/handler.py +22 -3
  114. flowfile_core/flowfile/schema_callbacks.py +8 -4
  115. flowfile_core/flowfile/setting_generator/settings.py +0 -1
  116. flowfile_core/main.py +4 -1
  117. flowfile_core/routes/routes.py +59 -10
  118. flowfile_core/schemas/input_schema.py +0 -1
  119. flowfile_core/schemas/output_model.py +5 -2
  120. flowfile_core/schemas/schemas.py +2 -0
  121. flowfile_core/schemas/transform_schema.py +1 -0
  122. flowfile_worker/__init__.py +6 -35
  123. flowfile_worker/main.py +5 -2
  124. flowfile_worker/routes.py +47 -5
  125. shared/__init__.py +15 -0
  126. shared/storage_config.py +243 -0
  127. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  128. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  129. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  130. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  131. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  132. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  133. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  134. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  135. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  136. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
  137. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
  138. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -19,13 +19,30 @@ class FileInfo(BaseModel):
19
19
  exists: bool = True
20
20
 
21
21
  @classmethod
22
- def from_path(cls, path: Path) -> 'FileInfo':
23
- """Create FileInfo instance from a path."""
22
+ def from_path(cls, path: Path, sandbox_root: Optional[Path] = None,
23
+ use_relative_paths: bool = False) -> 'FileInfo':
24
+ """Create FileInfo instance from a path.
25
+
26
+ Args:
27
+ path: The path to create FileInfo from
28
+ sandbox_root: The root directory for sandboxing (for relative path calculation)
29
+ use_relative_paths: If True, store relative paths; if False, store absolute paths
30
+ """
24
31
  try:
25
32
  stats = path.stat()
33
+
34
+ # Decide whether to use relative or absolute path
35
+ if use_relative_paths and sandbox_root:
36
+ try:
37
+ display_path = str(path.relative_to(sandbox_root))
38
+ except ValueError:
39
+ display_path = str(path.absolute())
40
+ else:
41
+ display_path = str(path.absolute())
42
+
26
43
  return cls(
27
44
  name=path.name,
28
- path=str(path.absolute()),
45
+ path=display_path,
29
46
  is_directory=path.is_dir(),
30
47
  size=stats.st_size,
31
48
  file_type=path.suffix[1:] if path.suffix else "",
@@ -37,9 +54,18 @@ class FileInfo(BaseModel):
37
54
  exists=True
38
55
  )
39
56
  except (PermissionError, OSError):
57
+ # Handle error case
58
+ if use_relative_paths and sandbox_root:
59
+ try:
60
+ display_path = str(path.relative_to(sandbox_root))
61
+ except ValueError:
62
+ display_path = str(path.absolute())
63
+ else:
64
+ display_path = str(path.absolute())
65
+
40
66
  return cls(
41
67
  name=path.name,
42
- path=str(path.absolute()),
68
+ path=display_path,
43
69
  is_directory=False,
44
70
  size=0,
45
71
  file_type="",
@@ -50,30 +76,108 @@ class FileInfo(BaseModel):
50
76
  )
51
77
 
52
78
 
53
- class FileExplorer:
54
- def __init__(self, start_path: Optional[str|Path] = None):
55
- """Initialize FileExplorer with user's home directory or specified path."""
56
- if start_path is None:
57
- self.current_path = Path.home()
79
+ class SecureFileExplorer:
80
+ """File explorer with sandbox enforcement to prevent directory traversal."""
81
+
82
+ def __init__(self, start_path: Union[str, Path],
83
+ sandbox_root: Optional[Union[str, Path]] = None,
84
+ use_relative_paths: bool = False):
85
+ """Initialize SecureFileExplorer with sandboxing.
86
+
87
+ Args:
88
+ start_path: Initial directory to start in
89
+ sandbox_root: Root directory that user cannot escape from.
90
+ If None, no sandbox enforcement.
91
+ use_relative_paths: If True, FileInfo will contain relative paths;
92
+ if False (default), contains absolute paths
93
+ """
94
+ self.use_relative_paths = use_relative_paths
95
+
96
+ # Set up the sandbox root
97
+ if sandbox_root is not None:
98
+ self.sandbox_root = Path(sandbox_root).expanduser().resolve()
99
+ else:
100
+ self.sandbox_root = None
101
+
102
+ # Set initial current path
103
+ initial_path = Path(start_path).expanduser().resolve()
104
+
105
+ # If sandbox is set and initial path is outside it, use sandbox root
106
+ if self.sandbox_root and not self._is_path_safe(initial_path):
107
+ self.current_path = self.sandbox_root
58
108
  else:
59
- self.current_path = Path(start_path).expanduser().resolve()
109
+ self.current_path = initial_path
60
110
 
61
- if not self.current_path.exists():
62
- raise ValueError(f"Path does not exist: {self.current_path}")
111
+ def _is_path_safe(self, path: Path) -> bool:
112
+ """Check if a path is within the sandbox root.
63
113
 
64
- if not self.current_path.is_dir():
65
- raise ValueError(f"Path is not a directory: {self.current_path}")
114
+ Uses resolve() to handle symlinks and relative paths securely.
115
+ Returns True if no sandbox is set (no restrictions).
116
+ """
117
+ if self.sandbox_root is None:
118
+ return True # No sandbox = no restrictions
119
+
120
+ try:
121
+ resolved_path = path.resolve()
122
+ resolved_sandbox = self.sandbox_root.resolve()
123
+ # Check if the resolved path is within sandbox
124
+ resolved_path.relative_to(resolved_sandbox)
125
+ return True
126
+ except (ValueError, RuntimeError):
127
+ return False
128
+
129
+ def _sanitize_path(self, path: Union[str, Path]) -> Optional[Path]:
130
+ """Sanitize and validate a path, ensuring it stays within sandbox.
131
+
132
+ Returns None if path would escape sandbox.
133
+ """
134
+ try:
135
+ # Handle relative paths from current directory
136
+ if isinstance(path, str):
137
+ # Remove any suspicious patterns
138
+ if '..' in Path(path).parts or path.startswith('/'):
139
+ # For absolute paths or parent references, resolve from sandbox root
140
+ test_path = Path(path).expanduser()
141
+ else:
142
+ # For simple relative paths, resolve from current directory
143
+ test_path = self.current_path / path
144
+ else:
145
+ test_path = path
146
+
147
+ # Resolve to absolute path
148
+ resolved = test_path.resolve()
149
+
150
+ # Check if within sandbox
151
+ if self._is_path_safe(resolved):
152
+ return resolved
153
+ else:
154
+ return None
155
+ except (ValueError, RuntimeError, OSError):
156
+ return None
66
157
 
67
158
  @property
68
159
  def current_directory(self) -> str:
69
- """Get the current directory path."""
70
- return str(self.current_path.absolute())
160
+ """Get the current directory path relative to sandbox root."""
161
+ if self.sandbox_root is None:
162
+ return str(self.current_path)
163
+
164
+ try:
165
+ relative = self.current_path.relative_to(self.sandbox_root)
166
+ return str(relative) if str(relative) != "." else "/"
167
+ except ValueError:
168
+ return "/"
71
169
 
72
170
  @property
73
171
  def parent_directory(self) -> Optional[str]:
74
- """Get the parent directory path if it exists."""
172
+ """Get the parent directory path if it exists and is within sandbox."""
75
173
  parent = self.current_path.parent
76
- return str(parent.absolute()) if parent != self.current_path else None
174
+ if self._is_path_safe(parent) and parent != self.current_path:
175
+ try:
176
+ relative = parent.relative_to(self.sandbox_root)
177
+ return str(relative) if str(relative) != "." else "/"
178
+ except ValueError:
179
+ return None
180
+ return None
77
181
 
78
182
  def list_contents(
79
183
  self,
@@ -85,34 +189,23 @@ class FileExplorer:
85
189
  max_size: Optional[int] = None,
86
190
  sort_by: Literal['name', 'date', 'size', 'type'] = 'name',
87
191
  reverse: bool = False,
88
- exclude_patterns: Optional[List[str]] = None
192
+ exclude_patterns: Optional[List[str]] = None,
193
+ max_depth: int = 5 # Add depth limit for recursive operations
89
194
  ) -> List[FileInfo]:
90
- """
91
- List contents of the current directory with advanced filtering and sorting.
92
-
93
- Args:
94
- show_hidden: Whether to show hidden files and directories
95
- file_types: List of file extensions to include (without dots)
96
- recursive: Whether to scan subdirectories
97
- min_size: Minimum file size in bytes
98
- max_size: Maximum file size in bytes
99
- sort_by: Field to sort results by
100
- reverse: Whether to reverse sort order
101
- exclude_patterns: Glob patterns to exclude
102
-
103
- Returns:
104
- List of FileInfo objects sorted according to parameters
105
- """
195
+ """List contents with security-conscious filtering."""
106
196
  contents: List[FileInfo] = []
107
197
  excluded_paths: Set[str] = set()
108
198
 
109
199
  if exclude_patterns:
110
200
  for pattern in exclude_patterns:
111
- excluded_paths.update(str(p) for p in self.current_path.glob(pattern))
201
+ # Ensure patterns don't escape sandbox
202
+ safe_pattern = pattern.replace('../', '').replace('..\\', '')
203
+ excluded_paths.update(str(p) for p in self.current_path.glob(safe_pattern))
112
204
 
113
205
  def should_include(info: FileInfo) -> bool:
114
206
  """Determine if a file should be included based on filters."""
115
- if str(info.path) in excluded_paths:
207
+ full_path = self.current_path / info.path
208
+ if str(full_path) in excluded_paths:
116
209
  return False
117
210
  if not show_hidden and info.is_hidden:
118
211
  return False
@@ -125,20 +218,55 @@ class FileExplorer:
125
218
  return True
126
219
 
127
220
  try:
128
- # Define the scan pattern based on recursion
129
- pattern = '**/*' if recursive else '*'
221
+ if recursive:
222
+ # Use iterative approach with depth limit for safety
223
+ dirs_to_process = [(self.current_path, 0)]
224
+ processed = set()
130
225
 
131
- # Scan directory
132
- for item in self.current_path.glob(pattern):
133
- try:
134
- file_info = FileInfo.from_path(item)
135
- if should_include(file_info):
136
- contents.append(file_info)
137
- except (PermissionError, OSError):
138
- continue
226
+ while dirs_to_process:
227
+ current_dir, depth = dirs_to_process.pop(0)
228
+
229
+ # Skip if already processed or exceeds depth
230
+ if current_dir in processed or depth > max_depth:
231
+ continue
232
+
233
+ processed.add(current_dir)
234
+
235
+ try:
236
+ for item in current_dir.iterdir():
237
+ # Security check for each item
238
+ if not self._is_path_safe(item):
239
+ continue
240
+
241
+ try:
242
+ file_info = FileInfo.from_path(item, self.sandbox_root,
243
+ self.use_relative_paths)
244
+ if should_include(file_info):
245
+ contents.append(file_info)
246
+
247
+ if item.is_dir() and depth < max_depth:
248
+ dirs_to_process.append((item, depth + 1))
249
+ except (PermissionError, OSError):
250
+ continue
251
+ except (PermissionError, OSError):
252
+ continue
253
+ else:
254
+ # Non-recursive listing
255
+ for item in self.current_path.iterdir():
256
+ # Security check
257
+ if not self._is_path_safe(item):
258
+ continue
259
+
260
+ try:
261
+ file_info = FileInfo.from_path(item, self.sandbox_root,
262
+ self.use_relative_paths)
263
+ if should_include(file_info):
264
+ contents.append(file_info)
265
+ except (PermissionError, OSError):
266
+ continue
139
267
 
140
268
  except PermissionError:
141
- raise PermissionError(f"Permission denied to access directory: {self.current_path}")
269
+ raise PermissionError(f"Permission denied to access directory: {self.current_directory}")
142
270
 
143
271
  # Sort results
144
272
  sort_key = {
@@ -151,109 +279,99 @@ class FileExplorer:
151
279
  return sorted(contents, key=sort_key, reverse=reverse)
152
280
 
153
281
  def navigate_to(self, path: str) -> bool:
154
- """
155
- Navigate to a new directory path.
156
- Returns True if navigation was successful, False otherwise.
157
- """
158
- new_path = None
159
- try:
160
- new_path = Path(path).expanduser().resolve()
282
+ """Navigate to a new directory path within sandbox."""
283
+ sanitized = self._sanitize_path(path)
161
284
 
162
- if not new_path.exists() or not new_path.is_dir():
163
- return False
285
+ if sanitized is None:
286
+ return False
164
287
 
288
+ if not sanitized.exists() or not sanitized.is_dir():
289
+ return False
290
+
291
+ try:
165
292
  # Test if we can actually read the directory
166
- next(new_path.iterdir(), None)
167
- self.current_path = new_path
293
+ next(sanitized.iterdir(), None)
294
+ self.current_path = sanitized
168
295
  return True
169
-
170
- except PermissionError:
171
- if new_path:
172
- self.current_path = new_path
296
+ except (PermissionError, OSError):
297
+ # Still navigate if we have permission issues (user will see empty dir)
298
+ self.current_path = sanitized
173
299
  return True
174
- except OSError:
175
- return False
176
300
 
177
301
  def navigate_up(self) -> bool:
178
- """
179
- Navigate up to the parent directory.
180
- Returns True if navigation was successful, False otherwise.
181
- """
182
- parent = self.parent_directory
183
- if parent is None:
302
+ """Navigate up to the parent directory, respecting sandbox."""
303
+ parent = self.current_path.parent
304
+
305
+ # Check if parent is within sandbox
306
+ if not self._is_path_safe(parent):
184
307
  return False
185
- return self.navigate_to(parent)
308
+
309
+ # Don't navigate if we're already at sandbox root
310
+ if parent == self.current_path:
311
+ return False
312
+
313
+ self.current_path = parent
314
+ return True
186
315
 
187
316
  def navigate_into(self, directory_name: str) -> bool:
188
- """
189
- Navigate into a subdirectory of the current directory.
190
- Returns True if navigation was successful, False otherwise.
191
- """
317
+ """Navigate into a subdirectory, with path sanitization."""
318
+ # Sanitize directory name
319
+ if '/' in directory_name or '\\' in directory_name or '..' in directory_name:
320
+ return False
321
+
192
322
  new_path = self.current_path / directory_name
193
323
  return self.navigate_to(str(new_path))
194
324
 
325
+ def get_absolute_path(self, relative_path: str) -> Optional[Path]:
326
+ """Get absolute path for a file within sandbox.
327
+
328
+ Returns None if the path would escape sandbox.
329
+ """
330
+ sanitized = self._sanitize_path(relative_path)
331
+ return sanitized if sanitized else None
332
+
195
333
 
196
334
  def get_files_from_directory(
197
335
  dir_name: Union[str, Path],
198
336
  types: Optional[List[str]] = None,
199
337
  *,
200
338
  include_hidden: bool = False,
201
- recursive: bool = False
339
+ recursive: bool = False,
340
+ sandbox_root: Optional[Union[str, Path]] = None
202
341
  ) -> Optional[List[FileInfo]]:
203
342
  """
204
- Get list of files from a directory with optional type filtering.
343
+ Get list of files from a directory with sandbox enforcement.
205
344
 
206
345
  Args:
207
346
  dir_name: Directory path to scan
208
- types: List of file extensions to include (without dots). None means all types
347
+ types: List of file extensions to include
209
348
  include_hidden: Whether to include hidden files
210
349
  recursive: Whether to scan subdirectories
350
+ sandbox_root: Root directory to enforce as sandbox boundary
211
351
 
212
352
  Returns:
213
- List of FileInfo objects or None if directory doesn't exist
214
-
215
- Example:
216
- >>> files = get_files_from_directory("/path/to/dir", types=["pdf", "txt"])
217
- >>> for file in files:
218
- ... print(f"{file.name} - {file.size} bytes")
353
+ List of FileInfo objects or None if directory doesn't exist or is outside sandbox
219
354
  """
220
355
  try:
221
- dir_path = Path(dir_name).resolve()
222
- if not dir_path.exists():
223
- return None
224
- if not dir_path.is_dir():
225
- raise ValueError(f"Path is not a directory: {dir_path}")
226
-
227
- # Normalize file types
228
- if types:
229
- types = [t.lower().lstrip('.') for t in types]
230
-
231
- files = []
232
- pattern = '**/*' if recursive else '*'
233
-
234
- for item in dir_path.glob(pattern):
235
- try:
236
- # Skip hidden files unless specifically requested
237
- if not include_hidden and item.name.startswith('.'):
238
- continue
239
-
240
- # Skip directories unless recursive is True
241
- if item.is_dir() and not recursive:
242
- continue
243
-
244
- # Check file type if types are specified
245
- if types and not item.is_dir():
246
- if item.suffix[1:].lower() not in types:
247
- continue
248
-
249
- file_info = FileInfo.from_path(item)
250
- files.append(file_info)
251
-
252
- except (PermissionError, OSError):
253
- continue
254
-
255
- return sorted(files, key=lambda x: (not x.is_directory, x.name.lower()))
256
-
356
+ # Create a secure explorer with sandbox
357
+ if sandbox_root:
358
+ explorer = SecureFileExplorer(start_path=dir_name, sandbox_root=sandbox_root)
359
+ else:
360
+ explorer = SecureFileExplorer(start_path=dir_name)
361
+
362
+ # Use the explorer's list_contents method
363
+ return explorer.list_contents(
364
+ show_hidden=include_hidden,
365
+ file_types=types,
366
+ recursive=recursive
367
+ )
368
+
369
+ except (ValueError, PermissionError) as e:
370
+ # Return None for invalid/inaccessible directories
371
+ return None
257
372
  except Exception as e:
258
373
  raise type(e)(f"Error scanning directory {dir_name}: {str(e)}") from e
259
374
 
375
+
376
+ # Alias for backward compatibility
377
+ FileExplorer = SecureFileExplorer
@@ -39,7 +39,7 @@ def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, fil
39
39
  'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
40
40
  v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
41
41
  if not v.ok:
42
- raise Exception(f'Could not cache the data, {v.text}')
42
+ raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
43
43
  return Status(**v.json())
44
44
 
45
45
 
@@ -49,7 +49,7 @@ def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node
49
49
  'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
50
50
  v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
51
51
  if not v.ok:
52
- raise Exception(f'Could not cache the data, {v.text}')
52
+ raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
53
53
  return Status(**v.json())
54
54
 
55
55
 
@@ -67,9 +67,10 @@ def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
67
67
  flowfile_flow_id=flow_id,
68
68
  flowfile_node_id=node_id
69
69
  )
70
+ print("fuzzy join input", fuzzy_join_input)
70
71
  v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
71
72
  if not v.ok:
72
- raise Exception(f'Could not cache the data, {v.text}')
73
+ raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
73
74
  return Status(**v.json())
74
75
 
75
76
 
@@ -78,7 +79,7 @@ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: R
78
79
  f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
79
80
  params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
80
81
  if not f.ok:
81
- raise Exception(f'Could not cache the data, {f.text}')
82
+ raise Exception(f'trigger_create_operation: Could not cache the data, {f.text}')
82
83
  return Status(**f.json())
83
84
 
84
85
 
@@ -86,7 +87,7 @@ def trigger_database_read_collector(database_external_read_settings: DatabaseExt
86
87
  f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
87
88
  data=database_external_read_settings.model_dump_json())
88
89
  if not f.ok:
89
- raise Exception(f'Could not cache the data, {f.text}')
90
+ raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
90
91
  return Status(**f.json())
91
92
 
92
93
 
@@ -94,7 +95,7 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
94
95
  f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
95
96
  data=database_external_write_settings.model_dump_json())
96
97
  if not f.ok:
97
- raise Exception(f'Could not cache the data, {f.text}')
98
+ raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
98
99
  return Status(**f.json())
99
100
 
100
101
 
@@ -102,7 +103,7 @@ def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWr
102
103
  f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
103
104
  data=database_external_write_settings.model_dump_json())
104
105
  if not f.ok:
105
- raise Exception(f'Could not cache the data, {f.text}')
106
+ raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
106
107
  return Status(**f.json())
107
108
 
108
109
 
@@ -111,7 +112,7 @@ def get_results(file_ref: str) -> Status | None:
111
112
  if f.status_code == 200:
112
113
  return Status(**f.json())
113
114
  else:
114
- raise Exception(f'Could not fetch the data, {f.text}')
115
+ raise Exception(f'get_results: Could not fetch the data, {f.text}')
115
116
 
116
117
 
117
118
  def results_exists(file_ref: str):
@@ -128,6 +129,25 @@ def results_exists(file_ref: str):
128
129
  return False
129
130
 
130
131
 
132
+ def clear_task_from_worker(file_ref: str) -> bool:
133
+ """
134
+ Clears a task from the worker service by making a DELETE request. It also removes associated cached files.
135
+ Args:
136
+ file_ref (str): The unique identifier of the task to clear.
137
+
138
+ Returns:
139
+ bool: True if the task was successfully cleared, False otherwise.
140
+ """
141
+ try:
142
+ f = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
143
+ if f.status_code == 200:
144
+ return True
145
+ return False
146
+ except requests.RequestException as e:
147
+ logger.error(f"Failed to remove results: {str(e)}")
148
+ return False
149
+
150
+
131
151
  def get_df_result(encoded_df: str) -> pl.LazyFrame:
132
152
  r = decodebytes(encoded_df.encode())
133
153
  return pl.LazyFrame.deserialize(io.BytesIO(r))