Flowfile 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (169)
  1. flowfile/__init__.py +6 -1
  2. flowfile/api.py +0 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-109ecc3c.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-19cdd67a.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-48e0ae20.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  8. flowfile/web/static/assets/ColumnSelector-ecaf7c44.js +83 -0
  9. flowfile/web/static/assets/ContextMenu-2b348c4c.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-a779eed7.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-eca26a03.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-a88f8142.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-cb863dff.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-819d3267.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-84ee2834.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-060dd412.js} +14 -114
  22. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-7fc7750f.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-82c95991.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-e1a6ddc7.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-8aca894a.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  30. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-e33686d9.js} +18 -85
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-abda150d.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-4ecad1d7.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-656d07f3.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-b84ec849.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-346f4135.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-61b98268.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-2a7c8312.js +63 -0
  43. flowfile/web/static/assets/NumericInput-e36602c2.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-211a1990.js +35 -0
  45. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-eb041599.js} +13 -243
  47. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  48. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-f5c774f4.js} +14 -138
  49. flowfile/web/static/assets/PivotValidation-26546cbc.js +61 -0
  50. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  52. flowfile/web/static/assets/PivotValidation-e150a24b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-da3a7abf.js} +13 -80
  54. flowfile/web/static/assets/Read-0c768769.js +243 -0
  55. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  56. flowfile/web/static/assets/RecordCount-84736276.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-60055e6d.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-8a486004.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-2d662611.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-ef586cab.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-2e4a6965.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-310b61c0.js} +2 -40
  65. flowfile/web/static/assets/SettingsSection-5634f439.js +45 -0
  66. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  67. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  68. flowfile/web/static/assets/SettingsSection-7c68b19f.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-7298811a.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-43807bad.js +62 -0
  71. flowfile/web/static/assets/SliderInput-53105476.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-4fdebe74.js} +12 -97
  75. flowfile/web/static/assets/TextInput-28366b7e.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-9cad14ba.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-73ffa692.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-598add30.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-f620cd32.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-66239e83.js} +2 -2
  82. flowfile/web/static/assets/Union-26b10614.js +77 -0
  83. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-33b9edbb.js} +22 -91
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-ef69d0e2.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-8658388e.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-4d7861f4.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-023d1733.js → api-2d1394bd.js} +1 -1
  93. flowfile/web/static/assets/{api-cb00cce6.js → api-c908fffe.js} +1 -1
  94. flowfile/web/static/assets/{designer-6c322d8e.js → designer-1667687d.js} +2201 -705
  95. flowfile/web/static/assets/{designer-2197d782.css → designer-665e9408.css} +836 -201
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-5eed779e.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-41ebe3c2.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-0670d32d.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-38410ebf.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5ec791df.js} +210 -31
  102. flowfile/web/static/assets/outputCsv-059583b6.js +86 -0
  103. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  104. flowfile/web/static/assets/outputExcel-76b1e02c.js +56 -0
  105. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  106. flowfile/web/static/assets/outputParquet-440fd4c7.js +31 -0
  107. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  108. flowfile/web/static/assets/readCsv-9813903a.js +178 -0
  109. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  110. flowfile/web/static/assets/readExcel-7f40d237.js +203 -0
  111. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  112. flowfile/web/static/assets/readParquet-22d56002.js +26 -0
  113. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  114. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-b3cb072e.js} +1 -1
  115. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-7ad95bca.js} +7 -7
  116. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  117. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  118. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-b1dfaa46.js} +59 -33
  119. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-22bac17c.js} +1 -1
  120. flowfile/web/static/index.html +2 -2
  121. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/METADATA +1 -1
  122. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/RECORD +160 -102
  123. flowfile_core/configs/flow_logger.py +5 -13
  124. flowfile_core/configs/node_store/__init__.py +30 -0
  125. flowfile_core/configs/node_store/nodes.py +383 -99
  126. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  127. flowfile_core/configs/settings.py +2 -1
  128. flowfile_core/database/connection.py +5 -21
  129. flowfile_core/fileExplorer/funcs.py +239 -121
  130. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  131. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  132. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  133. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +28 -8
  134. flowfile_core/flowfile/flow_graph.py +117 -34
  135. flowfile_core/flowfile/flow_node/flow_node.py +45 -13
  136. flowfile_core/flowfile/handler.py +22 -3
  137. flowfile_core/flowfile/manage/open_flowfile.py +9 -1
  138. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  139. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  140. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  141. flowfile_core/flowfile/node_designer/data_types.py +146 -0
  142. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  143. flowfile_core/flowfile/schema_callbacks.py +8 -4
  144. flowfile_core/flowfile/setting_generator/settings.py +0 -1
  145. flowfile_core/main.py +5 -1
  146. flowfile_core/routes/routes.py +73 -28
  147. flowfile_core/routes/user_defined_components.py +55 -0
  148. flowfile_core/schemas/input_schema.py +7 -1
  149. flowfile_core/schemas/output_model.py +5 -2
  150. flowfile_core/schemas/schemas.py +8 -3
  151. flowfile_core/schemas/transform_schema.py +1 -0
  152. flowfile_core/utils/validate_setup.py +3 -1
  153. flowfile_worker/__init__.py +6 -35
  154. flowfile_worker/main.py +5 -2
  155. flowfile_worker/routes.py +47 -5
  156. shared/__init__.py +15 -0
  157. shared/storage_config.py +258 -0
  158. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  159. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  160. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  161. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  162. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  163. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  164. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  165. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  166. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  167. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/LICENSE +0 -0
  168. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/WHEEL +0 -0
  169. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -19,13 +19,30 @@ class FileInfo(BaseModel):
19
19
  exists: bool = True
20
20
 
21
21
  @classmethod
22
- def from_path(cls, path: Path) -> 'FileInfo':
23
- """Create FileInfo instance from a path."""
22
+ def from_path(cls, path: Path, sandbox_root: Optional[Path] = None,
23
+ use_relative_paths: bool = False) -> 'FileInfo':
24
+ """Create FileInfo instance from a path.
25
+
26
+ Args:
27
+ path: The path to create FileInfo from
28
+ sandbox_root: The root directory for sandboxing (for relative path calculation)
29
+ use_relative_paths: If True, store relative paths; if False, store absolute paths
30
+ """
24
31
  try:
25
32
  stats = path.stat()
33
+
34
+ # Decide whether to use relative or absolute path
35
+ if use_relative_paths and sandbox_root:
36
+ try:
37
+ display_path = str(path.relative_to(sandbox_root))
38
+ except ValueError:
39
+ display_path = str(path.absolute())
40
+ else:
41
+ display_path = str(path.absolute())
42
+
26
43
  return cls(
27
44
  name=path.name,
28
- path=str(path.absolute()),
45
+ path=display_path,
29
46
  is_directory=path.is_dir(),
30
47
  size=stats.st_size,
31
48
  file_type=path.suffix[1:] if path.suffix else "",
@@ -37,9 +54,18 @@ class FileInfo(BaseModel):
37
54
  exists=True
38
55
  )
39
56
  except (PermissionError, OSError):
57
+ # Handle error case
58
+ if use_relative_paths and sandbox_root:
59
+ try:
60
+ display_path = str(path.relative_to(sandbox_root))
61
+ except ValueError:
62
+ display_path = str(path.absolute())
63
+ else:
64
+ display_path = str(path.absolute())
65
+
40
66
  return cls(
41
67
  name=path.name,
42
- path=str(path.absolute()),
68
+ path=display_path,
43
69
  is_directory=False,
44
70
  size=0,
45
71
  file_type="",
@@ -50,30 +76,108 @@ class FileInfo(BaseModel):
50
76
  )
51
77
 
52
78
 
53
- class FileExplorer:
54
- def __init__(self, start_path: Optional[str|Path] = None):
55
- """Initialize FileExplorer with user's home directory or specified path."""
56
- if start_path is None:
57
- self.current_path = Path.home()
79
+ class SecureFileExplorer:
80
+ """File explorer with sandbox enforcement to prevent directory traversal."""
81
+
82
+ def __init__(self, start_path: Union[str, Path],
83
+ sandbox_root: Optional[Union[str, Path]] = None,
84
+ use_relative_paths: bool = False):
85
+ """Initialize SecureFileExplorer with sandboxing.
86
+
87
+ Args:
88
+ start_path: Initial directory to start in
89
+ sandbox_root: Root directory that user cannot escape from.
90
+ If None, no sandbox enforcement.
91
+ use_relative_paths: If True, FileInfo will contain relative paths;
92
+ if False (default), contains absolute paths
93
+ """
94
+ self.use_relative_paths = use_relative_paths
95
+
96
+ # Set up the sandbox root
97
+ if sandbox_root is not None:
98
+ self.sandbox_root = Path(sandbox_root).expanduser().resolve()
99
+ else:
100
+ self.sandbox_root = None
101
+
102
+ # Set initial current path
103
+ initial_path = Path(start_path).expanduser().resolve()
104
+
105
+ # If sandbox is set and initial path is outside it, use sandbox root
106
+ if self.sandbox_root and not self._is_path_safe(initial_path):
107
+ self.current_path = self.sandbox_root
58
108
  else:
59
- self.current_path = Path(start_path).expanduser().resolve()
109
+ self.current_path = initial_path
60
110
 
61
- if not self.current_path.exists():
62
- raise ValueError(f"Path does not exist: {self.current_path}")
111
+ def _is_path_safe(self, path: Path) -> bool:
112
+ """Check if a path is within the sandbox root.
63
113
 
64
- if not self.current_path.is_dir():
65
- raise ValueError(f"Path is not a directory: {self.current_path}")
114
+ Uses resolve() to handle symlinks and relative paths securely.
115
+ Returns True if no sandbox is set (no restrictions).
116
+ """
117
+ if self.sandbox_root is None:
118
+ return True # No sandbox = no restrictions
119
+
120
+ try:
121
+ resolved_path = path.resolve()
122
+ resolved_sandbox = self.sandbox_root.resolve()
123
+ # Check if the resolved path is within sandbox
124
+ resolved_path.relative_to(resolved_sandbox)
125
+ return True
126
+ except (ValueError, RuntimeError):
127
+ return False
128
+
129
+ def _sanitize_path(self, path: Union[str, Path]) -> Optional[Path]:
130
+ """Sanitize and validate a path, ensuring it stays within sandbox.
131
+
132
+ Returns None if path would escape sandbox.
133
+ """
134
+ try:
135
+ # Handle relative paths from current directory
136
+ if isinstance(path, str):
137
+ # Remove any suspicious patterns
138
+ if '..' in Path(path).parts or path.startswith('/'):
139
+ # For absolute paths or parent references, resolve from sandbox root
140
+ test_path = Path(path).expanduser()
141
+ else:
142
+ # For simple relative paths, resolve from current directory
143
+ test_path = self.current_path / path
144
+ else:
145
+ test_path = path
146
+
147
+ # Resolve to absolute path
148
+ resolved = test_path.resolve()
149
+
150
+ # Check if within sandbox
151
+ if self._is_path_safe(resolved):
152
+ return resolved
153
+ else:
154
+ return None
155
+ except (ValueError, RuntimeError, OSError):
156
+ return None
66
157
 
67
158
  @property
68
159
  def current_directory(self) -> str:
69
- """Get the current directory path."""
70
- return str(self.current_path.absolute())
160
+ """Get the current directory path relative to sandbox root."""
161
+ if self.sandbox_root is None:
162
+ return str(self.current_path)
163
+
164
+ try:
165
+ relative = self.current_path.relative_to(self.sandbox_root)
166
+ return str(relative) if str(relative) != "." else "/"
167
+ except ValueError:
168
+ return "/"
71
169
 
72
170
  @property
73
171
  def parent_directory(self) -> Optional[str]:
74
- """Get the parent directory path if it exists."""
172
+ """Get the parent directory path if it exists and is within sandbox."""
75
173
  parent = self.current_path.parent
76
- return str(parent.absolute()) if parent != self.current_path else None
174
+ if self._is_path_safe(parent) and parent != self.current_path:
175
+ try:
176
+ relative = parent.relative_to(self.sandbox_root)
177
+ return str(relative) if str(relative) != "." else "/"
178
+ except ValueError:
179
+ return None
180
+ return None
77
181
 
78
182
  def list_contents(
79
183
  self,
@@ -85,34 +189,23 @@ class FileExplorer:
85
189
  max_size: Optional[int] = None,
86
190
  sort_by: Literal['name', 'date', 'size', 'type'] = 'name',
87
191
  reverse: bool = False,
88
- exclude_patterns: Optional[List[str]] = None
192
+ exclude_patterns: Optional[List[str]] = None,
193
+ max_depth: int = 5 # Add depth limit for recursive operations
89
194
  ) -> List[FileInfo]:
90
- """
91
- List contents of the current directory with advanced filtering and sorting.
92
-
93
- Args:
94
- show_hidden: Whether to show hidden files and directories
95
- file_types: List of file extensions to include (without dots)
96
- recursive: Whether to scan subdirectories
97
- min_size: Minimum file size in bytes
98
- max_size: Maximum file size in bytes
99
- sort_by: Field to sort results by
100
- reverse: Whether to reverse sort order
101
- exclude_patterns: Glob patterns to exclude
102
-
103
- Returns:
104
- List of FileInfo objects sorted according to parameters
105
- """
195
+ """List contents with security-conscious filtering."""
106
196
  contents: List[FileInfo] = []
107
197
  excluded_paths: Set[str] = set()
108
198
 
109
199
  if exclude_patterns:
110
200
  for pattern in exclude_patterns:
111
- excluded_paths.update(str(p) for p in self.current_path.glob(pattern))
201
+ # Ensure patterns don't escape sandbox
202
+ safe_pattern = pattern.replace('../', '').replace('..\\', '')
203
+ excluded_paths.update(str(p) for p in self.current_path.glob(safe_pattern))
112
204
 
113
205
  def should_include(info: FileInfo) -> bool:
114
206
  """Determine if a file should be included based on filters."""
115
- if str(info.path) in excluded_paths:
207
+ full_path = self.current_path / info.path
208
+ if str(full_path) in excluded_paths:
116
209
  return False
117
210
  if not show_hidden and info.is_hidden:
118
211
  return False
@@ -125,20 +218,55 @@ class FileExplorer:
125
218
  return True
126
219
 
127
220
  try:
128
- # Define the scan pattern based on recursion
129
- pattern = '**/*' if recursive else '*'
221
+ if recursive:
222
+ # Use iterative approach with depth limit for safety
223
+ dirs_to_process = [(self.current_path, 0)]
224
+ processed = set()
130
225
 
131
- # Scan directory
132
- for item in self.current_path.glob(pattern):
133
- try:
134
- file_info = FileInfo.from_path(item)
135
- if should_include(file_info):
136
- contents.append(file_info)
137
- except (PermissionError, OSError):
138
- continue
226
+ while dirs_to_process:
227
+ current_dir, depth = dirs_to_process.pop(0)
228
+
229
+ # Skip if already processed or exceeds depth
230
+ if current_dir in processed or depth > max_depth:
231
+ continue
232
+
233
+ processed.add(current_dir)
234
+
235
+ try:
236
+ for item in current_dir.iterdir():
237
+ # Security check for each item
238
+ if not self._is_path_safe(item):
239
+ continue
240
+
241
+ try:
242
+ file_info = FileInfo.from_path(item, self.sandbox_root,
243
+ self.use_relative_paths)
244
+ if should_include(file_info):
245
+ contents.append(file_info)
246
+
247
+ if item.is_dir() and depth < max_depth:
248
+ dirs_to_process.append((item, depth + 1))
249
+ except (PermissionError, OSError):
250
+ continue
251
+ except (PermissionError, OSError):
252
+ continue
253
+ else:
254
+ # Non-recursive listing
255
+ for item in self.current_path.iterdir():
256
+ # Security check
257
+ if not self._is_path_safe(item):
258
+ continue
259
+
260
+ try:
261
+ file_info = FileInfo.from_path(item, self.sandbox_root,
262
+ self.use_relative_paths)
263
+ if should_include(file_info):
264
+ contents.append(file_info)
265
+ except (PermissionError, OSError):
266
+ continue
139
267
 
140
268
  except PermissionError:
141
- raise PermissionError(f"Permission denied to access directory: {self.current_path}")
269
+ raise PermissionError(f"Permission denied to access directory: {self.current_directory}")
142
270
 
143
271
  # Sort results
144
272
  sort_key = {
@@ -151,109 +279,99 @@ class FileExplorer:
151
279
  return sorted(contents, key=sort_key, reverse=reverse)
152
280
 
153
281
  def navigate_to(self, path: str) -> bool:
154
- """
155
- Navigate to a new directory path.
156
- Returns True if navigation was successful, False otherwise.
157
- """
158
- new_path = None
159
- try:
160
- new_path = Path(path).expanduser().resolve()
282
+ """Navigate to a new directory path within sandbox."""
283
+ sanitized = self._sanitize_path(path)
161
284
 
162
- if not new_path.exists() or not new_path.is_dir():
163
- return False
285
+ if sanitized is None:
286
+ return False
164
287
 
288
+ if not sanitized.exists() or not sanitized.is_dir():
289
+ return False
290
+
291
+ try:
165
292
  # Test if we can actually read the directory
166
- next(new_path.iterdir(), None)
167
- self.current_path = new_path
293
+ next(sanitized.iterdir(), None)
294
+ self.current_path = sanitized
168
295
  return True
169
-
170
- except PermissionError:
171
- if new_path:
172
- self.current_path = new_path
296
+ except (PermissionError, OSError):
297
+ # Still navigate if we have permission issues (user will see empty dir)
298
+ self.current_path = sanitized
173
299
  return True
174
- except OSError:
175
- return False
176
300
 
177
301
  def navigate_up(self) -> bool:
178
- """
179
- Navigate up to the parent directory.
180
- Returns True if navigation was successful, False otherwise.
181
- """
182
- parent = self.parent_directory
183
- if parent is None:
302
+ """Navigate up to the parent directory, respecting sandbox."""
303
+ parent = self.current_path.parent
304
+
305
+ # Check if parent is within sandbox
306
+ if not self._is_path_safe(parent):
184
307
  return False
185
- return self.navigate_to(parent)
308
+
309
+ # Don't navigate if we're already at sandbox root
310
+ if parent == self.current_path:
311
+ return False
312
+
313
+ self.current_path = parent
314
+ return True
186
315
 
187
316
  def navigate_into(self, directory_name: str) -> bool:
188
- """
189
- Navigate into a subdirectory of the current directory.
190
- Returns True if navigation was successful, False otherwise.
191
- """
317
+ """Navigate into a subdirectory, with path sanitization."""
318
+ # Sanitize directory name
319
+ if '/' in directory_name or '\\' in directory_name or '..' in directory_name:
320
+ return False
321
+
192
322
  new_path = self.current_path / directory_name
193
323
  return self.navigate_to(str(new_path))
194
324
 
325
+ def get_absolute_path(self, relative_path: str) -> Optional[Path]:
326
+ """Get absolute path for a file within sandbox.
327
+
328
+ Returns None if the path would escape sandbox.
329
+ """
330
+ sanitized = self._sanitize_path(relative_path)
331
+ return sanitized if sanitized else None
332
+
195
333
 
196
334
  def get_files_from_directory(
197
335
  dir_name: Union[str, Path],
198
336
  types: Optional[List[str]] = None,
199
337
  *,
200
338
  include_hidden: bool = False,
201
- recursive: bool = False
339
+ recursive: bool = False,
340
+ sandbox_root: Optional[Union[str, Path]] = None
202
341
  ) -> Optional[List[FileInfo]]:
203
342
  """
204
- Get list of files from a directory with optional type filtering.
343
+ Get list of files from a directory with sandbox enforcement.
205
344
 
206
345
  Args:
207
346
  dir_name: Directory path to scan
208
- types: List of file extensions to include (without dots). None means all types
347
+ types: List of file extensions to include
209
348
  include_hidden: Whether to include hidden files
210
349
  recursive: Whether to scan subdirectories
350
+ sandbox_root: Root directory to enforce as sandbox boundary
211
351
 
212
352
  Returns:
213
- List of FileInfo objects or None if directory doesn't exist
214
-
215
- Example:
216
- >>> files = get_files_from_directory("/path/to/dir", types=["pdf", "txt"])
217
- >>> for file in files:
218
- ... print(f"{file.name} - {file.size} bytes")
353
+ List of FileInfo objects or None if directory doesn't exist or is outside sandbox
219
354
  """
220
355
  try:
221
- dir_path = Path(dir_name).resolve()
222
- if not dir_path.exists():
223
- return None
224
- if not dir_path.is_dir():
225
- raise ValueError(f"Path is not a directory: {dir_path}")
226
-
227
- # Normalize file types
228
- if types:
229
- types = [t.lower().lstrip('.') for t in types]
230
-
231
- files = []
232
- pattern = '**/*' if recursive else '*'
233
-
234
- for item in dir_path.glob(pattern):
235
- try:
236
- # Skip hidden files unless specifically requested
237
- if not include_hidden and item.name.startswith('.'):
238
- continue
239
-
240
- # Skip directories unless recursive is True
241
- if item.is_dir() and not recursive:
242
- continue
243
-
244
- # Check file type if types are specified
245
- if types and not item.is_dir():
246
- if item.suffix[1:].lower() not in types:
247
- continue
248
-
249
- file_info = FileInfo.from_path(item)
250
- files.append(file_info)
251
-
252
- except (PermissionError, OSError):
253
- continue
254
-
255
- return sorted(files, key=lambda x: (not x.is_directory, x.name.lower()))
256
-
356
+ # Create a secure explorer with sandbox
357
+ if sandbox_root:
358
+ explorer = SecureFileExplorer(start_path=dir_name, sandbox_root=sandbox_root)
359
+ else:
360
+ explorer = SecureFileExplorer(start_path=dir_name)
361
+
362
+ # Use the explorer's list_contents method
363
+ return explorer.list_contents(
364
+ show_hidden=include_hidden,
365
+ file_types=types,
366
+ recursive=recursive
367
+ )
368
+
369
+ except (ValueError, PermissionError) as e:
370
+ # Return None for invalid/inaccessible directories
371
+ return None
257
372
  except Exception as e:
258
373
  raise type(e)(f"Error scanning directory {dir_name}: {str(e)}") from e
259
374
 
375
+
376
+ # Alias for backward compatibility
377
+ FileExplorer = SecureFileExplorer
@@ -0,0 +1,4 @@
1
+ from typing import Literal
2
+
3
+ DataTypeGroup = Literal['numeric', 'str', 'date']
4
+ ReadableDataTypeGroup = Literal['Numeric', 'String', 'Date', 'Other', 'Boolean', 'Binary', 'Complex']
@@ -1,44 +1,13 @@
1
1
 
2
2
  from dataclasses import dataclass
3
- from typing import Optional, Any, List, Dict, Literal, Iterable
3
+ from typing import Optional, Any, List, Dict, Iterable
4
4
 
5
5
  from flowfile_core.schemas import input_schema
6
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
7
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
8
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.interface import ReadableDataTypeGroup, DataTypeGroup
9
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.type_registry import convert_pl_type_to_string
8
10
  import polars as pl
9
- # TODO: rename flow_file_column to flowfile_column
10
- DataTypeGroup = Literal['numeric', 'str', 'date']
11
-
12
-
13
- def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
14
- if isinstance(pl_type, pl.List):
15
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
16
- return f"pl.List({inner_str})"
17
- elif isinstance(pl_type, pl.Array):
18
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
19
- return f"pl.Array({inner_str})"
20
- elif isinstance(pl_type, pl.Decimal):
21
- precision = pl_type.precision if hasattr(pl_type, 'precision') else None
22
- scale = pl_type.scale if hasattr(pl_type, 'scale') else None
23
- if precision is not None and scale is not None:
24
- return f"pl.Decimal({precision}, {scale})"
25
- elif precision is not None:
26
- return f"pl.Decimal({precision})"
27
- else:
28
- return "pl.Decimal()"
29
- elif isinstance(pl_type, pl.Struct):
30
- # Handle Struct with field definitions
31
- fields = []
32
- if hasattr(pl_type, 'fields'):
33
- for field in pl_type.fields:
34
- field_name = field.name
35
- field_type = convert_pl_type_to_string(field.dtype, inner=True)
36
- fields.append(f'pl.Field("{field_name}", {field_type})')
37
- field_str = ", ".join(fields)
38
- return f"pl.Struct([{field_str}])"
39
- else:
40
- # For base types, we want the full pl.TypeName format
41
- return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
42
11
 
43
12
 
44
13
  @dataclass
@@ -52,6 +21,7 @@ class FlowfileColumn:
52
21
  number_of_empty_values: int
53
22
  number_of_unique_values: int
54
23
  example_values: str
24
+ data_type_group: ReadableDataTypeGroup
55
25
  __sql_type: Optional[Any]
56
26
  __is_unique: Optional[bool]
57
27
  __nullable: Optional[bool]
@@ -75,6 +45,7 @@ class FlowfileColumn:
75
45
  self.__is_unique = None
76
46
  self.__sql_type = None
77
47
  self.__perc_unique = None
48
+ self.data_type_group = self.get_readable_datatype_group()
78
49
 
79
50
  def __repr__(self):
80
51
  """
@@ -220,6 +191,20 @@ class FlowfileColumn:
220
191
  return 'numeric'
221
192
  elif self.data_type in ('datetime', 'date', 'Date', 'Datetime', 'Time'):
222
193
  return 'date'
194
+ else:
195
+ return 'str'
196
+
197
+ def get_readable_datatype_group(self) -> ReadableDataTypeGroup:
198
+ if self.data_type in ('Utf8', 'VARCHAR', 'CHAR', 'NVARCHAR', 'String'):
199
+ return 'String'
200
+ elif self.data_type in ('fixed_decimal', 'decimal', 'float', 'integer', 'boolean', 'double', 'Int16', 'Int32',
201
+ 'Int64', 'Float32', 'Float64', 'Decimal', 'Binary', 'Boolean', 'Uint8', 'Uint16',
202
+ 'Uint32', 'Uint64'):
203
+ return 'Numeric'
204
+ elif self.data_type in ('datetime', 'date', 'Date', 'Datetime', 'Time'):
205
+ return 'Date'
206
+ else:
207
+ return 'Other'
223
208
 
224
209
  def get_polars_type(self) -> PlType:
225
210
  pl_datatype = cast_str_to_polars_type(self.data_type)
@@ -0,0 +1,36 @@
1
+
2
+ from typing import Type, Literal, List, Dict, Union, Tuple
3
+ import polars as pl
4
+ DataTypeGroup = Literal['numeric', 'string', 'datetime', 'boolean', 'binary', 'complex', 'unknown']
5
+
6
+
7
+ def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
8
+ if isinstance(pl_type, pl.List):
9
+ inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
10
+ return f"pl.List({inner_str})"
11
+ elif isinstance(pl_type, pl.Array):
12
+ inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
13
+ return f"pl.Array({inner_str})"
14
+ elif isinstance(pl_type, pl.Decimal):
15
+ precision = pl_type.precision if hasattr(pl_type, 'precision') else None
16
+ scale = pl_type.scale if hasattr(pl_type, 'scale') else None
17
+ if precision is not None and scale is not None:
18
+ return f"pl.Decimal({precision}, {scale})"
19
+ elif precision is not None:
20
+ return f"pl.Decimal({precision})"
21
+ else:
22
+ return "pl.Decimal()"
23
+ elif isinstance(pl_type, pl.Struct):
24
+ # Handle Struct with field definitions
25
+ fields = []
26
+ if hasattr(pl_type, 'fields'):
27
+ for field in pl_type.fields:
28
+ field_name = field.name
29
+ field_type = convert_pl_type_to_string(field.dtype, inner=True)
30
+ fields.append(f'pl.Field("{field_name}", {field_type})')
31
+ field_str = ", ".join(fields)
32
+ return f"pl.Struct([{field_str}])"
33
+ else:
34
+ # For base types, we want the full pl.TypeName format
35
+ return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
36
+