Flowfile 0.3.5-py3-none-any.whl → 0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile might be problematic.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
flowfile_core/flowfile/database_connection_manager/db_connections.py

@@ -1,6 +1,8 @@
 from flowfile_core.schemas.input_schema import FullDatabaseConnection, FullDatabaseConnectionInterface
+from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection, FullCloudStorageConnectionInterface
 from sqlalchemy.orm import Session
-from flowfile_core.database.models import DatabaseConnection as DBConnectionModel, Secret
+from flowfile_core.database.models import (DatabaseConnection as DBConnectionModel, Secret,
+                                           CloudStorageConnection as DBCloudStorageConnection)
 from flowfile_core.secret_manager.secret_manager import store_secret, SecretInput, decrypt_secret
 from flowfile_core.database.connection import get_db_context
 
@@ -53,6 +55,18 @@ def get_database_connection(db: Session, connection_name: str, user_id: int) ->
     return db_connection
 
 
+def get_cloud_connection(db: Session, connection_name: str, user_id: int) -> DBCloudStorageConnection | None:
+    """
+    Get a cloud storage connection by its name and user ID.
+    """
+    db_connection = db.query(DBCloudStorageConnection).filter(
+        DBCloudStorageConnection.connection_name == connection_name,
+        DBCloudStorageConnection.user_id == user_id
+    ).first()
+
+    return db_connection
+
+
 def get_database_connection_schema(db: Session, connection_name: str, user_id: int) -> FullDatabaseConnection | None:
     """
     Get a database connection schema by its name and user ID.
@@ -84,6 +98,20 @@ def get_local_database_connection(connection_name: str, user_id: int) -> FullDat
         return get_database_connection_schema(db, connection_name, user_id)
 
 
+def get_local_cloud_connection(connection_name: str, user_id: int) -> FullCloudStorageConnection | None:
+    """
+    Get a cloud storage connection schema by its name and user ID.
+    Args:
+        connection_name (str): The name of the cloud storage connection.
+        user_id (int): The ID of the user who owns the connection.
+
+    Returns:
+        FullCloudStorageConnection | None: The cloud storage connection schema if found, otherwise None.
+    """
+    with get_db_context() as db:
+        return get_cloud_connection_schema(db, connection_name, user_id)
+
+
 def delete_database_connection(db: Session, connection_name: str, user_id: int) -> None:
     """
     Delete a database connection by its name and user ID.
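For orientation, a minimal usage sketch of the new helper (the connection name and user ID below are hypothetical; the import path follows the module location shown in the file list):

```python
# Hypothetical caller of get_local_cloud_connection; values are illustrative.
from flowfile_core.flowfile.database_connection_manager.db_connections import (
    get_local_cloud_connection,
)

connection = get_local_cloud_connection("my-s3-connection", user_id=1)
if connection is None:
    print("No cloud storage connection with that name for this user")
else:
    # The full schema includes decrypted secrets, so handle it carefully.
    print(connection.storage_type, connection.auth_method)
```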
@@ -102,7 +130,8 @@ def delete_database_connection(db: Session, connection_name: str, user_id: int)
     db.commit()
 
 
-def database_connection_interface_from_db_connection(db_connection: DBConnectionModel) -> FullDatabaseConnectionInterface:
+def database_connection_interface_from_db_connection(
+        db_connection: DBConnectionModel) -> FullDatabaseConnectionInterface:
     """
     Convert a database connection from the database model to the interface model.
     """
@@ -137,3 +166,188 @@ def get_all_database_connections_interface(db: Session, user_id: int) -> list[Fu
         raise TypeError(f"Expected a DBConnectionModel instance, got {type(db_connection)}")
 
     return result
+
+
+def store_cloud_connection(db: Session, connection: FullCloudStorageConnection, user_id: int) -> DBCloudStorageConnection:
+    """
+    Placeholder function to store a cloud database connection.
+    This function should be implemented based on specific cloud provider requirements.
+    """
+    existing_database_connection = get_cloud_connection(db, connection.connection_name, user_id)
+    if existing_database_connection:
+        raise ValueError(
+            f"Database connection with name '{connection.connection_name}' already exists for user {user_id}."
+            f" Please use a unique connection name or delete the existing connection first."
+        )
+    if connection.aws_secret_access_key is not None:
+        aws_secret_access_key_ref_id = store_secret(db,
+                                                    SecretInput(name=connection.connection_name + "_aws_secret_access_key",
+                                                                value=connection.aws_secret_access_key), user_id).id
+    else:
+        aws_secret_access_key_ref_id = None
+    if connection.azure_client_secret is not None:
+        azure_client_secret_ref_id = store_secret(db,
+                                                  SecretInput(name=connection.connection_name + "azure_client_secret",
+                                                              value=connection.azure_client_secret), user_id).id
+    else:
+        azure_client_secret_ref_id = None
+    if connection.azure_account_key is not None:
+        azure_account_key_ref_id = store_secret(db, SecretInput(name=connection.connection_name + "azure_account_key",
+                                                                value=connection.azure_account_key), user_id).id
+    else:
+        azure_account_key_ref_id = None
+
+    db_cloud_connection = DBCloudStorageConnection(
+        connection_name=connection.connection_name,
+        storage_type=connection.storage_type,
+        auth_method=connection.auth_method,
+        user_id=user_id,
+
+        # AWS S3 fields
+        aws_region=connection.aws_region,
+        aws_access_key_id=connection.aws_access_key_id,
+        aws_role_arn=connection.aws_role_arn,
+        aws_secret_access_key_id=aws_secret_access_key_ref_id,
+        aws_allow_unsafe_html=connection.aws_allow_unsafe_html,
+
+        # Azure ADLS fields
+        azure_account_name=connection.azure_account_name,
+        azure_tenant_id=connection.azure_tenant_id,
+        azure_client_id=connection.azure_client_id,
+        azure_account_key_id=azure_account_key_ref_id,
+        azure_client_secret_id=azure_client_secret_ref_id,
+
+        # Common fields
+        endpoint_url=connection.endpoint_url,
+        verify_ssl=connection.verify_ssl
+    )
+    db.add(db_cloud_connection)
+    db.commit()
+    db.refresh(db_cloud_connection)
+    return db_cloud_connection
+
+
+def get_full_cloud_storage_interface_from_db(
+        db_cloud_connection: DBCloudStorageConnection) -> FullCloudStorageConnectionInterface:
+    """
+    Convert a cloud storage connection from the database model to the interface model.
+    """
+    return FullCloudStorageConnectionInterface(
+        connection_name=db_cloud_connection.connection_name,
+        storage_type=db_cloud_connection.storage_type,
+        auth_method=db_cloud_connection.auth_method,
+        aws_allow_unsafe_html=db_cloud_connection.aws_allow_unsafe_html,
+        aws_region=db_cloud_connection.aws_region,
+        aws_access_key_id=db_cloud_connection.aws_access_key_id,
+        aws_role_arn=db_cloud_connection.aws_role_arn,
+        azure_account_name=db_cloud_connection.azure_account_name,
+        azure_tenant_id=db_cloud_connection.azure_tenant_id,
+        azure_client_id=db_cloud_connection.azure_client_id,
+        endpoint_url=db_cloud_connection.endpoint_url,
+        verify_ssl=db_cloud_connection.verify_ssl
+    )
+
+
+def get_cloud_connection_schema(db: Session, connection_name: str, user_id: int) -> FullCloudStorageConnection | None:
+    """
+    Retrieves a full cloud storage connection schema, including decrypted secrets, by its name and user ID.
+    """
+    db_connection = get_cloud_connection(db, connection_name, user_id)
+    if not db_connection:
+        return None
+
+    # Decrypt secrets associated with the connection
+    aws_secret_key = None
+    if db_connection.aws_secret_access_key_id:
+        secret_record = db.query(Secret).filter(Secret.id == db_connection.aws_secret_access_key_id).first()
+        if secret_record:
+            aws_secret_key = decrypt_secret(secret_record.encrypted_value)
+
+    azure_account_key = None
+    if db_connection.azure_account_key_id:
+        secret_record = db.query(Secret).filter(Secret.id == db_connection.azure_account_key_id).first()
+        if secret_record:
+            azure_account_key = decrypt_secret(secret_record.encrypted_value)
+
+    azure_client_secret = None
+    if db_connection.azure_client_secret_id:
+        secret_record = db.query(Secret).filter(Secret.id == db_connection.azure_client_secret_id).first()
+        if secret_record:
+            azure_client_secret = decrypt_secret(secret_record.encrypted_value)
+
+    # Construct the full Pydantic model
+    return FullCloudStorageConnection(
+        connection_name=db_connection.connection_name,
+        storage_type=db_connection.storage_type,
+        auth_method=db_connection.auth_method,
+        aws_allow_unsafe_html=db_connection.aws_allow_unsafe_html,
+        aws_region=db_connection.aws_region,
+        aws_access_key_id=db_connection.aws_access_key_id,
+        aws_secret_access_key=aws_secret_key,
+        aws_role_arn=db_connection.aws_role_arn,
+        azure_account_name=db_connection.azure_account_name,
+        azure_account_key=azure_account_key,
+        azure_tenant_id=db_connection.azure_tenant_id,
+        azure_client_id=db_connection.azure_client_id,
+        azure_client_secret=azure_client_secret,
+        endpoint_url=db_connection.endpoint_url,
+        verify_ssl=db_connection.verify_ssl
+    )
+
+
+def cloud_connection_interface_from_db_connection(
+        db_connection: DBCloudStorageConnection) -> FullCloudStorageConnectionInterface:
+    """
+    Converts a DBCloudStorageConnection model to a FullCloudStorageConnectionInterface model,
+    which safely exposes non-sensitive data.
+    """
+    return FullCloudStorageConnectionInterface(
+        connection_name=db_connection.connection_name,
+        storage_type=db_connection.storage_type,
+        auth_method=db_connection.auth_method,
+        aws_allow_unsafe_html=db_connection.aws_allow_unsafe_html,
+        aws_region=db_connection.aws_region,
+        aws_access_key_id=db_connection.aws_access_key_id,
+        aws_role_arn=db_connection.aws_role_arn,
+        azure_account_name=db_connection.azure_account_name,
+        azure_tenant_id=db_connection.azure_tenant_id,
+        azure_client_id=db_connection.azure_client_id,
+        endpoint_url=db_connection.endpoint_url,
+        verify_ssl=db_connection.verify_ssl
+    )
+
+
+def get_all_cloud_connections_interface(db: Session, user_id: int) -> list[FullCloudStorageConnectionInterface]:
+    """
+    Retrieves a list of all cloud storage connections for a user in a safe interface format (no secrets).
+    """
+    db_connections = db.query(DBCloudStorageConnection).filter(DBCloudStorageConnection.user_id == user_id).all()
+
+    return [cloud_connection_interface_from_db_connection(conn) for conn in db_connections]
+
+
+def delete_cloud_connection(db: Session, connection_name: str, user_id: int) -> None:
+    """
+    Deletes a cloud storage connection and all of its associated secrets from the database.
+    """
+    db_connection = get_cloud_connection(db, connection_name, user_id)
+
+    if db_connection:
+        # Collect all secret IDs associated with this connection
+        secret_ids_to_delete = [
+            db_connection.aws_secret_access_key_id,
+            db_connection.aws_session_token_id,
+            db_connection.azure_account_key_id,
+            db_connection.azure_client_secret_id,
+            db_connection.azure_sas_token_id
+        ]
+        # Filter out None values
+        secret_ids_to_delete = [id for id in secret_ids_to_delete if id is not None]
+
+        # Delete associated secrets if they exist
+        if secret_ids_to_delete:
+            db.query(Secret).filter(Secret.id.in_(secret_ids_to_delete)).delete(synchronize_session=False)
+
+        # Delete the connection record itself
+        db.delete(db_connection)
+        db.commit()
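Taken together, these helpers give cloud connections the same CRUD lifecycle that database connections already had. A minimal round-trip sketch, assuming a FullCloudStorageConnection built for S3 access-key auth (all field values are placeholders, and any fields not shown are assumed optional on the Pydantic model):

```python
# Sketch only: placeholder credentials, hypothetical user_id.
from flowfile_core.database.connection import get_db_context
from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection

conn = FullCloudStorageConnection(
    connection_name="example-s3",
    storage_type="s3",
    auth_method="access_key",
    aws_region="eu-west-1",
    aws_access_key_id="AKIA...",
    aws_secret_access_key="not-a-real-secret",
)

with get_db_context() as db:
    store_cloud_connection(db, conn, user_id=1)            # secret stored encrypted
    for iface in get_all_cloud_connections_interface(db, user_id=1):
        print(iface.connection_name)                       # interface model, no secrets
    delete_cloud_connection(db, "example-s3", user_id=1)   # also removes its secrets
```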
flowfile_core/flowfile/extensions.py

@@ -17,7 +17,7 @@ def get_instant_func_results(node_step: FlowNode, func_string: str) -> InstantFu
         return InstantFuncResult(result='No input data connected, so cannot evaluate the result', success=None)
     node_input = node_step.main_input[0]
     try:
-        if node_input.node_stats.
+        if node_input.node_stats.has_run_with_current_setup and node_input.is_setup and node_input.results.example_data_path:
             df = get_first_row(node_input.results.example_data_path)
         else:
             df = node_input.get_predicted_resulting_data().data_frame.collect()
flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py (new file)

@@ -0,0 +1,259 @@
+import boto3
+from botocore.exceptions import ClientError
+from typing import Optional, Dict, Any, Callable, Literal
+
+from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection
+
+
+def create_storage_options_from_boto_credentials(profile_name: Optional[str],
+                                                 region_name: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Create a storage options dictionary from AWS credentials using a boto3 profile.
+    This is the most robust way to handle profile-based authentication as it
+    bypasses Polars' internal credential provider chain, avoiding conflicts.
+
+    Parameters
+    ----------
+    profile_name
+        The name of the AWS profile in ~/.aws/credentials.
+    region_name
+        The AWS region to use.
+
+    Returns
+    -------
+    Dict[str, Any]
+        A storage options dictionary for Polars with explicit credentials.
+    """
+    session = boto3.Session(profile_name=profile_name, region_name=region_name)
+    credentials = session.get_credentials()
+    frozen_creds = credentials.get_frozen_credentials()
+
+    storage_options = {
+        "aws_access_key_id": frozen_creds.access_key,
+        "aws_secret_access_key": frozen_creds.secret_key,
+        "aws_session_token": frozen_creds.token,
+    }
+    # Use the session's region if one was resolved, otherwise use the provided one
+    if session.region_name:
+        storage_options["aws_region"] = session.region_name
+
+    print("Boto3: Successfully created storage options with explicit credentials.")
+    return storage_options
+
+
+class CloudStorageReader:
+    """Helper class to handle different cloud storage authentication methods and read operations."""
+
+    @staticmethod
+    def get_storage_options(connection: FullCloudStorageConnection) -> Dict[str, Any]:
+        """
+        Build storage options dict based on the connection type and auth method.
+
+        Args:
+            connection: Full connection details with decrypted secrets
+
+        Returns:
+            Dict containing appropriate storage options for the provider
+        """
+        if connection.storage_type == "s3":
+            return CloudStorageReader._get_s3_storage_options(connection)
+        elif connection.storage_type == "adls":
+            return CloudStorageReader._get_adls_storage_options(connection)
+        elif connection.storage_type == "gcs":
+            return CloudStorageReader._get_gcs_storage_options(connection)
+        else:
+            raise ValueError(f"Unsupported storage type: {connection.storage_type}")
+
+    @staticmethod
+    def _get_s3_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]:
+        """Build S3-specific storage options."""
+        auth_method = connection.auth_method
+        print(f"Building S3 storage options for auth_method: '{auth_method}'")
+        if auth_method == "aws-cli":
+            return create_storage_options_from_boto_credentials(
+                profile_name=connection.connection_name,
+                region_name=connection.aws_region
+            )
+
+        storage_options = {}
+        if connection.aws_region:
+            storage_options["aws_region"] = connection.aws_region
+        if connection.endpoint_url:
+            storage_options["endpoint_url"] = connection.endpoint_url
+        if not connection.verify_ssl:
+            storage_options["verify"] = "False"
+        if connection.aws_allow_unsafe_html:  # Note: Polars uses aws_allow_http
+            storage_options["aws_allow_http"] = "true"
+
+        if auth_method == "access_key":
+            storage_options["aws_access_key_id"] = connection.aws_access_key_id
+            storage_options["aws_secret_access_key"] = connection.aws_secret_access_key.get_secret_value()
+            # Explicitly clear any session token from the environment
+            storage_options["aws_session_token"] = ""
+
+        elif auth_method == "iam_role":
+            # Correctly implement IAM role assumption using boto3 STS client.
+            sts_client = boto3.client('sts', region_name=connection.aws_region)
+            assumed_role_object = sts_client.assume_role(
+                RoleArn=connection.aws_role_arn,
+                RoleSessionName="PolarsCloudStorageReaderSession"  # A descriptive session name
+            )
+            credentials = assumed_role_object['Credentials']
+            storage_options["aws_access_key_id"] = credentials['AccessKeyId']
+            storage_options["aws_secret_access_key"] = credentials['SecretAccessKey']
+            storage_options["aws_session_token"] = credentials['SessionToken']
+
+        return storage_options
+
+    @staticmethod
+    def _get_adls_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]:
+        """Build Azure ADLS-specific storage options."""
+        storage_options = {}
+
+        if connection.auth_method == "access_key":
+            # Account key authentication
+            if connection.azure_account_name:
+                storage_options["account_name"] = connection.azure_account_name
+            if connection.azure_account_key:
+                storage_options["account_key"] = connection.azure_account_key.get_secret_value()
+
+        elif connection.auth_method == "service_principal":
+            # Service principal authentication
+            if connection.azure_tenant_id:
+                storage_options["tenant_id"] = connection.azure_tenant_id
+            if connection.azure_client_id:
+                storage_options["client_id"] = connection.azure_client_id
+            if connection.azure_client_secret:
+                storage_options["client_secret"] = connection.azure_client_secret.get_secret_value()
+
+        elif connection.auth_method == "sas_token":
+            # SAS token authentication
+            if connection.azure_sas_token:
+                storage_options["sas_token"] = connection.azure_sas_token.get_secret_value()
+
+        return storage_options
+
+    @staticmethod
+    def _get_gcs_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]:
+        """Build GCS-specific storage options."""
+        # GCS typically uses service account authentication
+        # Implementation would depend on how credentials are stored
+        return {}
+
+    @staticmethod
+    def get_credential_provider(connection: 'FullCloudStorageConnection') -> Optional[Callable]:
+        """
+        Get a credential provider function if needed for the authentication method.
+
+        Args:
+            connection: Full connection details
+
+        Returns:
+            Credential provider function or None
+        """
+        if connection.storage_type == "s3" and connection.auth_method == "iam_role":
+            # For IAM role, create a credential provider
+            def aws_credential_provider():
+                # This would typically use boto3 to assume the role
+                # For now, returning a placeholder
+                return {
+                    "aws_access_key_id": "...",
+                    "aws_secret_access_key": "...",
+                    "aws_session_token": "...",
+                }, None  # expiry
+
+            return aws_credential_provider
+        return None
+
+
+def get_first_file_from_s3_dir(source: str, storage_options: Dict[str, Any] = None) -> str:
+    """
+    Get the first parquet file from an S3 directory path.
+
+    Parameters
+    ----------
+    source : str
+        S3 path with wildcards (e.g., 's3://bucket/prefix/**/*/*.parquet')
+
+    storage_options: FullCloudStorageConnection
+
+    Returns
+    -------
+    str
+        S3 URI of the first parquet file found
+
+    Raises
+    ------
+    ValueError
+        If source path is invalid or no parquet files found
+    ClientError
+        If S3 access fails
+    """
+    if not source.startswith('s3://'):
+        raise ValueError("Source must be a valid S3 URI starting with 's3://'")
+    bucket_name, prefix = _parse_s3_path(source)
+    file_extension = _get_file_extension(source)
+    base_prefix = _remove_wildcards_from_prefix(prefix)
+    s3_client = _create_s3_client(storage_options)
+
+    # Get parquet files
+    first_file = _get_first_file(s3_client, bucket_name, base_prefix, file_extension)
+
+    # Return first file URI
+    return f"s3://{bucket_name}/{first_file['Key']}"
+
+
+def _get_file_extension(source: str) -> str:
+    parts = source.split(".")
+    if len(parts) == 1:
+        raise ValueError("Source path does not contain a file extension")
+    return parts[-1].lower()
+
+
+def _parse_s3_path(source: str) -> tuple[str, str]:
+    """Parse S3 URI into bucket name and prefix."""
+    path_parts = source[5:].split('/', 1)  # Remove 's3://'
+    bucket_name = path_parts[0]
+    prefix = path_parts[1] if len(path_parts) > 1 else ''
+    return bucket_name, prefix
+
+
+def _remove_wildcards_from_prefix(prefix: str) -> str:
+    """Remove wildcard patterns from S3 prefix."""
+    return prefix.split('*')[0]
+
+
+def _create_s3_client(storage_options: Optional[Dict[str, Any]]):
+    """Create boto3 S3 client with optional credentials."""
+    if storage_options is None:
+        return boto3.client('s3')
+
+    # Handle both 'aws_region' and 'region_name' keys
+    client_options = storage_options.copy()
+    if 'aws_region' in client_options:
+        client_options['region_name'] = client_options.pop('aws_region')
+
+    return boto3.client('s3', **{k: v for k, v in client_options.items() if k != "aws_allow_http"})
+
+
+def _get_first_file(s3_client, bucket_name: str, base_prefix: str, file_extension: str) -> Dict[Any, Any]:
+    """List all parquet files in S3 bucket with given prefix."""
+    try:
+        paginator = s3_client.get_paginator('list_objects_v2')
+        pages = paginator.paginate(Bucket=bucket_name, Prefix=base_prefix)
+        for page in pages:
+            if 'Contents' in page:
+                for obj in page['Contents']:
+                    if obj['Key'].endswith(f".{file_extension}"):
+                        return obj
+            else:
+                raise ValueError(f"No objects found in s3://{bucket_name}/{base_prefix}")
+        raise ValueError(f"No {file_extension} files found in s3://{bucket_name}/{base_prefix}")
+    except ClientError as e:
+        raise ValueError(f"Failed to list files in s3://{bucket_name}/{base_prefix}: {e}")
+
+
+def ensure_path_has_wildcard_pattern(resource_path: str, file_format: Literal["csv", "parquet", "json"]):
+    if not resource_path.endswith(f"*.{file_format}"):
+        resource_path = resource_path.rstrip("/") + f"/**/*.{file_format}"
+    return resource_path
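The options produced here are plain Polars `storage_options` dictionaries, so they can be handed straight to a lazy scan. A sketch of that wiring (the `connection` object and bucket path are hypothetical):

```python
# Sketch: feed the generated options into a Polars lazy scan.
import polars as pl

storage_options = CloudStorageReader.get_storage_options(connection)
source = ensure_path_has_wildcard_pattern("s3://my-bucket/sales", "parquet")
# source is now "s3://my-bucket/sales/**/*.parquet"
lf = pl.scan_parquet(source, storage_options=storage_options)
print(lf.head(5).collect())
```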
flowfile_core/flowfile/flow_data_engine/create/funcs.py

@@ -1,9 +1,9 @@
 import polars as pl
 import os
-
 from flowfile_core.schemas import input_schema
 from flowfile_core.flowfile.flow_data_engine.sample_data import create_fake_data
 from flowfile_core.flowfile.flow_data_engine.read_excel_tables import df_from_openpyxl, df_from_calamine_xlsx
+from polars._typing import CsvEncoding
 
 
 def create_from_json(received_table: input_schema.ReceivedCsvTable):

@@ -49,11 +49,21 @@ def create_from_json(received_table: input_schema.ReceivedCsvTable):
     return data
 
 
-def
+def standardize_utf8_encoding(non_standardized_encoding: str) -> CsvEncoding:
+    if non_standardized_encoding.upper() in ('UTF-8', 'UTF8'):
+        return 'utf8'
+    elif non_standardized_encoding.upper() in ('UTF-8-LOSSY', 'UTF8-LOSSY'):
+        return 'utf8-lossy'
+    else:
+        raise ValueError(f"Encoding {non_standardized_encoding} is not supported.")
+
+
+def create_from_path_csv(received_table: input_schema.ReceivedCsvTable) -> pl.LazyFrame:
     f = received_table.abs_file_path
     gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
     low_mem = gbs_to_load > 10
-    if received_table.encoding.upper()
+    if received_table.encoding.upper() in ("UTF-8", "UTF8", 'UTF8-LOSSY', 'UTF-8-LOSSY'):
+        encoding: CsvEncoding = standardize_utf8_encoding(received_table.encoding)
         try:
             data = pl.scan_csv(f,
                                low_memory=low_mem,

@@ -61,11 +71,12 @@ def create_from_path_csv(received_table: input_schema.ReceivedCsvTable) -> pl.Da
                                separator=received_table.delimiter,
                                has_header=received_table.has_headers,
                                skip_rows=received_table.starting_from_line,
-                               encoding=
+                               encoding=encoding,
                                infer_schema_length=received_table.infer_schema_length)
             data.head(1).collect()
             return data
         except:
+
             try:
                 data = pl.scan_csv(f, low_memory=low_mem,
                                    separator=received_table.delimiter,

@@ -75,11 +86,11 @@ def create_from_path_csv(received_table: input_schema.ReceivedCsvTable) -> pl.Da
                                    ignore_errors=True)
                 return data
             except:
-                data = pl.scan_csv(f, low_memory=
+                data = pl.scan_csv(f, low_memory=False,
                                    separator=received_table.delimiter,
                                    has_header=received_table.has_headers,
                                    skip_rows=received_table.starting_from_line,
-                                   encoding=
+                                   encoding=encoding,
                                    ignore_errors=True)
                 return data
     else:

@@ -90,14 +101,14 @@ def create_from_path_csv(received_table: input_schema.ReceivedCsvTable) -> pl.Da
                             skip_rows=received_table.starting_from_line,
                             encoding=received_table.encoding,
                             ignore_errors=True, batch_size=2).next_batches(1)
-        return data[0]
+        return data[0].lazy()
 
 
 def create_random(number_of_records: int = 1000) -> pl.LazyFrame:
     return create_fake_data(number_of_records).lazy()
 
 
-def create_from_path_parquet(received_table: input_schema.ReceivedParquetTable):
+def create_from_path_parquet(received_table: input_schema.ReceivedParquetTable) -> pl.LazyFrame:
     low_mem = (os.path.getsize(received_table.abs_file_path) / 1024 / 1000 / 1000) > 2
     return pl.scan_parquet(source=received_table.abs_file_path, low_memory=low_mem)
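For reference, the expected behaviour of the new encoding normalizer, following the branches above (the `latin-1` case is an illustrative unsupported value):

```python
standardize_utf8_encoding("UTF-8")       # -> 'utf8'
standardize_utf8_encoding("utf8-lossy")  # -> 'utf8-lossy'
standardize_utf8_encoding("latin-1")     # raises ValueError
```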