Flowfile 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +1 -1
- flowfile/api.py +4 -2
- flowfile/web/__init__.py +2 -0
- {flowfile-0.3.7.dist-info → flowfile-0.3.8.dist-info}/METADATA +1 -1
- {flowfile-0.3.7.dist-info → flowfile-0.3.8.dist-info}/RECORD +19 -19
- flowfile_core/configs/utils.py +5 -0
- flowfile_core/database/connection.py +1 -3
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +0 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1 -1
- flowfile_core/flowfile/flow_graph.py +4 -4
- flowfile_core/flowfile/flow_node/flow_node.py +16 -6
- flowfile_core/utils/arrow_reader.py +8 -3
- flowfile_core/utils/validate_setup.py +0 -2
- flowfile_frame/expr.py +14 -0
- flowfile_frame/flow_frame.py +1 -1
- flowfile_frame/flow_frame.pyi +3 -6
- {flowfile-0.3.7.dist-info → flowfile-0.3.8.dist-info}/LICENSE +0 -0
- {flowfile-0.3.7.dist-info → flowfile-0.3.8.dist-info}/WHEEL +0 -0
- {flowfile-0.3.7.dist-info → flowfile-0.3.8.dist-info}/entry_points.txt +0 -0
flowfile/__init__.py
CHANGED
flowfile/api.py
CHANGED
|
@@ -206,7 +206,7 @@ def check_if_in_single_mode() -> bool:
|
|
|
206
206
|
try:
|
|
207
207
|
response: requests.Response = requests.get(f"{FLOWFILE_BASE_URL}/single_mode", timeout=1)
|
|
208
208
|
if response.ok:
|
|
209
|
-
return response.json()
|
|
209
|
+
return response.json()
|
|
210
210
|
except Exception:
|
|
211
211
|
pass
|
|
212
212
|
return False
|
|
@@ -400,6 +400,8 @@ def _open_flow_in_browser(flow_id: int) -> None:
|
|
|
400
400
|
logger.info(f"Unified mode detected. Opening imported flow in browser: {flow_url}")
|
|
401
401
|
try:
|
|
402
402
|
time.sleep(0.5)
|
|
403
|
+
logger.info("Attempting to open browser tab for flow...")
|
|
404
|
+
logger.info("Opening URL in browser: %s", flow_url)
|
|
403
405
|
webbrowser.open_new_tab(flow_url)
|
|
404
406
|
except Exception as wb_err:
|
|
405
407
|
logger.warning(f"Could not automatically open browser tab: {wb_err}")
|
|
@@ -452,7 +454,7 @@ def open_graph_in_editor(flow_graph: FlowGraph, storage_location: Optional[str]
|
|
|
452
454
|
return False
|
|
453
455
|
|
|
454
456
|
flow_id = import_flow_to_editor(flow_file_path, auth_token)
|
|
455
|
-
|
|
457
|
+
print(flow_id, "flow_id", flow_in_single_mode, automatically_open_browser)
|
|
456
458
|
if flow_id is not None:
|
|
457
459
|
if flow_in_single_mode and automatically_open_browser:
|
|
458
460
|
_open_flow_in_browser(flow_id)
|
flowfile/web/__init__.py
CHANGED
|
@@ -51,6 +51,8 @@ def extend_app(app: FastAPI):
|
|
|
51
51
|
|
|
52
52
|
@app.get("/single_mode")
|
|
53
53
|
async def in_single_mode() -> bool:
|
|
54
|
+
print("Checking if single file mode is enabled")
|
|
55
|
+
print(os.environ.get('FLOWFILE_SINGLE_FILE_MODE'))
|
|
54
56
|
return os.environ.get('FLOWFILE_SINGLE_FILE_MODE', "0") == "1"
|
|
55
57
|
|
|
56
58
|
@app.get("/ui", include_in_schema=False)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
build_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
build_backends/main.py,sha256=hLmfqTeHLSTiwwZ5mUuoLQgtO40Igvl1_4NbnvzWSgI,9912
|
|
3
3
|
build_backends/main_prd.py,sha256=JR2tYCMWM5ThooQjv5pw6nwVKMQjgsiHgKMhYn9NXWI,6927
|
|
4
|
-
flowfile/__init__.py,sha256=
|
|
4
|
+
flowfile/__init__.py,sha256=ba9c-bTEliEzkOy8awqjFHYOesK6JpuJHAwdYIISALo,4025
|
|
5
5
|
flowfile/__main__.py,sha256=cpWeAL9Xw2qHfm52ZCAQzQhjoOAmVFSXPL-8MrnWAxA,2540
|
|
6
|
-
flowfile/api.py,sha256=
|
|
7
|
-
flowfile/web/__init__.py,sha256=
|
|
6
|
+
flowfile/api.py,sha256=kN03OrC_X0448DyPPVWGk6b9Y4pi3LAMqwsOKhWbDlI,18929
|
|
7
|
+
flowfile/web/__init__.py,sha256=InpX02yACH4ZJLVKFbpnJw7rg3k9KwtOKhxLsuHsJEc,5896
|
|
8
8
|
flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css,sha256=Lf3OL7Vgg7ns0m6C0FiCNY0h4vgKALPrvTTkETjQmdc,1621
|
|
9
9
|
flowfile/web/static/assets/CloudConnectionManager-c20a740f.js,sha256=HCr-EGu2ZPo3ZEcEU5ILcDd8zHCQaFjuZh4FpgGUSqQ,35768
|
|
10
10
|
flowfile/web/static/assets/CloudStorageReader-29d14fcc.css,sha256=KdFPzPpEdf69gKbDLTPG_bk5QvJKaFXLloUjfzg67-s,3154
|
|
@@ -147,9 +147,9 @@ flowfile_core/configs/flow_logger.py,sha256=Pk1yhaC58jjISMrgwhBFIue9Qj5XfYo8NfOe
|
|
|
147
147
|
flowfile_core/configs/node_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
148
148
|
flowfile_core/configs/node_store/nodes.py,sha256=cZmevDcRYthaHxM04BE2WjsswQdMpaOQHNus-sRBQlc,6388
|
|
149
149
|
flowfile_core/configs/settings.py,sha256=5EAZKlq7oehakL3F0uT_VI_xXGkcIK9NL2-KAZq_Na8,3199
|
|
150
|
-
flowfile_core/configs/utils.py,sha256=
|
|
150
|
+
flowfile_core/configs/utils.py,sha256=DBLpZfvgzTQmF3c8ABediMBcORIdEtcyy2L8eIZa4vM,601
|
|
151
151
|
flowfile_core/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
152
|
-
flowfile_core/database/connection.py,sha256=
|
|
152
|
+
flowfile_core/database/connection.py,sha256=CVqbQEQ5i-I9Dnybh_ZBRBc52I5ZfrWtR4Zqod73Rc0,2999
|
|
153
153
|
flowfile_core/database/init_db.py,sha256=9j4osTUrjsX3Y4z8hRVFDTCNzQxXqxZqXG2rwb6Mz5I,1235
|
|
154
154
|
flowfile_core/database/models.py,sha256=7PlhE9Xe5dMvI5mqAlPWGS-LAZbFWq111EfCCXRT-ec,3440
|
|
155
155
|
flowfile_core/fileExplorer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -173,10 +173,10 @@ flowfile_core/flowfile/database_connection_manager/db_connections.py,sha256=dVYr
|
|
|
173
173
|
flowfile_core/flowfile/database_connection_manager/models.py,sha256=lVJSifqznQ8fKGWBEBCy_8JeXmdKF4pnAE5P5oXRrLM,379
|
|
174
174
|
flowfile_core/flowfile/extensions.py,sha256=gUKTzNs6YBayHF51-ZacMknvrE1S8OXc89qGIdqPl2w,1814
|
|
175
175
|
flowfile_core/flowfile/flow_data_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
-
flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py,sha256=
|
|
176
|
+
flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py,sha256=QE2h9ULd1zyqQ_Ph4yqpDMekTOtqH2ypu-NwhVfmgVc,10414
|
|
177
177
|
flowfile_core/flowfile/flow_data_engine/create/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
178
|
flowfile_core/flowfile/flow_data_engine/create/funcs.py,sha256=Hvk4L3aOppt3RyKBEqFutx1t2RF8qA6QdrS1lPnJJkY,7822
|
|
179
|
-
flowfile_core/flowfile/flow_data_engine/flow_data_engine.py,sha256=
|
|
179
|
+
flowfile_core/flowfile/flow_data_engine/flow_data_engine.py,sha256=ycGWuohlkD3x0IPQQvGPcl9BHkt-fc8DOOxURZwPPQA,100062
|
|
180
180
|
flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
181
|
flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py,sha256=auDG4EeLcnXlsTe3IlU6t8jLsVUEre0-QcoQRACt0xU,7484
|
|
182
182
|
flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py,sha256=xusyOLwSxevBk8-Uy9ZKISB_KOi0JeYfZ0wihcG-Qjk,530
|
|
@@ -197,10 +197,10 @@ flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operati
|
|
|
197
197
|
flowfile_core/flowfile/flow_data_engine/threaded_processes.py,sha256=15IPoqIoCfkPMb648o9hnOzNnZINhYQxJi5hNfQuRwE,1311
|
|
198
198
|
flowfile_core/flowfile/flow_data_engine/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
199
|
flowfile_core/flowfile/flow_data_engine/utils.py,sha256=Nkw4MM6SX9veOAC8bthWTLUqA_aIFLtDnJG3bTNcwoc,6628
|
|
200
|
-
flowfile_core/flowfile/flow_graph.py,sha256=
|
|
200
|
+
flowfile_core/flowfile/flow_graph.py,sha256=OUKfAio_HNB8eQz5ofGDTSXPXUb9_otTdG34DTizcdA,91219
|
|
201
201
|
flowfile_core/flowfile/flow_graph_utils.py,sha256=nqfQdkFo4DM1WpbMuL1tZps51dKFWZi2yu_UgNUxtNU,10057
|
|
202
202
|
flowfile_core/flowfile/flow_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
|
-
flowfile_core/flowfile/flow_node/flow_node.py,sha256=
|
|
203
|
+
flowfile_core/flowfile/flow_node/flow_node.py,sha256=frRM97Ac4BcXAtYzorLDBCUU7g7aT2nHfigC-gq6Al0,52159
|
|
204
204
|
flowfile_core/flowfile/flow_node/models.py,sha256=2inXz2Ov5gl2NzGh6HGtrKM3ow93iyEixsqX2w3pgdM,9929
|
|
205
205
|
flowfile_core/flowfile/flow_node/schema_callback.py,sha256=ed8OLzRgQluC6aa2CcvSWq7WC3ZNZnLvDk9rXWwIux4,2410
|
|
206
206
|
flowfile_core/flowfile/handler.py,sha256=Ov25vblx6oiMM4atPScqk6OnuEZ1PEPgCCO6mqT0y8I,3210
|
|
@@ -247,12 +247,12 @@ flowfile_core/schemas/transform_schema.py,sha256=zwvomSAEgoEyQTdUbp_Mnawz2zVIuTv
|
|
|
247
247
|
flowfile_core/secret_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
248
248
|
flowfile_core/secret_manager/secret_manager.py,sha256=6G9hqo-GjTE7va6i6r3_ZmGo4Y26Aw_PY0W8L6pNo7E,2086
|
|
249
249
|
flowfile_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
250
|
-
flowfile_core/utils/arrow_reader.py,sha256=
|
|
250
|
+
flowfile_core/utils/arrow_reader.py,sha256=IeRaNEJvCVb-bVOmZnrZhzxFGWOVnH3p4MnXGxvUo_8,9541
|
|
251
251
|
flowfile_core/utils/excel_file_manager.py,sha256=EIad2LenHu-3Yw1FcLmE0KgmLflnvNKt07FY6s6mPlE,452
|
|
252
252
|
flowfile_core/utils/fileManager.py,sha256=LnJhK_pwjb9MIApG2e4Hp3L5Z7Wny8YYHaL9SkW8WlE,1371
|
|
253
253
|
flowfile_core/utils/fl_executor.py,sha256=eNnNZHZ9451brzZD00_X8aoCHFl1hR1gVOIGxtE0Db4,1301
|
|
254
254
|
flowfile_core/utils/utils.py,sha256=XPanIHWQjNLkd-lC7mKoWdjP3TAR_GZwYIbZ5_KF5o0,1326
|
|
255
|
-
flowfile_core/utils/validate_setup.py,sha256=
|
|
255
|
+
flowfile_core/utils/validate_setup.py,sha256=3g6JtMgda-N7Ts_xlOSS8HtW4ea3eDsFq2UbguRwzdI,1641
|
|
256
256
|
flowfile_frame/__init__.py,sha256=JDEFbzBQ-YtB129IppN06v-8KOYb3SADtOTDgCpPh0k,2044
|
|
257
257
|
flowfile_frame/adapters.py,sha256=8H1z6UpYQ3aeuSYNs8GbWfL5BUzVbGJbGzi6b9uVBt4,528
|
|
258
258
|
flowfile_frame/adding_expr.py,sha256=K9KdF3WCbPy2YO5radyEBXQ0yi6SOI4nviaPwXmKW0k,12428
|
|
@@ -260,11 +260,11 @@ flowfile_frame/cloud_storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
260
260
|
flowfile_frame/cloud_storage/frame_helpers.py,sha256=4x5I5WVKpT1PXoPvEJd28MmsHDCJNhZxAdG6MKOSA_4,2271
|
|
261
261
|
flowfile_frame/cloud_storage/secret_manager.py,sha256=lIjOf2u6ZWrXFFYCrHK2XH51p2DEl9m7Pr1uLKe1DUI,2540
|
|
262
262
|
flowfile_frame/config.py,sha256=Tl_4V8byUiO1y3ANesW2E9qeTtZvtKYJl8KU00hc1Cg,176
|
|
263
|
-
flowfile_frame/expr.py,sha256=
|
|
263
|
+
flowfile_frame/expr.py,sha256=r340IF6t2cZkUeXmhdU-wOxpiNpjiyQ4dEABO_VZxk8,59743
|
|
264
264
|
flowfile_frame/expr.pyi,sha256=kUMj6IRlczjInzUb6WXkBrxboCRhWdkxGlpiR30zCKw,89316
|
|
265
265
|
flowfile_frame/expr_name.py,sha256=0VZZwC3xc6tiwOJPu7emQ86Xp2mVu7U-j-jrsMjbc2Y,9852
|
|
266
|
-
flowfile_frame/flow_frame.py,sha256=
|
|
267
|
-
flowfile_frame/flow_frame.pyi,sha256=
|
|
266
|
+
flowfile_frame/flow_frame.py,sha256=Rvz8SCRtLEtqWCjKeTiqoSGfGX2IBCnjqeeFx7wllUI,101545
|
|
267
|
+
flowfile_frame/flow_frame.pyi,sha256=HChIEnLil9_8sFAI61jqA9ldhfDZheIaHY9HFp-Vnr0,33664
|
|
268
268
|
flowfile_frame/flow_frame_methods.py,sha256=mHFltRFeKQ4m2aDsU0HcpAhACR0gdLCeONvBFeG-wJk,29054
|
|
269
269
|
flowfile_frame/group_frame.py,sha256=aLpiT9ZEidVDJ04ORr57SPxR7WfgmziBPkBGXB9oR54,11632
|
|
270
270
|
flowfile_frame/join.py,sha256=YQCHmw8nYzvGzvrFc0jOmVMy-rahK3CPtuFdKB7loS8,2437
|
|
@@ -313,8 +313,8 @@ test_utils/s3/commands.py,sha256=m-r6rWZZP7Q7bzXwuOId1QbQkw8aq8-3qcjH8wpSqxI,142
|
|
|
313
313
|
test_utils/s3/data_generator.py,sha256=qEVN6paBvPf1W4Pxjb341WOLvTrx9D20MyJ944TAHc8,10939
|
|
314
314
|
test_utils/s3/demo_data_generator.py,sha256=YuYnbEmuKBUsuuBMjvq9SCmm11pYJc75u-OqpOGMr5A,7682
|
|
315
315
|
test_utils/s3/fixtures.py,sha256=n5qTGbW1LASFusYbeNh5_53aKui2Xp0yhT70_VXJ4NU,7778
|
|
316
|
-
flowfile-0.3.
|
|
317
|
-
flowfile-0.3.
|
|
318
|
-
flowfile-0.3.
|
|
319
|
-
flowfile-0.3.
|
|
320
|
-
flowfile-0.3.
|
|
316
|
+
flowfile-0.3.8.dist-info/LICENSE,sha256=pCfLAA27jMHReYk_wGiirZxWRRXz_Bm7PVInRCa9P5g,1075
|
|
317
|
+
flowfile-0.3.8.dist-info/METADATA,sha256=KxNspVyvQnG6Tp0njD_vr6sK34tXp1a18rUxdF6FHGg,8814
|
|
318
|
+
flowfile-0.3.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
319
|
+
flowfile-0.3.8.dist-info/entry_points.txt,sha256=3VMU1A7kmYy8DSPw7axzpy00DEaVRoVagE4jyZFAz2U,425
|
|
320
|
+
flowfile-0.3.8.dist-info/RECORD,,
|
flowfile_core/configs/utils.py
CHANGED
|
@@ -26,8 +26,6 @@ def get_app_data_dir() -> Path:
|
|
|
26
26
|
base_dir = os.path.join(os.path.expanduser("~"), ".local", "share")
|
|
27
27
|
|
|
28
28
|
app_dir = Path(base_dir) / app_name
|
|
29
|
-
|
|
30
|
-
print(f"Using application data directory: {app_dir}")
|
|
31
29
|
app_dir.mkdir(parents=True, exist_ok=True)
|
|
32
30
|
|
|
33
31
|
return app_dir
|
|
@@ -48,7 +46,7 @@ def get_database_url():
|
|
|
48
46
|
app_dir = get_app_data_dir()
|
|
49
47
|
|
|
50
48
|
db_path = app_dir / "flowfile.db"
|
|
51
|
-
logger.
|
|
49
|
+
logger.debug(f"Using database URL: sqlite:///{db_path}")
|
|
52
50
|
return f"sqlite:///{db_path}"
|
|
53
51
|
|
|
54
52
|
|
|
@@ -68,7 +68,6 @@ class CloudStorageReader:
|
|
|
68
68
|
def _get_s3_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]:
|
|
69
69
|
"""Build S3-specific storage options."""
|
|
70
70
|
auth_method = connection.auth_method
|
|
71
|
-
print(f"Building S3 storage options for auth_method: '{auth_method}'")
|
|
72
71
|
if auth_method == "aws-cli":
|
|
73
72
|
return create_storage_options_from_boto_credentials(
|
|
74
73
|
profile_name=connection.connection_name,
|
|
@@ -1956,7 +1956,7 @@ class FlowDataEngine:
|
|
|
1956
1956
|
"""
|
|
1957
1957
|
if self.is_future and not self.is_collected:
|
|
1958
1958
|
return -1
|
|
1959
|
-
calculate_in_worker_process = False if not OFFLOAD_TO_WORKER
|
|
1959
|
+
calculate_in_worker_process = False if not OFFLOAD_TO_WORKER else calculate_in_worker_process
|
|
1960
1960
|
if self.number_of_records is None or self.number_of_records < 0 or force_calculate:
|
|
1961
1961
|
if self._number_of_records_callback is not None:
|
|
1962
1962
|
self._number_of_records_callback(self)
|
|
@@ -300,8 +300,6 @@ class FlowGraph:
|
|
|
300
300
|
|
|
301
301
|
return print(tree)
|
|
302
302
|
|
|
303
|
-
|
|
304
|
-
|
|
305
303
|
def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
|
|
306
304
|
"""Calculates and applies a layered layout to all nodes in the graph.
|
|
307
305
|
|
|
@@ -490,7 +488,8 @@ class FlowGraph:
|
|
|
490
488
|
node_id=node.node_id,
|
|
491
489
|
flow_id=self.flow_id,
|
|
492
490
|
)
|
|
493
|
-
node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref
|
|
491
|
+
node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref,
|
|
492
|
+
n=min(sample_size, number_of_records))
|
|
494
493
|
return flowfile_table
|
|
495
494
|
|
|
496
495
|
def schema_callback():
|
|
@@ -1581,12 +1580,13 @@ class FlowGraph:
|
|
|
1581
1580
|
execution_order = determine_execution_order(all_nodes=[node for node in self.nodes if
|
|
1582
1581
|
node not in skip_nodes],
|
|
1583
1582
|
flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
|
|
1584
|
-
|
|
1585
1583
|
skip_node_message(self.flow_logger, skip_nodes)
|
|
1586
1584
|
execution_order_message(self.flow_logger, execution_order)
|
|
1587
1585
|
performance_mode = self.flow_settings.execution_mode == 'Performance'
|
|
1588
1586
|
if self.flow_settings.execution_location == 'local':
|
|
1589
1587
|
OFFLOAD_TO_WORKER.value = False
|
|
1588
|
+
elif self.flow_settings.execution_location == 'remote':
|
|
1589
|
+
OFFLOAD_TO_WORKER.value = True
|
|
1590
1590
|
for node in execution_order:
|
|
1591
1591
|
node_logger = self.flow_logger.get_node_logger(node.node_id)
|
|
1592
1592
|
if self.flow_settings.is_canceled:
|
|
@@ -5,7 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
|
|
|
5
5
|
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
6
6
|
from flowfile_core.schemas import input_schema, schemas
|
|
7
7
|
from flowfile_core.configs.flow_logger import NodeLogger
|
|
8
|
-
from flowfile_core.configs.settings import SINGLE_FILE_MODE
|
|
8
|
+
from flowfile_core.configs.settings import SINGLE_FILE_MODE, OFFLOAD_TO_WORKER
|
|
9
9
|
|
|
10
10
|
from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
|
|
11
11
|
from flowfile_core.flowfile.utils import get_hash
|
|
@@ -724,9 +724,19 @@ class FlowNode:
|
|
|
724
724
|
Raises:
|
|
725
725
|
Exception: Propagates exceptions from the execution.
|
|
726
726
|
"""
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
727
|
+
def example_data_generator():
|
|
728
|
+
example_data = None
|
|
729
|
+
|
|
730
|
+
def get_example_data():
|
|
731
|
+
nonlocal example_data
|
|
732
|
+
if example_data is None:
|
|
733
|
+
example_data = resulting_data.get_sample(100).to_arrow()
|
|
734
|
+
return example_data
|
|
735
|
+
return get_example_data
|
|
736
|
+
resulting_data = self.get_resulting_data()
|
|
737
|
+
|
|
738
|
+
if not performance_mode:
|
|
739
|
+
self.results.example_data_generator = example_data_generator()
|
|
730
740
|
self.node_schema.result_schema = self.results.resulting_data.schema
|
|
731
741
|
self.node_stats.has_completed_last_run = True
|
|
732
742
|
|
|
@@ -899,8 +909,8 @@ class FlowNode:
|
|
|
899
909
|
else:
|
|
900
910
|
self.results.errors = str(e)
|
|
901
911
|
node_logger.error(f'Error with running the node: {e}')
|
|
902
|
-
elif ((run_location == 'local' or SINGLE_FILE_MODE) and
|
|
903
|
-
|
|
912
|
+
elif ((run_location == 'local' or SINGLE_FILE_MODE) and
|
|
913
|
+
(not self.node_stats.has_run_with_current_setup or self.node_template.node_group == "output")):
|
|
904
914
|
try:
|
|
905
915
|
node_logger.info('Executing fully locally')
|
|
906
916
|
self.execute_full_local(performance_mode)
|
|
@@ -138,11 +138,16 @@ def collect_batches(reader: pa.ipc.RecordBatchFileReader, n: int) -> Tuple[List[
|
|
|
138
138
|
rows_collected = 0
|
|
139
139
|
|
|
140
140
|
for batch in iter_batches(reader, n, rows_collected):
|
|
141
|
-
|
|
141
|
+
|
|
142
142
|
rows_collected += batch.num_rows
|
|
143
143
|
logger.debug(f"Collected batch: total rows now {rows_collected}")
|
|
144
144
|
if rows_collected >= n:
|
|
145
|
+
if rows_collected > n:
|
|
146
|
+
batches.append(batch.slice(0, n - (rows_collected - batch.num_rows)))
|
|
147
|
+
else:
|
|
148
|
+
batches.append(batch)
|
|
145
149
|
break
|
|
150
|
+
batches.append(batch)
|
|
146
151
|
|
|
147
152
|
logger.info(f"Finished collecting {len(batches)} batches with {rows_collected} total rows")
|
|
148
153
|
return batches, rows_collected
|
|
@@ -217,7 +222,7 @@ def read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> pa.Table:
|
|
|
217
222
|
|
|
218
223
|
table = pa.Table.from_batches(batches) # type: ignore
|
|
219
224
|
logger.info(f"Successfully read {rows_collected} rows from {file_path}")
|
|
220
|
-
|
|
225
|
+
return table
|
|
221
226
|
|
|
222
227
|
|
|
223
228
|
def get_read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> Callable[[], pa.Table]:
|
|
@@ -244,4 +249,4 @@ def get_read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> Calla
|
|
|
244
249
|
>>> table = reader_func()
|
|
245
250
|
"""
|
|
246
251
|
logger.info(f"Creating reader function for {file_path} with n={n}, strict={strict}")
|
|
247
|
-
return lambda: read_top_n(file_path, n, strict)
|
|
252
|
+
return lambda: read_top_n(file_path, n, strict)
|
|
@@ -34,8 +34,6 @@ def validate_setup():
|
|
|
34
34
|
check_if_node_has_add_function_in_flow_graph(node)
|
|
35
35
|
check_if_node_has_input_schema_definition(node)
|
|
36
36
|
|
|
37
|
-
print("All nodes have corresponding functions in FlowGraph and input schema definitions.")
|
|
38
|
-
|
|
39
37
|
|
|
40
38
|
if __name__ == "__main__":
|
|
41
39
|
validate_setup()
|
flowfile_frame/expr.py
CHANGED
|
@@ -490,6 +490,20 @@ class Expr:
|
|
|
490
490
|
result.agg_func = "sum"
|
|
491
491
|
return result
|
|
492
492
|
|
|
493
|
+
def unique_counts(self):
|
|
494
|
+
"""
|
|
495
|
+
Return the number of unique values in the column.
|
|
496
|
+
|
|
497
|
+
Returns
|
|
498
|
+
-------
|
|
499
|
+
Expr
|
|
500
|
+
A new expression with the unique counts
|
|
501
|
+
"""
|
|
502
|
+
result_expr = self.expr.unique_counts() if self.expr is not None else None
|
|
503
|
+
result = self._create_next_expr(method_name="unique_counts", result_expr=result_expr, is_complex=self.is_complex)
|
|
504
|
+
result.agg_func = "unique_counts"
|
|
505
|
+
return result
|
|
506
|
+
|
|
493
507
|
def implode(self):
|
|
494
508
|
result_expr = self.expr.implode() if self.expr is not None else None
|
|
495
509
|
result = self._create_next_expr(method_name="implode", result_expr=result_expr, is_complex=self.is_complex)
|
flowfile_frame/flow_frame.py
CHANGED
flowfile_frame/flow_frame.pyi
CHANGED
|
@@ -80,8 +80,8 @@ class FlowFrame:
|
|
|
80
80
|
|
|
81
81
|
def __ne__(self, other: object) -> typing.NoReturn: ...
|
|
82
82
|
|
|
83
|
-
#
|
|
84
|
-
def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None,
|
|
83
|
+
# Unified constructor for FlowFrame.
|
|
84
|
+
def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, **kwargs) -> Self: ...
|
|
85
85
|
|
|
86
86
|
def __repr__(self) -> Any: ...
|
|
87
87
|
|
|
@@ -118,9 +118,6 @@ class FlowFrame:
|
|
|
118
118
|
# Execute join using Polars code approach.
|
|
119
119
|
def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_columns: typing.Optional[typing.List[str]], right_columns: typing.Optional[typing.List[str]], how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'], description: str) -> 'FlowFrame': ...
|
|
120
120
|
|
|
121
|
-
# Internal constructor to create a FlowFrame instance that wraps an
|
|
122
|
-
def _from_existing_node(self, data: LazyFrame, flow_graph: FlowGraph, node_id: int, parent_node_id: typing.Optional[int] = None) -> 'FlowFrame': ...
|
|
123
|
-
|
|
124
121
|
# Generates the `input_df.sort(...)` Polars code string using pure expression strings.
|
|
125
122
|
def _generate_sort_polars_code(self, pure_sort_expr_strs: typing.List[str], descending_values: typing.List[bool], nulls_last_values: typing.List[bool], multithreaded: bool, maintain_order: bool) -> str: ...
|
|
126
123
|
|
|
@@ -231,7 +228,7 @@ class FlowFrame:
|
|
|
231
228
|
def interpolate(self, description: Optional[str] = None) -> 'FlowFrame': ...
|
|
232
229
|
|
|
233
230
|
# Add a join operation to the Logical Plan.
|
|
234
|
-
def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) ->
|
|
231
|
+
def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> 'FlowFrame': ...
|
|
235
232
|
|
|
236
233
|
# Perform an asof join.
|
|
237
234
|
def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
|
|
File without changes
|
|
File without changes
|
|
File without changes
|