Flowfile 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flowfile/__init__.py CHANGED
@@ -7,7 +7,7 @@ This package ties together the FlowFile ecosystem components:
7
7
  - flowfile_worker: Computation engine
8
8
  """
9
9
 
10
- __version__ = "0.3.7"
10
+ __version__ = "0.3.8"
11
11
 
12
12
  import os
13
13
  import logging
flowfile/api.py CHANGED
@@ -206,7 +206,7 @@ def check_if_in_single_mode() -> bool:
206
206
  try:
207
207
  response: requests.Response = requests.get(f"{FLOWFILE_BASE_URL}/single_mode", timeout=1)
208
208
  if response.ok:
209
- return response.json() == "1"
209
+ return response.json()
210
210
  except Exception:
211
211
  pass
212
212
  return False
@@ -400,6 +400,8 @@ def _open_flow_in_browser(flow_id: int) -> None:
400
400
  logger.info(f"Unified mode detected. Opening imported flow in browser: {flow_url}")
401
401
  try:
402
402
  time.sleep(0.5)
403
+ logger.info("Attempting to open browser tab for flow...")
404
+ logger.info("Opening URL in browser: %s", flow_url)
403
405
  webbrowser.open_new_tab(flow_url)
404
406
  except Exception as wb_err:
405
407
  logger.warning(f"Could not automatically open browser tab: {wb_err}")
@@ -452,7 +454,7 @@ def open_graph_in_editor(flow_graph: FlowGraph, storage_location: Optional[str]
452
454
  return False
453
455
 
454
456
  flow_id = import_flow_to_editor(flow_file_path, auth_token)
455
-
457
+ print(flow_id, "flow_id", flow_in_single_mode, automatically_open_browser)
456
458
  if flow_id is not None:
457
459
  if flow_in_single_mode and automatically_open_browser:
458
460
  _open_flow_in_browser(flow_id)
flowfile/web/__init__.py CHANGED
@@ -51,6 +51,8 @@ def extend_app(app: FastAPI):
51
51
 
52
52
  @app.get("/single_mode")
53
53
  async def in_single_mode() -> bool:
54
+ print("Checking if single file mode is enabled")
55
+ print(os.environ.get('FLOWFILE_SINGLE_FILE_MODE'))
54
56
  return os.environ.get('FLOWFILE_SINGLE_FILE_MODE', "0") == "1"
55
57
 
56
58
  @app.get("/ui", include_in_schema=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: Flowfile
3
- Version: 0.3.7
3
+ Version: 0.3.8
4
4
  Summary: Project combining flowfile core (backend) and flowfile_worker (compute offloader) and flowfile_frame (api)
5
5
  Author: Edward van Eechoud
6
6
  Author-email: evaneechoud@gmail.com
@@ -1,10 +1,10 @@
1
1
  build_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  build_backends/main.py,sha256=hLmfqTeHLSTiwwZ5mUuoLQgtO40Igvl1_4NbnvzWSgI,9912
3
3
  build_backends/main_prd.py,sha256=JR2tYCMWM5ThooQjv5pw6nwVKMQjgsiHgKMhYn9NXWI,6927
4
- flowfile/__init__.py,sha256=kQ10o5ueV_t_wRXShd1FTlLmCpGxeS4InxA6VurC7UE,4025
4
+ flowfile/__init__.py,sha256=ba9c-bTEliEzkOy8awqjFHYOesK6JpuJHAwdYIISALo,4025
5
5
  flowfile/__main__.py,sha256=cpWeAL9Xw2qHfm52ZCAQzQhjoOAmVFSXPL-8MrnWAxA,2540
6
- flowfile/api.py,sha256=TDxw2g6cF_A94jTDAwTEnVyve99i3UJMjC9D0z8KgSg,18720
7
- flowfile/web/__init__.py,sha256=DMbHVQ2yVopshBQr5dBIn7hqrxO9RqCdwc_CA9Kd6F0,5780
6
+ flowfile/api.py,sha256=kN03OrC_X0448DyPPVWGk6b9Y4pi3LAMqwsOKhWbDlI,18929
7
+ flowfile/web/__init__.py,sha256=InpX02yACH4ZJLVKFbpnJw7rg3k9KwtOKhxLsuHsJEc,5896
8
8
  flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css,sha256=Lf3OL7Vgg7ns0m6C0FiCNY0h4vgKALPrvTTkETjQmdc,1621
9
9
  flowfile/web/static/assets/CloudConnectionManager-c20a740f.js,sha256=HCr-EGu2ZPo3ZEcEU5ILcDd8zHCQaFjuZh4FpgGUSqQ,35768
10
10
  flowfile/web/static/assets/CloudStorageReader-29d14fcc.css,sha256=KdFPzPpEdf69gKbDLTPG_bk5QvJKaFXLloUjfzg67-s,3154
@@ -147,9 +147,9 @@ flowfile_core/configs/flow_logger.py,sha256=Pk1yhaC58jjISMrgwhBFIue9Qj5XfYo8NfOe
147
147
  flowfile_core/configs/node_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
148
  flowfile_core/configs/node_store/nodes.py,sha256=cZmevDcRYthaHxM04BE2WjsswQdMpaOQHNus-sRBQlc,6388
149
149
  flowfile_core/configs/settings.py,sha256=5EAZKlq7oehakL3F0uT_VI_xXGkcIK9NL2-KAZq_Na8,3199
150
- flowfile_core/configs/utils.py,sha256=BjyJCfO4gR_n9Sbs6dPlFfSKwxfxd3aKZ9zUH0Xs5GY,474
150
+ flowfile_core/configs/utils.py,sha256=DBLpZfvgzTQmF3c8ABediMBcORIdEtcyy2L8eIZa4vM,601
151
151
  flowfile_core/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
- flowfile_core/database/connection.py,sha256=g6oqL8d47eED58lJleL6mVdTTzSZTb2BY-E-6P0zWRY,3057
152
+ flowfile_core/database/connection.py,sha256=CVqbQEQ5i-I9Dnybh_ZBRBc52I5ZfrWtR4Zqod73Rc0,2999
153
153
  flowfile_core/database/init_db.py,sha256=9j4osTUrjsX3Y4z8hRVFDTCNzQxXqxZqXG2rwb6Mz5I,1235
154
154
  flowfile_core/database/models.py,sha256=7PlhE9Xe5dMvI5mqAlPWGS-LAZbFWq111EfCCXRT-ec,3440
155
155
  flowfile_core/fileExplorer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -173,10 +173,10 @@ flowfile_core/flowfile/database_connection_manager/db_connections.py,sha256=dVYr
173
173
  flowfile_core/flowfile/database_connection_manager/models.py,sha256=lVJSifqznQ8fKGWBEBCy_8JeXmdKF4pnAE5P5oXRrLM,379
174
174
  flowfile_core/flowfile/extensions.py,sha256=gUKTzNs6YBayHF51-ZacMknvrE1S8OXc89qGIdqPl2w,1814
175
175
  flowfile_core/flowfile/flow_data_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py,sha256=9QlduMjmFhhwEvzwqkbS3eANvmMasn1C_lj_knSfDJw,10493
176
+ flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py,sha256=QE2h9ULd1zyqQ_Ph4yqpDMekTOtqH2ypu-NwhVfmgVc,10414
177
177
  flowfile_core/flowfile/flow_data_engine/create/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
178
  flowfile_core/flowfile/flow_data_engine/create/funcs.py,sha256=Hvk4L3aOppt3RyKBEqFutx1t2RF8qA6QdrS1lPnJJkY,7822
179
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py,sha256=Wg9UkvtKN9t8dVfPyxOC0cotwHfh1EVJ5Rki3JzP0lI,100068
179
+ flowfile_core/flowfile/flow_data_engine/flow_data_engine.py,sha256=ycGWuohlkD3x0IPQQvGPcl9BHkt-fc8DOOxURZwPPQA,100062
180
180
  flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
181
  flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py,sha256=auDG4EeLcnXlsTe3IlU6t8jLsVUEre0-QcoQRACt0xU,7484
182
182
  flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py,sha256=xusyOLwSxevBk8-Uy9ZKISB_KOi0JeYfZ0wihcG-Qjk,530
@@ -197,10 +197,10 @@ flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operati
197
197
  flowfile_core/flowfile/flow_data_engine/threaded_processes.py,sha256=15IPoqIoCfkPMb648o9hnOzNnZINhYQxJi5hNfQuRwE,1311
198
198
  flowfile_core/flowfile/flow_data_engine/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
199
  flowfile_core/flowfile/flow_data_engine/utils.py,sha256=Nkw4MM6SX9veOAC8bthWTLUqA_aIFLtDnJG3bTNcwoc,6628
200
- flowfile_core/flowfile/flow_graph.py,sha256=UqEk3sLDL1K4EucMgySw75B45Mj5Qnjbzl3k1LJ4w7o,91002
200
+ flowfile_core/flowfile/flow_graph.py,sha256=OUKfAio_HNB8eQz5ofGDTSXPXUb9_otTdG34DTizcdA,91219
201
201
  flowfile_core/flowfile/flow_graph_utils.py,sha256=nqfQdkFo4DM1WpbMuL1tZps51dKFWZi2yu_UgNUxtNU,10057
202
202
  flowfile_core/flowfile/flow_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
- flowfile_core/flowfile/flow_node/flow_node.py,sha256=L9p0grp59WBFEtw6NxNK5nHOcXwIqs7XVG9eG90-oOQ,51948
203
+ flowfile_core/flowfile/flow_node/flow_node.py,sha256=frRM97Ac4BcXAtYzorLDBCUU7g7aT2nHfigC-gq6Al0,52159
204
204
  flowfile_core/flowfile/flow_node/models.py,sha256=2inXz2Ov5gl2NzGh6HGtrKM3ow93iyEixsqX2w3pgdM,9929
205
205
  flowfile_core/flowfile/flow_node/schema_callback.py,sha256=ed8OLzRgQluC6aa2CcvSWq7WC3ZNZnLvDk9rXWwIux4,2410
206
206
  flowfile_core/flowfile/handler.py,sha256=Ov25vblx6oiMM4atPScqk6OnuEZ1PEPgCCO6mqT0y8I,3210
@@ -247,12 +247,12 @@ flowfile_core/schemas/transform_schema.py,sha256=zwvomSAEgoEyQTdUbp_Mnawz2zVIuTv
247
247
  flowfile_core/secret_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
248
248
  flowfile_core/secret_manager/secret_manager.py,sha256=6G9hqo-GjTE7va6i6r3_ZmGo4Y26Aw_PY0W8L6pNo7E,2086
249
249
  flowfile_core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
250
- flowfile_core/utils/arrow_reader.py,sha256=SbDDzOmtHcZ8rH88v1jN0EZthaDqBmvCrbruX07Qdks,9366
250
+ flowfile_core/utils/arrow_reader.py,sha256=IeRaNEJvCVb-bVOmZnrZhzxFGWOVnH3p4MnXGxvUo_8,9541
251
251
  flowfile_core/utils/excel_file_manager.py,sha256=EIad2LenHu-3Yw1FcLmE0KgmLflnvNKt07FY6s6mPlE,452
252
252
  flowfile_core/utils/fileManager.py,sha256=LnJhK_pwjb9MIApG2e4Hp3L5Z7Wny8YYHaL9SkW8WlE,1371
253
253
  flowfile_core/utils/fl_executor.py,sha256=eNnNZHZ9451brzZD00_X8aoCHFl1hR1gVOIGxtE0Db4,1301
254
254
  flowfile_core/utils/utils.py,sha256=XPanIHWQjNLkd-lC7mKoWdjP3TAR_GZwYIbZ5_KF5o0,1326
255
- flowfile_core/utils/validate_setup.py,sha256=1kaintlS0MA-PDO6IKgW0NGGai7WVUeAM_Vnvbm1Mfk,1737
255
+ flowfile_core/utils/validate_setup.py,sha256=3g6JtMgda-N7Ts_xlOSS8HtW4ea3eDsFq2UbguRwzdI,1641
256
256
  flowfile_frame/__init__.py,sha256=JDEFbzBQ-YtB129IppN06v-8KOYb3SADtOTDgCpPh0k,2044
257
257
  flowfile_frame/adapters.py,sha256=8H1z6UpYQ3aeuSYNs8GbWfL5BUzVbGJbGzi6b9uVBt4,528
258
258
  flowfile_frame/adding_expr.py,sha256=K9KdF3WCbPy2YO5radyEBXQ0yi6SOI4nviaPwXmKW0k,12428
@@ -260,11 +260,11 @@ flowfile_frame/cloud_storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
260
260
  flowfile_frame/cloud_storage/frame_helpers.py,sha256=4x5I5WVKpT1PXoPvEJd28MmsHDCJNhZxAdG6MKOSA_4,2271
261
261
  flowfile_frame/cloud_storage/secret_manager.py,sha256=lIjOf2u6ZWrXFFYCrHK2XH51p2DEl9m7Pr1uLKe1DUI,2540
262
262
  flowfile_frame/config.py,sha256=Tl_4V8byUiO1y3ANesW2E9qeTtZvtKYJl8KU00hc1Cg,176
263
- flowfile_frame/expr.py,sha256=OmpQZ7xleBoBmSLwlZ1pp1_WVd38bpqCs8OGQL0vx7s,59264
263
+ flowfile_frame/expr.py,sha256=r340IF6t2cZkUeXmhdU-wOxpiNpjiyQ4dEABO_VZxk8,59743
264
264
  flowfile_frame/expr.pyi,sha256=kUMj6IRlczjInzUb6WXkBrxboCRhWdkxGlpiR30zCKw,89316
265
265
  flowfile_frame/expr_name.py,sha256=0VZZwC3xc6tiwOJPu7emQ86Xp2mVu7U-j-jrsMjbc2Y,9852
266
- flowfile_frame/flow_frame.py,sha256=NCny_WgHzM2tnqdJGZTDGrFOj1jiMAJWvFgSK0N7tv4,101530
267
- flowfile_frame/flow_frame.pyi,sha256=P0DPw5xx6nXoL7hbUOS5t06OmGZjY7P5ZgHO1iCvYIw,33956
266
+ flowfile_frame/flow_frame.py,sha256=Rvz8SCRtLEtqWCjKeTiqoSGfGX2IBCnjqeeFx7wllUI,101545
267
+ flowfile_frame/flow_frame.pyi,sha256=HChIEnLil9_8sFAI61jqA9ldhfDZheIaHY9HFp-Vnr0,33664
268
268
  flowfile_frame/flow_frame_methods.py,sha256=mHFltRFeKQ4m2aDsU0HcpAhACR0gdLCeONvBFeG-wJk,29054
269
269
  flowfile_frame/group_frame.py,sha256=aLpiT9ZEidVDJ04ORr57SPxR7WfgmziBPkBGXB9oR54,11632
270
270
  flowfile_frame/join.py,sha256=YQCHmw8nYzvGzvrFc0jOmVMy-rahK3CPtuFdKB7loS8,2437
@@ -313,8 +313,8 @@ test_utils/s3/commands.py,sha256=m-r6rWZZP7Q7bzXwuOId1QbQkw8aq8-3qcjH8wpSqxI,142
313
313
  test_utils/s3/data_generator.py,sha256=qEVN6paBvPf1W4Pxjb341WOLvTrx9D20MyJ944TAHc8,10939
314
314
  test_utils/s3/demo_data_generator.py,sha256=YuYnbEmuKBUsuuBMjvq9SCmm11pYJc75u-OqpOGMr5A,7682
315
315
  test_utils/s3/fixtures.py,sha256=n5qTGbW1LASFusYbeNh5_53aKui2Xp0yhT70_VXJ4NU,7778
316
- flowfile-0.3.7.dist-info/LICENSE,sha256=pCfLAA27jMHReYk_wGiirZxWRRXz_Bm7PVInRCa9P5g,1075
317
- flowfile-0.3.7.dist-info/METADATA,sha256=nmpxLVZlB17JVxvG3ZbxHmBAnAdxDOXenZLOKrU_B4g,8814
318
- flowfile-0.3.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
319
- flowfile-0.3.7.dist-info/entry_points.txt,sha256=3VMU1A7kmYy8DSPw7axzpy00DEaVRoVagE4jyZFAz2U,425
320
- flowfile-0.3.7.dist-info/RECORD,,
316
+ flowfile-0.3.8.dist-info/LICENSE,sha256=pCfLAA27jMHReYk_wGiirZxWRRXz_Bm7PVInRCa9P5g,1075
317
+ flowfile-0.3.8.dist-info/METADATA,sha256=KxNspVyvQnG6Tp0njD_vr6sK34tXp1a18rUxdF6FHGg,8814
318
+ flowfile-0.3.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
319
+ flowfile-0.3.8.dist-info/entry_points.txt,sha256=3VMU1A7kmYy8DSPw7axzpy00DEaVRoVagE4jyZFAz2U,425
320
+ flowfile-0.3.8.dist-info/RECORD,,
@@ -16,3 +16,8 @@ class MutableBool:
16
16
  elif isinstance(other, MutableBool):
17
17
  return self.value == other.value
18
18
  return NotImplemented
19
+
20
+ def set(self, value):
21
+ """Set the value of the MutableBool"""
22
+ self.value = bool(value)
23
+ return self
@@ -26,8 +26,6 @@ def get_app_data_dir() -> Path:
26
26
  base_dir = os.path.join(os.path.expanduser("~"), ".local", "share")
27
27
 
28
28
  app_dir = Path(base_dir) / app_name
29
-
30
- print(f"Using application data directory: {app_dir}")
31
29
  app_dir.mkdir(parents=True, exist_ok=True)
32
30
 
33
31
  return app_dir
@@ -48,7 +46,7 @@ def get_database_url():
48
46
  app_dir = get_app_data_dir()
49
47
 
50
48
  db_path = app_dir / "flowfile.db"
51
- logger.info(f"Using database URL: sqlite:///{db_path}")
49
+ logger.debug(f"Using database URL: sqlite:///{db_path}")
52
50
  return f"sqlite:///{db_path}"
53
51
 
54
52
 
@@ -68,7 +68,6 @@ class CloudStorageReader:
68
68
  def _get_s3_storage_options(connection: 'FullCloudStorageConnection') -> Dict[str, Any]:
69
69
  """Build S3-specific storage options."""
70
70
  auth_method = connection.auth_method
71
- print(f"Building S3 storage options for auth_method: '{auth_method}'")
72
71
  if auth_method == "aws-cli":
73
72
  return create_storage_options_from_boto_credentials(
74
73
  profile_name=connection.connection_name,
@@ -1956,7 +1956,7 @@ class FlowDataEngine:
1956
1956
  """
1957
1957
  if self.is_future and not self.is_collected:
1958
1958
  return -1
1959
- calculate_in_worker_process = False if not OFFLOAD_TO_WORKER.value else calculate_in_worker_process
1959
+ calculate_in_worker_process = False if not OFFLOAD_TO_WORKER else calculate_in_worker_process
1960
1960
  if self.number_of_records is None or self.number_of_records < 0 or force_calculate:
1961
1961
  if self._number_of_records_callback is not None:
1962
1962
  self._number_of_records_callback(self)
@@ -300,8 +300,6 @@ class FlowGraph:
300
300
 
301
301
  return print(tree)
302
302
 
303
-
304
-
305
303
  def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
306
304
  """Calculates and applies a layered layout to all nodes in the graph.
307
305
 
@@ -490,7 +488,8 @@ class FlowGraph:
490
488
  node_id=node.node_id,
491
489
  flow_id=self.flow_id,
492
490
  )
493
- node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref)
491
+ node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref,
492
+ n=min(sample_size, number_of_records))
494
493
  return flowfile_table
495
494
 
496
495
  def schema_callback():
@@ -1581,12 +1580,13 @@ class FlowGraph:
1581
1580
  execution_order = determine_execution_order(all_nodes=[node for node in self.nodes if
1582
1581
  node not in skip_nodes],
1583
1582
  flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
1584
-
1585
1583
  skip_node_message(self.flow_logger, skip_nodes)
1586
1584
  execution_order_message(self.flow_logger, execution_order)
1587
1585
  performance_mode = self.flow_settings.execution_mode == 'Performance'
1588
1586
  if self.flow_settings.execution_location == 'local':
1589
1587
  OFFLOAD_TO_WORKER.value = False
1588
+ elif self.flow_settings.execution_location == 'remote':
1589
+ OFFLOAD_TO_WORKER.value = True
1590
1590
  for node in execution_order:
1591
1591
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1592
1592
  if self.flow_settings.is_canceled:
@@ -5,7 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
5
5
  from flowfile_core.utils.arrow_reader import get_read_top_n
6
6
  from flowfile_core.schemas import input_schema, schemas
7
7
  from flowfile_core.configs.flow_logger import NodeLogger
8
- from flowfile_core.configs.settings import SINGLE_FILE_MODE
8
+ from flowfile_core.configs.settings import SINGLE_FILE_MODE, OFFLOAD_TO_WORKER
9
9
 
10
10
  from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
11
11
  from flowfile_core.flowfile.utils import get_hash
@@ -724,9 +724,19 @@ class FlowNode:
724
724
  Raises:
725
725
  Exception: Propagates exceptions from the execution.
726
726
  """
727
- if self.results.resulting_data is None and not performance_mode:
728
- self.results.resulting_data = self.get_resulting_data()
729
- self.results.example_data_generator = lambda: self.get_resulting_data().get_sample(100).to_arrow()
727
+ def example_data_generator():
728
+ example_data = None
729
+
730
+ def get_example_data():
731
+ nonlocal example_data
732
+ if example_data is None:
733
+ example_data = resulting_data.get_sample(100).to_arrow()
734
+ return example_data
735
+ return get_example_data
736
+ resulting_data = self.get_resulting_data()
737
+
738
+ if not performance_mode:
739
+ self.results.example_data_generator = example_data_generator()
730
740
  self.node_schema.result_schema = self.results.resulting_data.schema
731
741
  self.node_stats.has_completed_last_run = True
732
742
 
@@ -899,8 +909,8 @@ class FlowNode:
899
909
  else:
900
910
  self.results.errors = str(e)
901
911
  node_logger.error(f'Error with running the node: {e}')
902
- elif ((run_location == 'local' or SINGLE_FILE_MODE) and (not self.node_stats.has_run_with_current_setup
903
- or self.node_template.node_group == "output")):
912
+ elif ((run_location == 'local' or SINGLE_FILE_MODE) and
913
+ (not self.node_stats.has_run_with_current_setup or self.node_template.node_group == "output")):
904
914
  try:
905
915
  node_logger.info('Executing fully locally')
906
916
  self.execute_full_local(performance_mode)
@@ -138,11 +138,16 @@ def collect_batches(reader: pa.ipc.RecordBatchFileReader, n: int) -> Tuple[List[
138
138
  rows_collected = 0
139
139
 
140
140
  for batch in iter_batches(reader, n, rows_collected):
141
- batches.append(batch)
141
+
142
142
  rows_collected += batch.num_rows
143
143
  logger.debug(f"Collected batch: total rows now {rows_collected}")
144
144
  if rows_collected >= n:
145
+ if rows_collected > n:
146
+ batches.append(batch.slice(0, n - (rows_collected - batch.num_rows)))
147
+ else:
148
+ batches.append(batch)
145
149
  break
150
+ batches.append(batch)
146
151
 
147
152
  logger.info(f"Finished collecting {len(batches)} batches with {rows_collected} total rows")
148
153
  return batches, rows_collected
@@ -217,7 +222,7 @@ def read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> pa.Table:
217
222
 
218
223
  table = pa.Table.from_batches(batches) # type: ignore
219
224
  logger.info(f"Successfully read {rows_collected} rows from {file_path}")
220
- return table
225
+ return table
221
226
 
222
227
 
223
228
  def get_read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> Callable[[], pa.Table]:
@@ -244,4 +249,4 @@ def get_read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> Calla
244
249
  >>> table = reader_func()
245
250
  """
246
251
  logger.info(f"Creating reader function for {file_path} with n={n}, strict={strict}")
247
- return lambda: read_top_n(file_path, n, strict)
252
+ return lambda: read_top_n(file_path, n, strict)
@@ -34,8 +34,6 @@ def validate_setup():
34
34
  check_if_node_has_add_function_in_flow_graph(node)
35
35
  check_if_node_has_input_schema_definition(node)
36
36
 
37
- print("All nodes have corresponding functions in FlowGraph and input schema definitions.")
38
-
39
37
 
40
38
  if __name__ == "__main__":
41
39
  validate_setup()
flowfile_frame/expr.py CHANGED
@@ -490,6 +490,20 @@ class Expr:
490
490
  result.agg_func = "sum"
491
491
  return result
492
492
 
493
+ def unique_counts(self):
494
+ """
495
+ Return the number of unique values in the column.
496
+
497
+ Returns
498
+ -------
499
+ Expr
500
+ A new expression with the unique counts
501
+ """
502
+ result_expr = self.expr.unique_counts() if self.expr is not None else None
503
+ result = self._create_next_expr(method_name="unique_counts", result_expr=result_expr, is_complex=self.is_complex)
504
+ result.agg_func = "unique_counts"
505
+ return result
506
+
493
507
  def implode(self):
494
508
  result_expr = self.expr.implode() if self.expr is not None else None
495
509
  result = self._create_next_expr(method_name="implode", result_expr=result_expr, is_complex=self.is_complex)
@@ -565,7 +565,7 @@ class FlowFrame:
565
565
  coalesce: bool = None,
566
566
  maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
567
567
  description: str = None,
568
- ):
568
+ ) -> "FlowFrame":
569
569
  """
570
570
  Add a join operation to the Logical Plan.
571
571
 
@@ -80,8 +80,8 @@ class FlowFrame:
80
80
 
81
81
  def __ne__(self, other: object) -> typing.NoReturn: ...
82
82
 
83
- # Create and configure a new FlowFrame instance, mimicking Polars' flexible constructor.
84
- def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, override_initial: bool = False) -> Self: ...
83
+ # Unified constructor for FlowFrame.
84
+ def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, **kwargs) -> Self: ...
85
85
 
86
86
  def __repr__(self) -> Any: ...
87
87
 
@@ -118,9 +118,6 @@ class FlowFrame:
118
118
  # Execute join using Polars code approach.
119
119
  def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_columns: typing.Optional[typing.List[str]], right_columns: typing.Optional[typing.List[str]], how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'], description: str) -> 'FlowFrame': ...
120
120
 
121
- # Internal constructor to create a FlowFrame instance that wraps an
122
- def _from_existing_node(self, data: LazyFrame, flow_graph: FlowGraph, node_id: int, parent_node_id: typing.Optional[int] = None) -> 'FlowFrame': ...
123
-
124
121
  # Generates the `input_df.sort(...)` Polars code string using pure expression strings.
125
122
  def _generate_sort_polars_code(self, pure_sort_expr_strs: typing.List[str], descending_values: typing.List[bool], nulls_last_values: typing.List[bool], multithreaded: bool, maintain_order: bool) -> str: ...
126
123
 
@@ -231,7 +228,7 @@ class FlowFrame:
231
228
  def interpolate(self, description: Optional[str] = None) -> 'FlowFrame': ...
232
229
 
233
230
  # Add a join operation to the Logical Plan.
234
- def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> Any: ...
231
+ def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> 'FlowFrame': ...
235
232
 
236
233
  # Perform an asof join.
237
234
  def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...