Flowfile 0.3.4.1__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (122)
  1. flowfile/__init__.py +3 -3
  2. flowfile/api.py +36 -15
  3. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  4. flowfile/web/static/assets/CloudConnectionManager-d004942f.js +784 -0
  5. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  6. flowfile/web/static/assets/CloudStorageReader-eccf9fc2.js +437 -0
  7. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  8. flowfile/web/static/assets/CloudStorageWriter-b1ba6bba.js +430 -0
  9. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-68981877.js} +8 -8
  10. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-0b06649c.js} +2 -2
  11. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-8349a426.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-905344f8.js} +9 -9
  13. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-9f5b8638.js} +9 -9
  14. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-131a6d53.js} +5 -5
  15. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-e3549dcc.js} +6 -6
  16. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-6e0730ae.js} +8 -8
  17. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-02f033e6.js} +75 -9
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-54c14036.js} +9 -9
  20. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-08a3f499.js} +5 -5
  21. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-2ae38139.js} +6 -6
  22. flowfile/web/static/assets/{Join-eec38203.js → Join-493b9772.js} +23 -15
  23. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  24. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-4373d163.js} +106 -34
  25. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  26. flowfile/web/static/assets/{Output-3b2ca045.js → Output-b534f3c7.js} +4 -4
  27. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-2968ff65.js} +6 -6
  28. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-65136536.js} +6 -6
  29. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c56339ed.js} +6 -6
  30. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-1c641a5e.js} +5 -5
  31. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-df308b8f.js} +6 -6
  32. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-293e8a64.js} +5 -5
  33. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-03911655.js} +2 -2
  34. flowfile/web/static/assets/{Select-32b28406.js → Select-3058a13d.js} +8 -8
  35. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-fbf4fb39.js} +1 -1
  36. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-a29bbaf7.js} +6 -6
  37. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-c7d7760e.js} +8 -8
  38. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-118f1d20.js} +2 -2
  39. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f0589571.js} +5 -5
  40. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-7329a207.js} +8 -8
  41. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-30b0be15.js} +5 -5
  42. flowfile/web/static/assets/{api-44ca9e9c.js → api-602fb95c.js} +1 -1
  43. flowfile/web/static/assets/api-fb67319c.js +80 -0
  44. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  45. flowfile/web/static/assets/{designer-267d44f1.js → designer-94a6bf4d.js} +36 -34
  46. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-a224831e.js} +1 -1
  47. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-c2d2aa97.js} +1 -1
  48. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-921ac5fd.js} +2 -2
  49. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-7013cc94.js} +3 -3
  50. flowfile/web/static/assets/{index-e235a8bc.js → index-3a75211d.js} +19 -6
  51. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-a63d4680.js} +3 -3
  52. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-763aec6e.js} +1 -1
  53. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-08464729.js} +3 -3
  54. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-f15a5f87.js} +2 -1
  55. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-93bd09d7.js} +1 -1
  56. flowfile/web/static/index.html +1 -1
  57. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/METADATA +8 -3
  58. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/RECORD +109 -104
  59. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/entry_points.txt +2 -0
  60. flowfile_core/__init__.py +2 -0
  61. flowfile_core/configs/node_store/nodes.py +8 -6
  62. flowfile_core/database/connection.py +63 -15
  63. flowfile_core/database/init_db.py +0 -1
  64. flowfile_core/database/models.py +49 -2
  65. flowfile_core/flowfile/code_generator/code_generator.py +402 -18
  66. flowfile_core/flowfile/connection_manager/models.py +1 -1
  67. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  68. flowfile_core/flowfile/extensions.py +1 -1
  69. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  70. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  71. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +522 -59
  72. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  73. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  74. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  75. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  76. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  77. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  78. flowfile_core/flowfile/flow_graph.py +119 -82
  79. flowfile_core/flowfile/flow_node/flow_node.py +68 -33
  80. flowfile_core/flowfile/flow_node/models.py +32 -3
  81. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  82. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  83. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  84. flowfile_core/flowfile/utils.py +1 -23
  85. flowfile_core/main.py +3 -2
  86. flowfile_core/routes/cloud_connections.py +81 -0
  87. flowfile_core/routes/logs.py +0 -1
  88. flowfile_core/routes/routes.py +3 -39
  89. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  90. flowfile_core/schemas/input_schema.py +37 -15
  91. flowfile_core/schemas/schemas.py +7 -2
  92. flowfile_core/schemas/transform_schema.py +97 -22
  93. flowfile_core/utils/utils.py +40 -1
  94. flowfile_core/utils/validate_setup.py +41 -0
  95. flowfile_frame/flow_frame.py +253 -102
  96. flowfile_frame/flow_frame_methods.py +13 -13
  97. flowfile_worker/external_sources/s3_source/main.py +216 -0
  98. flowfile_worker/external_sources/s3_source/models.py +142 -0
  99. flowfile_worker/funcs.py +51 -6
  100. flowfile_worker/models.py +22 -2
  101. flowfile_worker/routes.py +40 -38
  102. flowfile_worker/utils.py +1 -1
  103. test_utils/s3/commands.py +46 -0
  104. test_utils/s3/data_generator.py +291 -0
  105. test_utils/s3/fixtures.py +209 -0
  106. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  107. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  108. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  109. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  110. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  111. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  112. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  114. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  115. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  116. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  117. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  118. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  119. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/LICENSE +0 -0
  120. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/WHEEL +0 -0
  121. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  122. {flowfile_core/schemas/external_sources → test_utils/s3}/__init__.py +0 -0
@@ -13,7 +13,7 @@ from flowfile_core.configs.node_store import nodes as node_interface
13
13
  from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
14
14
  from time import sleep
15
15
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
16
- ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result, ExternalDatabaseFetcher, ExternalDatabaseWriter)
16
+ ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result, ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
17
17
  from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
18
18
  NodeStepStats, NodeResults)
19
19
  from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
@@ -31,13 +31,14 @@ class FlowNode:
31
31
  results: NodeResults
32
32
  node_information: Optional[schemas.NodeInformation] = None
33
33
  leads_to_nodes: List["FlowNode"] = [] # list with target flows, after execution the step will trigger those step(s)
34
+ user_provided_schema_callback: Optional[Callable] = None # user provided callback function for schema calculation
34
35
  _setting_input: Any = None
35
36
  _hash: Optional[str] = None # host this for caching results
36
37
  _function: Callable = None # the function that needs to be executed when triggered
37
38
  _schema_callback: Optional[SingleExecutionFuture] = None # Function that calculates the schema without executing
38
39
  _state_needs_reset: bool = False
39
- _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
40
- _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
40
+ _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
41
+ _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
41
42
 
42
43
  def post_init(self):
43
44
  self.node_inputs = NodeStepInputs()
@@ -60,8 +61,29 @@ class FlowNode:
60
61
  def state_needs_reset(self, v: bool):
61
62
  self._state_needs_reset = v
62
63
 
64
+ @staticmethod
65
+ def create_schema_callback_from_function(f: Callable) -> Callable[[], List[FlowfileColumn]]:
66
+ """
67
+ Create a schema callback from a function.
68
+ :param f: Function that returns the schema
69
+ :return: Callable that returns the schema
70
+ """
71
+ def schema_callback() -> List[FlowfileColumn]:
72
+ try:
73
+ logger.info('Executing the schema callback function based on the node function')
74
+ return f().schema
75
+ except Exception as e:
76
+ logger.warning(f'Error with the schema callback: {e}')
77
+ return []
78
+ return schema_callback
79
+
63
80
  @property
64
- def schema_callback(self):
81
+ def schema_callback(self) -> SingleExecutionFuture:
82
+ if self._schema_callback is None:
83
+ if self.user_provided_schema_callback is not None:
84
+ self.schema_callback = self.user_provided_schema_callback
85
+ elif self.is_start:
86
+ self.schema_callback = self.create_schema_callback_from_function(self._function)
65
87
  return self._schema_callback
66
88
 
67
89
  @schema_callback.setter
@@ -76,7 +98,6 @@ class FlowNode:
76
98
  return []
77
99
 
78
100
  self._schema_callback = SingleExecutionFuture(f, error_callback)
79
- self._schema_callback.start()
80
101
 
81
102
  @property
82
103
  def is_start(self) -> bool:
@@ -133,13 +154,13 @@ class FlowNode:
133
154
  pos_y: float = 0,
134
155
  schema_callback: Callable = None,
135
156
  ):
136
-
137
- self.schema_callback = schema_callback
157
+ self.user_provided_schema_callback = schema_callback
138
158
  self.node_information.y_position = pos_y
139
159
  self.node_information.x_position = pos_x
140
160
  self.node_information.setting_input = setting_input
141
161
  self.name = self.node_type if name is None else name
142
162
  self._function = function
163
+
143
164
  self.node_schema.input_columns = [] if input_columns is None else input_columns
144
165
  self.node_schema.output_columns = [] if output_schema is None else output_schema
145
166
  self.node_schema.drop_columns = [] if drop_columns is None else drop_columns
@@ -147,7 +168,6 @@ class FlowNode:
147
168
  if hasattr(setting_input, 'cache_results'):
148
169
  self.node_settings.cache_results = setting_input.cache_results
149
170
 
150
- self.setting_input = setting_input
151
171
  self.results.errors = None
152
172
  self.add_lead_to_in_depend_source()
153
173
  _ = self.hash
@@ -155,6 +175,7 @@ class FlowNode:
155
175
  if self.node_template is None:
156
176
  raise Exception(f'Node template {self.node_type} not found')
157
177
  self.node_default = node_interface.node_defaults.get(self.node_type)
178
+ self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
158
179
 
159
180
  @property
160
181
  def name(self):
@@ -171,11 +192,17 @@ class FlowNode:
171
192
 
172
193
  @setting_input.setter
173
194
  def setting_input(self, setting_input: Any):
195
+ is_manual_input = (self.node_type == 'manual_input' and
196
+ isinstance(setting_input, input_schema.NodeManualInput) and
197
+ isinstance(self._setting_input, input_schema.NodeManualInput)
198
+ )
199
+ if is_manual_input:
200
+ _ = self.hash
174
201
  self._setting_input = setting_input
175
202
  self.set_node_information()
176
- if self.node_type == 'manual_input' and isinstance(self._setting_input, input_schema.NodeManualInput):
177
- if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run:
178
- self.function = self.function.__class__(setting_input.raw_data_format)
203
+ if is_manual_input:
204
+ if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run_with_current_setup:
205
+ self.function = FlowDataEngine(setting_input.raw_data_format)
179
206
  self.reset()
180
207
  self.get_predicted_schema()
181
208
  elif self._setting_input is not None:
@@ -291,7 +318,7 @@ class FlowNode:
291
318
  Method to get a predicted schema based on the columns that are dropped and added
292
319
  :return:
293
320
  """
294
- if self.node_schema.predicted_schema is not None and not force:
321
+ if self.node_schema.predicted_schema and not force:
295
322
  return self.node_schema.predicted_schema
296
323
  if self.schema_callback is not None and (self.node_schema.predicted_schema is None or force):
297
324
  self.print('Getting the data from a schema callback')
@@ -299,7 +326,7 @@ class FlowNode:
299
326
  # Force the schema callback to reset, so that it will be executed again
300
327
  self.schema_callback.reset()
301
328
  schema = self.schema_callback()
302
- if schema is not None:
329
+ if schema is not None and len(schema) > 0:
303
330
  self.print('Calculating the schema based on the schema callback')
304
331
  self.node_schema.predicted_schema = schema
305
332
  return self.node_schema.predicted_schema
@@ -320,14 +347,14 @@ class FlowNode:
320
347
  def print(self, v: Any):
321
348
  logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
322
349
 
323
- def get_resulting_data(self) -> FlowDataEngine:
350
+ def get_resulting_data(self) -> FlowDataEngine | None:
324
351
  if self.is_setup:
325
352
  if self.results.resulting_data is None and self.results.errors is None:
326
353
  self.print('getting resulting data')
327
354
  try:
328
355
  if isinstance(self.function, FlowDataEngine):
329
356
  fl: FlowDataEngine = self.function
330
- elif self.node_type in ('external_source', 'airbyte_reader'):
357
+ elif self.node_type == 'external_source':
331
358
  fl: FlowDataEngine = self.function()
332
359
  fl.collect_external()
333
360
  self.node_settings.streamable = False
@@ -342,11 +369,12 @@ class FlowNode:
342
369
  except Exception as e:
343
370
  self.results.resulting_data = FlowDataEngine()
344
371
  self.results.errors = str(e)
345
- self.node_stats.has_run = False
372
+ self.node_stats.has_run_with_current_setup = False
373
+ self.node_stats.has_completed_last_run = False
346
374
  raise e
347
375
  return self.results.resulting_data
348
376
 
349
- def _predicted_data_getter(self) -> FlowDataEngine|None:
377
+ def _predicted_data_getter(self) -> FlowDataEngine | None:
350
378
  try:
351
379
  fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
352
380
  return fl
@@ -365,6 +393,7 @@ class FlowNode:
365
393
  def get_predicted_resulting_data(self) -> FlowDataEngine:
366
394
  if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
367
395
  self.print('Getting data based on the schema')
396
+
368
397
  _s = self.schema_callback() if self.node_schema.result_schema is None else self.node_schema.result_schema
369
398
  return FlowDataEngine.create_from_schema(_s)
370
399
  else:
@@ -425,7 +454,7 @@ class FlowNode:
425
454
  return False
426
455
  flow_logger = logger if node_logger is None else node_logger
427
456
  cache_result_exists = results_exists(self.hash)
428
- if not self.node_stats.has_run:
457
+ if not self.node_stats.has_run_with_current_setup:
429
458
  flow_logger.info('Node has not run, needs to run')
430
459
  return True
431
460
  if self.node_settings.cache_results and cache_result_exists:
@@ -449,17 +478,18 @@ class FlowNode:
449
478
  wait_on_completion=True, node_id=self.node_id, flow_id=flow_id)
450
479
  self.store_example_data_generator(external_sampler)
451
480
  if self.results.errors is None and not self.node_stats.is_canceled:
452
- self.node_stats.has_run = True
481
+ self.node_stats.has_run_with_current_setup = True
453
482
  self.node_schema.result_schema = resulting_data.schema
454
483
 
455
484
  except Exception as e:
456
485
  logger.warning(f"Error with step {self.__name__}")
457
486
  logger.error(str(e))
458
487
  self.results.errors = str(e)
459
- self.node_stats.has_run = False
488
+ self.node_stats.has_run_with_current_setup = False
489
+ self.node_stats.has_completed_last_run = False
460
490
  raise e
461
491
 
462
- if self.node_stats.has_run:
492
+ if self.node_stats.has_run_with_current_setup:
463
493
  for step in self.leads_to_nodes:
464
494
  if not self.node_settings.streamable:
465
495
  step.node_settings.streamable = self.node_settings.streamable
@@ -477,7 +507,7 @@ class FlowNode:
477
507
  node_logger.warning('Failed to read the cache, rerunning the code')
478
508
  if self.node_type == 'output':
479
509
  self.results.resulting_data = self.get_resulting_data()
480
- self.node_stats.has_run = True
510
+ self.node_stats.has_run_with_current_setup = True
481
511
  return
482
512
  try:
483
513
  self.get_resulting_data()
@@ -498,7 +528,7 @@ class FlowNode:
498
528
  )
499
529
  if not performance_mode:
500
530
  self.store_example_data_generator(external_df_fetcher)
501
- self.node_stats.has_run = True
531
+ self.node_stats.has_run_with_current_setup = True
502
532
 
503
533
  except Exception as e:
504
534
  node_logger.error('Error with external process')
@@ -541,7 +571,8 @@ class FlowNode:
541
571
  # node_logger = flow_logger.get_node_logger(self.node_id)
542
572
  if reset_cache:
543
573
  self.remove_cache()
544
- self.node_stats.has_run = False
574
+ self.node_stats.has_run_with_current_setup = False
575
+ self.node_stats.has_completed_last_run = False
545
576
  if self.is_setup:
546
577
  node_logger.info(f'Starting to run {self.__name__}')
547
578
  if self.needs_run(performance_mode, node_logger, run_location):
@@ -572,7 +603,6 @@ class FlowNode:
572
603
  performance_mode=performance_mode, retry=False,
573
604
  node_logger=node_logger)
574
605
  else:
575
- self.node_stats.has_run = False
576
606
  self.results.errors = str(e)
577
607
  node_logger.error(f'Error with running the node: {e}')
578
608
 
@@ -596,15 +626,19 @@ class FlowNode:
596
626
  needs_reset = self.needs_reset() or deep
597
627
  if needs_reset:
598
628
  logger.info(f'{self.node_id}: Node needs reset')
599
- self.node_stats.has_run = False
629
+ self.node_stats.has_run_with_current_setup = False
600
630
  self.results.reset()
601
- if self.schema_callback:
602
- self.schema_callback.reset()
631
+ if self.is_correct:
632
+ self._schema_callback = None # Ensure the schema callback is reset
633
+ if self.schema_callback:
634
+ logger.info(f'{self.node_id}: Resetting the schema callback')
635
+ self.schema_callback.start()
603
636
  self.node_schema.result_schema = None
604
637
  self.node_schema.predicted_schema = None
605
638
  self._hash = None
606
639
  self.node_information.is_setup = None
607
640
  self.evaluate_nodes()
641
+ _ = self.hash # Recalculate the hash after reset
608
642
 
609
643
  def delete_lead_to_node(self, node_id: int) -> bool:
610
644
  logger.info(f'Deleting lead to node: {node_id}')
@@ -682,10 +716,11 @@ class FlowNode:
682
716
 
683
717
  def get_table_example(self, include_data: bool = False) -> TableExample | None:
684
718
  self.print('Getting a table example')
685
- if self.node_type == 'output':
686
- self.print('getting the table example')
687
- return self.main_input[0].get_table_example(include_data)
688
- if self.is_setup and include_data:
719
+ if self.is_setup and include_data and self.node_stats.has_completed_last_run:
720
+ if self.node_template.node_group == 'output':
721
+ self.print('getting the table example')
722
+ return self.main_input[0].get_table_example(include_data)
723
+
689
724
  logger.info('getting the table example since the node has run')
690
725
  example_data_getter = self.results.example_data_generator
691
726
  if example_data_getter is not None:
@@ -720,7 +755,7 @@ class FlowNode:
720
755
  def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
721
756
  node = NodeData(flow_id=flow_id,
722
757
  node_id=self.node_id,
723
- has_run=self.node_stats.has_run,
758
+ has_run=self.node_stats.has_run_with_current_setup,
724
759
  setting_input=self.setting_input,
725
760
  flow_type=self.node_type)
726
761
  if self.main_input:
@@ -21,10 +21,39 @@ class NodeStepPromise:
21
21
 
22
22
  class NodeStepStats:
23
23
  error: str = None
24
- has_run: bool = False
24
+ _has_run_with_current_setup: bool = False
25
+ has_completed_last_run: bool = False
25
26
  active: bool = True
26
27
  is_canceled: bool = False
27
28
 
29
+ def __init__(self, error: str = None,
30
+ has_run_with_current_setup: bool = False,
31
+ has_completed_last_run: bool = False,
32
+ active: bool = True,
33
+ is_canceled: bool = False):
34
+ self.error = error
35
+ self._has_run_with_current_setup = has_run_with_current_setup
36
+ self.has_completed_last_run = has_completed_last_run
37
+ self.active = active
38
+ self.is_canceled = is_canceled
39
+
40
+ def __repr__(self):
41
+ return (f"NodeStepStats(error={self.error}, has_run_with_current_setup={self.has_run_with_current_setup}, "
42
+ f"has_completed_last_run={self.has_completed_last_run}, "
43
+ f"active={self.active}, is_canceled={self.is_canceled})")
44
+
45
+ @property
46
+ def has_run_with_current_setup(self) -> bool:
47
+ return self._has_run_with_current_setup
48
+
49
+ @has_run_with_current_setup.setter
50
+ def has_run_with_current_setup(self, value: bool):
51
+ if value:
52
+ self._has_run_with_current_setup = True
53
+ self.has_completed_last_run = True
54
+ else:
55
+ self._has_run_with_current_setup = False
56
+
28
57
 
29
58
  class NodeStepSettings:
30
59
  cache_results: bool = False
@@ -66,8 +95,8 @@ class NodeStepInputs:
66
95
 
67
96
 
68
97
  class NodeSchemaInformation:
69
- result_schema: Optional[List[FlowfileColumn]] = [] # resulting schema of the function
70
- predicted_schema: Optional[List[FlowfileColumn]] = [] # predicted resulting schema of the function
98
+ result_schema: Optional[List[FlowfileColumn]] = None # resulting schema of the function
99
+ predicted_schema: Optional[List[FlowfileColumn]] = None # predicted resulting schema of the function
71
100
  input_columns: List[str] = [] # columns that are needed for the function
72
101
  drop_columns: List[str] = [] # columns that will not be available after the function
73
102
  output_columns: List[FlowfileColumn] = [] # columns that will be added with the function
@@ -14,6 +14,7 @@ class SingleExecutionFuture(Generic[T]):
14
14
  func: Callable[[], T]
15
15
  on_error: Optional[Callable[[Exception], Any]]
16
16
  result_value: Optional[T]
17
+ has_run_at_least_once: bool = False # Indicates if the function has been run at least once
17
18
 
18
19
  def __init__(
19
20
  self,
@@ -26,6 +27,7 @@ class SingleExecutionFuture(Generic[T]):
26
27
  self.func = func
27
28
  self.on_error = on_error
28
29
  self.result_value = None
30
+ self.has_run_at_least_once = False
29
31
 
30
32
  def start(self) -> None:
31
33
  """Start the function execution if not already started."""
@@ -35,8 +37,7 @@ class SingleExecutionFuture(Generic[T]):
35
37
 
36
38
  def cleanup(self) -> None:
37
39
  """Clean up resources by clearing the future and shutting down the executor."""
38
- # if self.future:
39
- # self.future = None
40
+ self.has_run_at_least_once = True
40
41
  self.executor.shutdown(wait=False)
41
42
 
42
43
  def __call__(self) -> Optional[T]:
@@ -1,3 +1 @@
1
1
  from flowfile_core.flowfile.sources.external_sources import custom_external_sources
2
- # from flowfile.sources.external_sources.custom_external_sources.external_source import check_for_key_vault_existence
3
- # from flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
@@ -1,22 +1,19 @@
1
1
  from flowfile_core.flowfile.sources.external_sources.custom_external_sources.external_source import CustomExternalSource
2
- from flowfile_core.flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
3
2
 
4
3
 
5
- def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource | AirbyteSource:
4
+ def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource:
6
5
  """
7
- Factory function to generate either CustomExternalSource or AirbyteSource.
6
+ Factory function to generate either CustomExternalSource .
8
7
 
9
8
  Args:
10
- source_type (str): The type of source to create ("custom" or "airbyte").
9
+ source_type (str): The type of source to create ("custom").
11
10
  **kwargs: The keyword arguments required for the specific source type.
12
11
 
13
12
  Returns:
14
- Union[CustomExternalSource, AirbyteSource]: An instance of the selected data source type.
13
+ Union[CustomExternalSource]: An instance of the selected data source type.
15
14
  """
16
15
  if source_type == "custom":
17
16
  return CustomExternalSource(**kwargs)
18
- elif source_type == "airbyte":
19
- return AirbyteSource(**kwargs)
20
17
  else:
21
18
  raise ValueError(f"Unknown source type: {source_type}")
22
19
 
@@ -1,16 +1,12 @@
1
1
  import os
2
2
  import hashlib
3
3
  import json
4
- import polars as pl
5
4
  import shutil
6
5
 
7
6
  from datetime import datetime, date, time
8
7
  from typing import List
9
8
  from decimal import Decimal
10
9
 
11
- from flowfile_core.flowfile.flow_data_engine.utils import standardize_col_dtype
12
- from flowfile_core.schemas import input_schema
13
-
14
10
 
15
11
  def generate_sha256_hash(data: bytes):
16
12
  sha256 = hashlib.sha256()
@@ -61,7 +57,7 @@ def get_hash(val):
61
57
  if hasattr(val, 'overridden_hash') and val.overridden_hash():
62
58
  val = hash(val)
63
59
  elif hasattr(val, '__dict__'):
64
- val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y'}}
60
+ val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y', 'description'}}
65
61
  elif hasattr(val, 'json'):
66
62
  pass
67
63
  return generate_sha256_hash(json_dumps(val).encode('utf-8'))
@@ -117,21 +113,3 @@ def batch_generator(input_list: List, batch_size: int = 10000):
117
113
  yield input_list
118
114
  input_list = []
119
115
  run = False
120
-
121
-
122
- def _handle_raw_data(node_manual_input: input_schema.NodeManualInput):
123
- """Ensure compatibility with the new typed raw data and the old dict form data type"""
124
- if (not (hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
125
- and (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
126
- values = [standardize_col_dtype([vv for vv in c]) for c in zip(*(r.values()
127
- for r in node_manual_input.raw_data))]
128
- data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
129
- _columns = [input_schema.MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in
130
- node_manual_input.raw_data[0].keys()]
131
-
132
- node_manual_input.raw_data_format = input_schema.RawData(columns=_columns, data=values)
133
- elif ((hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
134
- and not (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
135
- node_manual_input.raw_data = [{c.name: node_manual_input.raw_data_format.data[ci][ri] for ci, c in
136
- enumerate(node_manual_input.raw_data_format.columns)}
137
- for ri in range(len(node_manual_input.raw_data_format.data[0]))]
flowfile_core/main.py CHANGED
@@ -8,14 +8,14 @@ from fastapi import FastAPI
8
8
  from fastapi.middleware.cors import CORSMiddleware
9
9
 
10
10
  from flowfile_core import ServerRun
11
- from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,
12
- OFFLOAD_TO_WORKER)
11
+ from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,)
13
12
 
14
13
  from flowfile_core.routes.auth import router as auth_router
15
14
  from flowfile_core.routes.secrets import router as secrets_router
16
15
  from flowfile_core.routes.routes import router
17
16
  from flowfile_core.routes.public import router as public_router
18
17
  from flowfile_core.routes.logs import router as logs_router
18
+ from flowfile_core.routes.cloud_connections import router as cloud_connections_router
19
19
 
20
20
  from flowfile_core.configs.flow_logger import clear_all_flow_logs
21
21
 
@@ -72,6 +72,7 @@ app.include_router(router)
72
72
  app.include_router(logs_router, tags=["logs"])
73
73
  app.include_router(auth_router, prefix="/auth", tags=["auth"])
74
74
  app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
75
+ app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=["cloud_connections"])
75
76
 
76
77
 
77
78
  @app.post("/shutdown")
@@ -0,0 +1,81 @@
1
+ from typing import List
2
+
3
+ from fastapi import HTTPException, Depends, APIRouter
4
+ from sqlalchemy.orm import Session
5
+
6
+ # Core modules
7
+ from flowfile_core.auth.jwt import get_current_active_user
8
+ from flowfile_core.configs import logger
9
+ from flowfile_core.database.connection import get_db
10
+ from flowfile_core.flowfile.database_connection_manager.db_connections import (store_cloud_connection,
11
+ get_cloud_connection_schema,
12
+ get_all_cloud_connections_interface,
13
+ delete_cloud_connection)
14
+ # Schema and models
15
+ from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection, FullCloudStorageConnectionInterface
16
+
17
+ # External dependencies
18
+ # File handling
19
+ router = APIRouter()
20
+
21
+
22
@router.post("/cloud_connection", tags=['cloud_connections'])
def create_cloud_storage_connection(input_connection: FullCloudStorageConnection,
                                    current_user=Depends(get_current_active_user),
                                    db: Session = Depends(get_db)
                                    ):
    """
    Create a new cloud storage connection.

    Parameters
    ----------
    input_connection: FullCloudStorageConnection schema containing connection details
    current_user: User obtained from Depends(get_current_active_user)
    db: Session obtained from Depends(get_db)

    Returns
    -------
    Dict with a success message

    Raises
    ------
    HTTPException
        422 with a fixed "already exists" message when storing raises a
        ValueError; 422 carrying the text of the underlying error for any
        other exception.
    """
    logger.info(f'Create cloud connection {input_connection.connection_name}')
    try:
        store_cloud_connection(db, input_connection, current_user.id)
    except ValueError as e:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(422, 'Connection name already exists') from e
    except Exception as e:
        logger.error(e)
        raise HTTPException(422, str(e)) from e
    return {"message": "Cloud connection created successfully"}
47
+
48
+
49
@router.delete('/cloud_connection', tags=['cloud_connections'])
def delete_cloud_connection_with_connection_name(connection_name: str,
                                                 current_user=Depends(get_current_active_user),
                                                 db: Session = Depends(get_db)
                                                 ):
    """
    Delete a cloud connection.

    Parameters
    ----------
    connection_name: Name of the cloud connection to delete
    current_user: User obtained from Depends(get_current_active_user)
    db: Session obtained from Depends(get_db)

    Returns
    -------
    Dict with a success message

    Raises
    ------
    HTTPException
        404 when no connection with this name is found for the current user.
    """
    logger.info(f'Deleting cloud connection {connection_name}')
    # Look the connection up first so a missing name yields a 404 instead of
    # being passed on to the delete call.
    cloud_storage_connection = get_cloud_connection_schema(db, connection_name, current_user.id)
    if cloud_storage_connection is None:
        raise HTTPException(404, 'Cloud connection not found')
    delete_cloud_connection(db, connection_name, current_user.id)
    return {"message": "Cloud connection deleted successfully"}
63
+
64
+
65
# Tag matches the other routes of this router so the endpoint is grouped
# with them in the generated API docs.
@router.get('/cloud_connections', tags=['cloud_connections'],
            response_model=List[FullCloudStorageConnectionInterface])
def get_cloud_connections(
        db: Session = Depends(get_db),
        current_user=Depends(get_current_active_user)) -> List[FullCloudStorageConnectionInterface]:
    """
    Get all cloud storage connections for the current user.

    Parameters
    ----------
    db: Session obtained from Depends(get_db)
    current_user: User obtained from Depends(get_current_active_user)

    Returns
    -------
    List[FullCloudStorageConnectionInterface]
    """
    return get_all_cloud_connections_interface(db, current_user.id)
@@ -86,7 +86,6 @@ async def stream_log_file(
86
86
  line = await file.readline()
87
87
  if line:
88
88
  formatted_message = await format_sse_message(line.strip())
89
- logger.info(f'Yielding line: {line.strip()}')
90
89
  yield formatted_message
91
90
  last_active = time.monotonic() # Reset idle timer on activity
92
91
  else:
@@ -27,12 +27,7 @@ from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_
27
27
  from flowfile_core.flowfile.analytics.analytics_processor import AnalyticsProcessor
28
28
  from flowfile_core.flowfile.extensions import get_instant_func_results
29
29
  # Flow handling
30
- from flowfile_core.flowfile.sources.external_sources.airbyte_sources.models import AirbyteConfigTemplate
31
- # Airbyte
32
- from flowfile_core.flowfile.sources.external_sources.airbyte_sources.settings import (
33
- airbyte_config_handler,
34
- AirbyteHandler
35
- )
30
+
36
31
  from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import create_sql_source_from_db_settings
37
32
  from flowfile_core.run_lock import get_flow_run_lock
38
33
  # Schema and models
@@ -49,7 +44,6 @@ from flowfile_core.database.connection import get_db
49
44
 
50
45
 
51
46
 
52
- # Router setup
53
47
  router = APIRouter(dependencies=[Depends(get_current_active_user)])
54
48
 
55
49
  # Initialize services
@@ -57,9 +51,11 @@ file_explorer = FileExplorer('/app/shared' if IS_RUNNING_IN_DOCKER else None)
57
51
 
58
52
 
59
53
def get_node_model(setting_name_ref: str):
    """
    Find the attribute of the input_schema module whose lower-cased name
    equals ``setting_name_ref``.

    The first matching attribute is returned. When nothing matches, an error
    is logged and None is returned.
    """
    logger.info("Getting node model for: " + setting_name_ref)
    schema_namespace = inspect.getmodule(input_schema).__dict__
    for attr_name in schema_namespace:
        # Skip every attribute whose lower-cased name differs from the request.
        if attr_name.lower() != setting_name_ref:
            continue
        return schema_namespace[attr_name]
    logger.error(f"Could not find node model for: {setting_name_ref}")
    return None
63
59
 
64
60
 
65
61
  @router.post("/upload/")
@@ -384,38 +380,6 @@ def close_flow(flow_id: int) -> None:
384
380
  flow_file_handler.delete_flow(flow_id)
385
381
 
386
382
 
387
- @router.get('/airbyte/available_connectors', tags=['airbyte'])
388
- def get_available_connectors():
389
- return airbyte_config_handler.available_connectors
390
-
391
-
392
- @router.get('/airbyte/available_configs', tags=['airbyte'])
393
- def get_available_configs() -> List[str]:
394
- """
395
- Get the available configurations for the airbyte connectors
396
- Returns: List of available configurations
397
- """
398
- return airbyte_config_handler.available_configs
399
-
400
-
401
- @router.get('/airbyte/config_template', tags=['airbyte'], response_model=AirbyteConfigTemplate)
402
- def get_config_spec(connector_name: str):
403
- a = airbyte_config_handler.get_config('source-' + connector_name)
404
- return a
405
-
406
-
407
- @router.post('/airbyte/set_airbyte_configs_for_streams', tags=['airbyte'])
408
- def set_airbyte_configs_for_streams(airbyte_config: input_schema.AirbyteConfig):
409
- logger.info('Setting airbyte config, update_style = ')
410
- logger.info(f'Setting config for {airbyte_config.source_name}')
411
- logger.debug(f'Config: {airbyte_config.mapped_config_spec}')
412
- airbyte_handler = AirbyteHandler(airbyte_config=airbyte_config)
413
- try:
414
- _ = airbyte_handler.get_available_streams()
415
- except Exception as e:
416
- raise HTTPException(404, str(e))
417
-
418
-
419
383
  @router.post('/update_settings/', tags=['transform'])
420
384
  def add_generic_settings(input_data: Dict[str, Any], node_type: str, current_user=Depends(get_current_active_user)):
421
385
  input_data['user_id'] = current_user.id