Flowfile: flowfile-0.3.5-py3-none-any.whl → flowfile-0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (145)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  5. flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
  6. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  7. flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
  8. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  9. flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
  10. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
  11. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
  14. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
  15. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
  16. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
  17. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
  20. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
  21. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
  22. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
  23. flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
  24. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  25. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
  26. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  27. flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
  28. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
  29. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
  30. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
  31. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
  32. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
  33. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
  34. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
  35. flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
  36. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
  37. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
  38. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
  39. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
  40. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
  41. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
  42. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
  43. flowfile/web/static/assets/api-6ef0dcef.js +80 -0
  44. flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
  45. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  46. flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
  47. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
  48. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
  49. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
  50. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
  51. flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
  52. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
  53. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
  54. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
  55. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
  56. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
  57. flowfile/web/static/index.html +1 -1
  58. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
  59. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
  60. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
  61. flowfile_core/__init__.py +3 -0
  62. flowfile_core/auth/jwt.py +39 -0
  63. flowfile_core/configs/node_store/nodes.py +9 -6
  64. flowfile_core/configs/settings.py +6 -5
  65. flowfile_core/database/connection.py +63 -15
  66. flowfile_core/database/init_db.py +0 -1
  67. flowfile_core/database/models.py +49 -2
  68. flowfile_core/flowfile/code_generator/code_generator.py +472 -17
  69. flowfile_core/flowfile/connection_manager/models.py +1 -1
  70. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  71. flowfile_core/flowfile/extensions.py +1 -1
  72. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  73. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  74. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
  75. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  76. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  77. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  78. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  79. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  80. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  81. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  82. flowfile_core/flowfile/flow_graph.py +718 -253
  83. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  84. flowfile_core/flowfile/flow_node/flow_node.py +563 -117
  85. flowfile_core/flowfile/flow_node/models.py +154 -20
  86. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  87. flowfile_core/flowfile/handler.py +2 -33
  88. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  89. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  90. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  91. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  92. flowfile_core/flowfile/utils.py +35 -26
  93. flowfile_core/main.py +35 -15
  94. flowfile_core/routes/cloud_connections.py +77 -0
  95. flowfile_core/routes/logs.py +2 -7
  96. flowfile_core/routes/public.py +1 -0
  97. flowfile_core/routes/routes.py +130 -90
  98. flowfile_core/routes/secrets.py +72 -14
  99. flowfile_core/schemas/__init__.py +8 -0
  100. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  101. flowfile_core/schemas/input_schema.py +121 -71
  102. flowfile_core/schemas/output_model.py +19 -3
  103. flowfile_core/schemas/schemas.py +150 -12
  104. flowfile_core/schemas/transform_schema.py +175 -35
  105. flowfile_core/utils/utils.py +40 -1
  106. flowfile_core/utils/validate_setup.py +41 -0
  107. flowfile_frame/__init__.py +9 -1
  108. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  109. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  110. flowfile_frame/expr.py +28 -1
  111. flowfile_frame/expr.pyi +76 -61
  112. flowfile_frame/flow_frame.py +481 -208
  113. flowfile_frame/flow_frame.pyi +140 -91
  114. flowfile_frame/flow_frame_methods.py +160 -22
  115. flowfile_frame/group_frame.py +3 -0
  116. flowfile_frame/utils.py +25 -3
  117. flowfile_worker/external_sources/s3_source/main.py +216 -0
  118. flowfile_worker/external_sources/s3_source/models.py +142 -0
  119. flowfile_worker/funcs.py +51 -6
  120. flowfile_worker/models.py +22 -2
  121. flowfile_worker/routes.py +40 -38
  122. flowfile_worker/utils.py +1 -1
  123. test_utils/s3/commands.py +46 -0
  124. test_utils/s3/data_generator.py +292 -0
  125. test_utils/s3/demo_data_generator.py +186 -0
  126. test_utils/s3/fixtures.py +214 -0
  127. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  128. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  129. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  130. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  131. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  132. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  133. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  134. flowfile_core/schemas/defaults.py +0 -9
  135. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  136. flowfile_core/schemas/models.py +0 -193
  137. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  138. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  139. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  140. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  141. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  142. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  143. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
  144. {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  145. {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
@@ -1,9 +1,10 @@
1
+
1
2
  from dataclasses import dataclass
2
- from typing import Optional, Any, List, Dict, Literal
3
+ from typing import Optional, Any, List, Dict, Literal, Iterable
4
+
3
5
  from flowfile_core.schemas import input_schema
4
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
5
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
6
- from polars import datatypes
7
8
  import polars as pl
8
9
  # TODO: rename flow_file_column to flowfile_column
9
10
  DataTypeGroup = Literal['numeric', 'str', 'date']
@@ -175,3 +176,12 @@ def convert_stats_to_column_info(stats: List[Dict]) -> List[FlowfileColumn]:
175
176
  def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) -> List[input_schema.MinimalFieldInfo]:
176
177
  return [FlowfileColumn.create_from_polars_type(PlType(column_name=k, pl_datatype=v)).get_minimal_field_info()
177
178
  for k, v in pl_schema.items()]
179
+
180
+
181
+ def assert_if_flowfile_schema(obj: Iterable) -> bool:
182
+ """
183
+ Assert that the object is a valid iterable of FlowfileColumn objects.
184
+ """
185
+ if isinstance(obj, (list, set, tuple)):
186
+ return all(isinstance(item, FlowfileColumn) for item in obj)
187
+ return False
@@ -32,7 +32,7 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
32
32
  output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
33
33
  example_values=column_schema.example_values))
34
34
 
35
- for i, fm in enumerate(fm_input.join_mappings):
35
+ for i, fm in enumerate(fm_input.join_mapping):
36
36
  output_schema.append(FlowfileColumn.from_input(f'fuzzy_score_{i}', 'Float64'))
37
37
  return output_schema
38
38
 
@@ -1 +1,2 @@
1
- from flowfile_core.flowfile.flow_data_engine.join.verify_integrity import *
1
+ from flowfile_core.flowfile.flow_data_engine.join.verify_integrity import *
2
+ from flowfile_core.flowfile.flow_data_engine.join.utils import *
@@ -0,0 +1,25 @@
1
+ # Standard library imports
2
+ from typing import Dict, Tuple, TypeVar
3
+
4
+ # Third-party imports
5
+ import polars as pl
6
+
7
+ from flowfile_core.schemas import (
8
+ transform_schema as transform_schemas
9
+ )
10
+
11
+ T = TypeVar('T', pl.DataFrame, pl.LazyFrame)
12
+
13
+
14
+ def rename_df_table_for_join(left_df: T, right_df: T, join_key_rename: transform_schemas.FullJoinKeyResponse) -> Tuple[T, T]:
15
+ return (left_df.rename({r[0]: r[1] for r in join_key_rename.left.join_key_renames}),
16
+ right_df.rename({r[0]: r[1] for r in join_key_rename.right.join_key_renames}))
17
+
18
+
19
+ def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInput) -> Dict[str, str]:
20
+ join_key_rename = join_input.get_join_key_renames(True)
21
+ return {r[1]: r[0] for r in join_key_rename.right.join_key_renames + join_key_rename.left.join_key_renames}
22
+
23
+
24
+ def get_col_name_to_delete(col: transform_schemas.SelectInput, side: transform_schemas.SideLit):
25
+ return col.new_name if not col.join_key else transform_schemas.construct_join_key_name(side, col.new_name)
@@ -121,6 +121,7 @@ class PolarsCodeParser:
121
121
  """
122
122
 
123
123
  def __init__(self):
124
+ import datetime
124
125
  self.safe_globals = {
125
126
  # Polars functionality
126
127
  'pl': pl,
@@ -175,7 +176,8 @@ class PolarsCodeParser:
175
176
  'False': False,
176
177
  'None': None,
177
178
  'time': time,
178
- 'BytesIO': BytesIO
179
+ 'BytesIO': BytesIO,
180
+ 'datetime': datetime,
179
181
  }
180
182
 
181
183
  @staticmethod
@@ -18,9 +18,9 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
18
18
  PolarsOperation,
19
19
  Status
20
20
  )
21
- from flowfile_core.flowfile.sources.external_sources.airbyte_sources.models import AirbyteSettings
22
21
  from flowfile_core.flowfile.sources.external_sources.sql_source.models import (DatabaseExternalReadSettings,
23
22
  DatabaseExternalWriteSettings)
23
+ from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
24
24
  from flowfile_core.schemas.input_schema import (
25
25
  ReceivedCsvTable,
26
26
  ReceivedExcelTable,
@@ -81,13 +81,6 @@ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: R
81
81
  return Status(**f.json())
82
82
 
83
83
 
84
- def trigger_airbyte_collector(airbyte_settings: AirbyteSettings):
85
- f = requests.post(url=f'{WORKER_URL}/store_airbyte_result', data=airbyte_settings.model_dump_json())
86
- if not f.ok:
87
- raise Exception(f'Could not cache the data, {f.text}')
88
- return Status(**f.json())
89
-
90
-
91
84
  def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
92
85
  f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
93
86
  data=database_external_read_settings.model_dump_json())
@@ -104,6 +97,14 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
104
97
  return Status(**f.json())
105
98
 
106
99
 
100
+ def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
101
+ f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
102
+ data=database_external_write_settings.model_dump_json())
103
+ if not f.ok:
104
+ raise Exception(f'Could not cache the data, {f.text}')
105
+ return Status(**f.json())
106
+
107
+
107
108
  def get_results(file_ref: str) -> Status | None:
108
109
  f = requests.get(f'{WORKER_URL}/status/{file_ref}')
109
110
  if f.status_code == 200:
@@ -113,11 +114,15 @@ def get_results(file_ref: str) -> Status | None:
113
114
 
114
115
 
115
116
  def results_exists(file_ref: str):
116
- f = requests.get(f'{WORKER_URL}/status/{file_ref}')
117
- if f.status_code == 200:
118
- if f.json()['status'] == 'Completed':
119
- return True
120
- return False
117
+ try:
118
+ f = requests.get(f'{WORKER_URL}/status/{file_ref}')
119
+ if f.status_code == 200:
120
+ if f.json()['status'] == 'Completed':
121
+ return True
122
+ return False
123
+ except requests.RequestException as e:
124
+ logger.error(f"Failed to check results existence: {str(e)}")
125
+ return False
121
126
 
122
127
 
123
128
  def get_df_result(encoded_df: str) -> pl.LazyFrame:
@@ -336,15 +341,6 @@ class ExternalCreateFetcher(BaseFetcher):
336
341
  _ = self.get_result()
337
342
 
338
343
 
339
- class ExternalAirbyteFetcher(BaseFetcher):
340
- def __init__(self, airbyte_settings: AirbyteSettings, wait_on_completion: bool = True):
341
- r = trigger_airbyte_collector(airbyte_settings)
342
- super().__init__(file_ref=r.background_task_id)
343
- self.running = r.status == 'Processing'
344
- if wait_on_completion:
345
- _ = self.get_result()
346
-
347
-
348
344
  class ExternalDatabaseFetcher(BaseFetcher):
349
345
  def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
350
346
  wait_on_completion: bool = True):
@@ -365,6 +361,17 @@ class ExternalDatabaseWriter(BaseFetcher):
365
361
  _ = self.get_result()
366
362
 
367
363
 
364
+ class ExternalCloudWriter(BaseFetcher):
365
+
366
+ def __init__(self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface,
367
+ wait_on_completion: bool = True):
368
+ r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
369
+ super().__init__(file_ref=r.background_task_id)
370
+ self.running = r.status == 'Processing'
371
+ if wait_on_completion:
372
+ _ = self.get_result()
373
+
374
+
368
375
  class ExternalExecutorTracker:
369
376
  result: Optional[pl.LazyFrame]
370
377
  started: bool = False
@@ -3,30 +3,13 @@ from flowfile_core.configs.settings import AVAILABLE_RAM, WORKER_URL
3
3
  from flowfile_core.configs import logger
4
4
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import ExternalDfFetcher
5
5
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import Status
6
+ from flowfile_core.utils.utils import standardize_col_dtype
6
7
  import os
7
8
  from typing import List, Dict, Iterable, Callable, Any
8
- from itertools import chain
9
9
  import requests
10
10
  from base64 import encodebytes
11
11
 
12
12
 
13
- def convert_to_string(v):
14
- try:
15
- return str(v)
16
- except:
17
- return None
18
-
19
-
20
- def standardize_col_dtype(vals):
21
- types = set(type(val) for val in vals)
22
- if len(types) == 1:
23
- return vals
24
- elif int in types and float in types:
25
- return vals
26
- else:
27
- return [convert_to_string(v) for v in vals]
28
-
29
-
30
13
  def get_data_type(vals: Iterable[Any]):
31
14
  types = set(type(val) for val in vals)
32
15
  if len(types) == 1:
@@ -37,28 +20,6 @@ def get_data_type(vals: Iterable[Any]):
37
20
  return 'str'
38
21
 
39
22
 
40
- def ensure_similarity_dicts(datas: List[Dict], respect_order: bool = True):
41
- all_cols = (data.keys() for data in datas)
42
- if not respect_order:
43
- unique_cols = set(chain(*all_cols))
44
- else:
45
- col_store = set()
46
- unique_cols = list()
47
- for row in all_cols:
48
- for col in row:
49
- if col not in col_store:
50
- unique_cols.append(col)
51
- col_store.update((col,))
52
- output = []
53
- for data in datas:
54
- new_record = dict()
55
- for col in unique_cols:
56
- val = data.get(col)
57
- new_record[col] = val
58
- output.append(new_record)
59
- return output
60
-
61
-
62
23
  def calculate_schema(lf: pl.LazyFrame) -> List[Dict]:
63
24
  r = ExternalDfFetcher(lf=lf, operation_type='calculate_schema', wait_on_completion=False, flow_id=-1, node_id=-1)
64
25
  schema_stats: List[Dict] = r.get_result()