Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
|
|
1
2
|
from dataclasses import dataclass
|
|
2
|
-
from typing import Optional, Any, List, Dict, Literal
|
|
3
|
+
from typing import Optional, Any, List, Dict, Literal, Iterable
|
|
4
|
+
|
|
3
5
|
from flowfile_core.schemas import input_schema
|
|
4
6
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
5
7
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
|
|
6
|
-
from polars import datatypes
|
|
7
8
|
import polars as pl
|
|
8
9
|
# TODO: rename flow_file_column to flowfile_column
|
|
9
10
|
DataTypeGroup = Literal['numeric', 'str', 'date']
|
|
@@ -175,3 +176,12 @@ def convert_stats_to_column_info(stats: List[Dict]) -> List[FlowfileColumn]:
|
|
|
175
176
|
def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) -> List[input_schema.MinimalFieldInfo]:
|
|
176
177
|
return [FlowfileColumn.create_from_polars_type(PlType(column_name=k, pl_datatype=v)).get_minimal_field_info()
|
|
177
178
|
for k, v in pl_schema.items()]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def assert_if_flowfile_schema(obj: Iterable) -> bool:
|
|
182
|
+
"""
|
|
183
|
+
Assert that the object is a valid iterable of FlowfileColumn objects.
|
|
184
|
+
"""
|
|
185
|
+
if isinstance(obj, (list, set, tuple)):
|
|
186
|
+
return all(isinstance(item, FlowfileColumn) for item in obj)
|
|
187
|
+
return False
|
|
@@ -32,7 +32,7 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
|
|
|
32
32
|
output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
|
|
33
33
|
example_values=column_schema.example_values))
|
|
34
34
|
|
|
35
|
-
for i, fm in enumerate(fm_input.
|
|
35
|
+
for i, fm in enumerate(fm_input.join_mapping):
|
|
36
36
|
output_schema.append(FlowfileColumn.from_input(f'fuzzy_score_{i}', 'Float64'))
|
|
37
37
|
return output_schema
|
|
38
38
|
|
|
@@ -1 +1,2 @@
|
|
|
1
|
-
from flowfile_core.flowfile.flow_data_engine.join.verify_integrity import *
|
|
1
|
+
from flowfile_core.flowfile.flow_data_engine.join.verify_integrity import *
|
|
2
|
+
from flowfile_core.flowfile.flow_data_engine.join.utils import *
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Standard library imports
|
|
2
|
+
from typing import Dict, Tuple, TypeVar
|
|
3
|
+
|
|
4
|
+
# Third-party imports
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from flowfile_core.schemas import (
|
|
8
|
+
transform_schema as transform_schemas
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
T = TypeVar('T', pl.DataFrame, pl.LazyFrame)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def rename_df_table_for_join(left_df: T, right_df: T, join_key_rename: transform_schemas.FullJoinKeyResponse) -> Tuple[T, T]:
|
|
15
|
+
return (left_df.rename({r[0]: r[1] for r in join_key_rename.left.join_key_renames}),
|
|
16
|
+
right_df.rename({r[0]: r[1] for r in join_key_rename.right.join_key_renames}))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInput) -> Dict[str, str]:
|
|
20
|
+
join_key_rename = join_input.get_join_key_renames(True)
|
|
21
|
+
return {r[1]: r[0] for r in join_key_rename.right.join_key_renames + join_key_rename.left.join_key_renames}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_col_name_to_delete(col: transform_schemas.SelectInput, side: transform_schemas.SideLit):
|
|
25
|
+
return col.new_name if not col.join_key else transform_schemas.construct_join_key_name(side, col.new_name)
|
|
@@ -121,6 +121,7 @@ class PolarsCodeParser:
|
|
|
121
121
|
"""
|
|
122
122
|
|
|
123
123
|
def __init__(self):
|
|
124
|
+
import datetime
|
|
124
125
|
self.safe_globals = {
|
|
125
126
|
# Polars functionality
|
|
126
127
|
'pl': pl,
|
|
@@ -175,7 +176,8 @@ class PolarsCodeParser:
|
|
|
175
176
|
'False': False,
|
|
176
177
|
'None': None,
|
|
177
178
|
'time': time,
|
|
178
|
-
'BytesIO': BytesIO
|
|
179
|
+
'BytesIO': BytesIO,
|
|
180
|
+
'datetime': datetime,
|
|
179
181
|
}
|
|
180
182
|
|
|
181
183
|
@staticmethod
|
|
@@ -18,9 +18,9 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
|
|
|
18
18
|
PolarsOperation,
|
|
19
19
|
Status
|
|
20
20
|
)
|
|
21
|
-
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.models import AirbyteSettings
|
|
22
21
|
from flowfile_core.flowfile.sources.external_sources.sql_source.models import (DatabaseExternalReadSettings,
|
|
23
22
|
DatabaseExternalWriteSettings)
|
|
23
|
+
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
|
|
24
24
|
from flowfile_core.schemas.input_schema import (
|
|
25
25
|
ReceivedCsvTable,
|
|
26
26
|
ReceivedExcelTable,
|
|
@@ -81,13 +81,6 @@ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: R
|
|
|
81
81
|
return Status(**f.json())
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
def trigger_airbyte_collector(airbyte_settings: AirbyteSettings):
|
|
85
|
-
f = requests.post(url=f'{WORKER_URL}/store_airbyte_result', data=airbyte_settings.model_dump_json())
|
|
86
|
-
if not f.ok:
|
|
87
|
-
raise Exception(f'Could not cache the data, {f.text}')
|
|
88
|
-
return Status(**f.json())
|
|
89
|
-
|
|
90
|
-
|
|
91
84
|
def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
|
|
92
85
|
f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
|
|
93
86
|
data=database_external_read_settings.model_dump_json())
|
|
@@ -104,6 +97,14 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
|
|
|
104
97
|
return Status(**f.json())
|
|
105
98
|
|
|
106
99
|
|
|
100
|
+
def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
|
|
101
|
+
f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
|
|
102
|
+
data=database_external_write_settings.model_dump_json())
|
|
103
|
+
if not f.ok:
|
|
104
|
+
raise Exception(f'Could not cache the data, {f.text}')
|
|
105
|
+
return Status(**f.json())
|
|
106
|
+
|
|
107
|
+
|
|
107
108
|
def get_results(file_ref: str) -> Status | None:
|
|
108
109
|
f = requests.get(f'{WORKER_URL}/status/{file_ref}')
|
|
109
110
|
if f.status_code == 200:
|
|
@@ -113,11 +114,15 @@ def get_results(file_ref: str) -> Status | None:
|
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
def results_exists(file_ref: str):
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if f.
|
|
119
|
-
|
|
120
|
-
|
|
117
|
+
try:
|
|
118
|
+
f = requests.get(f'{WORKER_URL}/status/{file_ref}')
|
|
119
|
+
if f.status_code == 200:
|
|
120
|
+
if f.json()['status'] == 'Completed':
|
|
121
|
+
return True
|
|
122
|
+
return False
|
|
123
|
+
except requests.RequestException as e:
|
|
124
|
+
logger.error(f"Failed to check results existence: {str(e)}")
|
|
125
|
+
return False
|
|
121
126
|
|
|
122
127
|
|
|
123
128
|
def get_df_result(encoded_df: str) -> pl.LazyFrame:
|
|
@@ -336,15 +341,6 @@ class ExternalCreateFetcher(BaseFetcher):
|
|
|
336
341
|
_ = self.get_result()
|
|
337
342
|
|
|
338
343
|
|
|
339
|
-
class ExternalAirbyteFetcher(BaseFetcher):
|
|
340
|
-
def __init__(self, airbyte_settings: AirbyteSettings, wait_on_completion: bool = True):
|
|
341
|
-
r = trigger_airbyte_collector(airbyte_settings)
|
|
342
|
-
super().__init__(file_ref=r.background_task_id)
|
|
343
|
-
self.running = r.status == 'Processing'
|
|
344
|
-
if wait_on_completion:
|
|
345
|
-
_ = self.get_result()
|
|
346
|
-
|
|
347
|
-
|
|
348
344
|
class ExternalDatabaseFetcher(BaseFetcher):
|
|
349
345
|
def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
|
|
350
346
|
wait_on_completion: bool = True):
|
|
@@ -365,6 +361,17 @@ class ExternalDatabaseWriter(BaseFetcher):
|
|
|
365
361
|
_ = self.get_result()
|
|
366
362
|
|
|
367
363
|
|
|
364
|
+
class ExternalCloudWriter(BaseFetcher):
|
|
365
|
+
|
|
366
|
+
def __init__(self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface,
|
|
367
|
+
wait_on_completion: bool = True):
|
|
368
|
+
r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
|
|
369
|
+
super().__init__(file_ref=r.background_task_id)
|
|
370
|
+
self.running = r.status == 'Processing'
|
|
371
|
+
if wait_on_completion:
|
|
372
|
+
_ = self.get_result()
|
|
373
|
+
|
|
374
|
+
|
|
368
375
|
class ExternalExecutorTracker:
|
|
369
376
|
result: Optional[pl.LazyFrame]
|
|
370
377
|
started: bool = False
|
|
@@ -3,30 +3,13 @@ from flowfile_core.configs.settings import AVAILABLE_RAM, WORKER_URL
|
|
|
3
3
|
from flowfile_core.configs import logger
|
|
4
4
|
from flowfile_core.flowfile.flow_data_engine.subprocess_operations import ExternalDfFetcher
|
|
5
5
|
from flowfile_core.flowfile.flow_data_engine.subprocess_operations import Status
|
|
6
|
+
from flowfile_core.utils.utils import standardize_col_dtype
|
|
6
7
|
import os
|
|
7
8
|
from typing import List, Dict, Iterable, Callable, Any
|
|
8
|
-
from itertools import chain
|
|
9
9
|
import requests
|
|
10
10
|
from base64 import encodebytes
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def convert_to_string(v):
|
|
14
|
-
try:
|
|
15
|
-
return str(v)
|
|
16
|
-
except:
|
|
17
|
-
return None
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def standardize_col_dtype(vals):
|
|
21
|
-
types = set(type(val) for val in vals)
|
|
22
|
-
if len(types) == 1:
|
|
23
|
-
return vals
|
|
24
|
-
elif int in types and float in types:
|
|
25
|
-
return vals
|
|
26
|
-
else:
|
|
27
|
-
return [convert_to_string(v) for v in vals]
|
|
28
|
-
|
|
29
|
-
|
|
30
13
|
def get_data_type(vals: Iterable[Any]):
|
|
31
14
|
types = set(type(val) for val in vals)
|
|
32
15
|
if len(types) == 1:
|
|
@@ -37,28 +20,6 @@ def get_data_type(vals: Iterable[Any]):
|
|
|
37
20
|
return 'str'
|
|
38
21
|
|
|
39
22
|
|
|
40
|
-
def ensure_similarity_dicts(datas: List[Dict], respect_order: bool = True):
|
|
41
|
-
all_cols = (data.keys() for data in datas)
|
|
42
|
-
if not respect_order:
|
|
43
|
-
unique_cols = set(chain(*all_cols))
|
|
44
|
-
else:
|
|
45
|
-
col_store = set()
|
|
46
|
-
unique_cols = list()
|
|
47
|
-
for row in all_cols:
|
|
48
|
-
for col in row:
|
|
49
|
-
if col not in col_store:
|
|
50
|
-
unique_cols.append(col)
|
|
51
|
-
col_store.update((col,))
|
|
52
|
-
output = []
|
|
53
|
-
for data in datas:
|
|
54
|
-
new_record = dict()
|
|
55
|
-
for col in unique_cols:
|
|
56
|
-
val = data.get(col)
|
|
57
|
-
new_record[col] = val
|
|
58
|
-
output.append(new_record)
|
|
59
|
-
return output
|
|
60
|
-
|
|
61
|
-
|
|
62
23
|
def calculate_schema(lf: pl.LazyFrame) -> List[Dict]:
|
|
63
24
|
r = ExternalDfFetcher(lf=lf, operation_type='calculate_schema', wait_on_completion=False, flow_id=-1, node_id=-1)
|
|
64
25
|
schema_stats: List[Dict] = r.get_result()
|