Flowfile 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile has been flagged as possibly problematic.
- flowfile/__init__.py +2 -1
- flowfile/api.py +5 -3
- flowfile/web/__init__.py +3 -0
- flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
- flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
- flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
- flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
- flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
- flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
- flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
- flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
- flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
- flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
- flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
- flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
- flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
- flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
- flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
- flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
- flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
- flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
- flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
- flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
- flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
- flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
- flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
- flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
- flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
- flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
- flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
- flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
- flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
- flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
- flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
- flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
- flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
- flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
- flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
- flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
- flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -3
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +97 -88
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/node_store/nodes.py +2 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +84 -29
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +55 -18
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +34 -2
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_graph_utils.py +320 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +12 -14
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +616 -627
- flowfile_frame/flow_frame.pyi +336 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
- flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
The diffs below cover the flowfile_core Python modules included in this extract.

```diff
--- a/flowfile_core/configs/settings.py
+++ b/flowfile_core/configs/settings.py
@@ -5,10 +5,10 @@ import os
 import tempfile
 import argparse
 
-from databases import DatabaseURL
 from passlib.context import CryptContext
 from starlette.config import Config
-
+
+from flowfile_core.configs.utils import MutableBool
 
 
 # Constants for server and worker configuration
@@ -18,6 +18,9 @@ DEFAULT_WORKER_PORT = 63579
 SINGLE_FILE_MODE: bool = os.environ.get("SINGLE_FILE_MODE", "0") == "1"
 
 
+OFFLOAD_TO_WORKER = MutableBool(True)
+
+
 def parse_args():
     """Parse command line arguments"""
     parser = argparse.ArgumentParser(description="Flowfile Backend Server")
@@ -79,7 +82,6 @@ args = parse_args()
 SERVER_HOST = args.host if args.host is not None else DEFAULT_SERVER_HOST
 SERVER_PORT = args.port if args.port is not None else DEFAULT_SERVER_PORT
 WORKER_PORT = args.worker_port if args.worker_port is not None else int(os.getenv("WORKER_PORT", DEFAULT_WORKER_PORT))
-# Worker configuration
 WORKER_HOST = os.getenv("WORKER_HOST", "0.0.0.0" if platform.system() != "Windows" else "127.0.0.1")
 
 config = Config(".env")
```
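The switch from a plain module-level bool to `MutableBool` (the class added in the new `flowfile_core/configs/utils.py`, shown next) matters because `from module import NAME` binds a snapshot: rebinding a bool in the settings module would be invisible to code that already imported the name, whereas mutating a shared object is visible everywhere. A minimal, self-contained sketch of that behaviour, using a local copy of the class and illustrative names:

```python
from dataclasses import dataclass


@dataclass
class MutableBool:  # same shape as the class added in flowfile_core/configs/utils.py
    value: bool

    def __bool__(self) -> bool:
        return self.value


OFFLOAD_TO_WORKER = MutableBool(True)   # module-level flag, as in settings.py
imported_ref = OFFLOAD_TO_WORKER        # what `from settings import OFFLOAD_TO_WORKER` yields
OFFLOAD_TO_WORKER.value = False         # in-place mutation is seen through every reference
assert not imported_ref
```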
```diff
--- /dev/null
+++ b/flowfile_core/configs/utils.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class MutableBool:
+    value: bool
+
+    def __bool__(self) -> bool:
+        """Allow direct boolean evaluation"""
+        return self.value
+
+    def __eq__(self, other) -> bool:
+        """Allow equality comparison with booleans"""
+        if isinstance(other, bool):
+            return self.value == other
+        elif isinstance(other, MutableBool):
+            return self.value == other.value
+        return NotImplemented
```
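Assuming the package is importable, the new class behaves as follows. Because `__eq__` is defined in the class body, `@dataclass` keeps it rather than generating its own, and returning `NotImplemented` lets comparisons against unrelated types fall back to Python's default:

```python
from flowfile_core.configs.utils import MutableBool

flag = MutableBool(True)
assert bool(flag) is True            # __bool__ allows `if flag:` checks
assert flag == True                  # compares directly against plain bools
assert flag == MutableBool(True)     # and against other MutableBool instances
assert (flag == "yes") is False      # NotImplemented -> default (identity) comparison
```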
```diff
--- a/flowfile_core/flowfile/FlowfileFlow.py
+++ b/flowfile_core/flowfile/FlowfileFlow.py
@@ -2,6 +2,8 @@ import datetime
 import pickle
 import polars as pl
 import fastexcel
+import copy
+
 from fastapi.exceptions import HTTPException
 from time import time
 from functools import partial
@@ -13,7 +15,7 @@ from flowfile_core.configs import logger
 from flowfile_core.configs.flow_logger import FlowLogger
 from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
 from flowfile_core.flowfile.sources.external_sources.airbyte_sources.settings import airbyte_settings_from_config
-from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import cast_str_to_polars_type, FlowfileColumn
 from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.settings_validator import (calculate_fuzzy_match_schema,
                                                                                        pre_calculate_pivot_schema)
 from flowfile_core.utils.arrow_reader import get_read_top_n
@@ -23,7 +25,7 @@ from flowfile_core.flowfile.flow_data_engine.read_excel_tables import get_open_x
 from flowfile_core.flowfile.sources import external_sources
 from flowfile_core.schemas import input_schema, schemas, transform_schema
 from flowfile_core.schemas.output_model import TableExample, NodeData, NodeResult, RunInformation
-from flowfile_core.flowfile.utils import snake_case_to_camel_case
+from flowfile_core.flowfile.utils import snake_case_to_camel_case, _handle_raw_data
 from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
 from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
@@ -32,7 +34,7 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_op
                                                                                       ExternalDatabaseFetcher,
                                                                                       ExternalDatabaseWriter,
                                                                                       ExternalDfFetcher)
-from flowfile_core.
+from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
 from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
 from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
 from flowfile_core.flowfile.database_connection_manager.db_connections import get_local_database_connection
@@ -203,28 +205,20 @@ class FlowGraph:
         sample_size: int = 10000
 
         def analysis_preparation(flowfile_table: FlowDataEngine):
-
-
-
-            number_of_records = ExternalDfFetcher(
-                lf=flowfile_table.data_frame,
-                operation_type="calculate_number_of_records",
-                flow_id=self.flow_id,
-                node_id=node.node_id,
-            ).result
+            if flowfile_table.number_of_records <= 0:
+                number_of_records = flowfile_table.get_number_of_records(calculate_in_worker_process=True)
             else:
                 number_of_records = flowfile_table.number_of_records
             if number_of_records > sample_size:
                 flowfile_table = flowfile_table.get_sample(sample_size, random=True)
-
             external_sampler = ExternalDfFetcher(
                 lf=flowfile_table.data_frame,
-                file_ref=node.hash,
+                file_ref="__gf_walker"+node.hash,
                 wait_on_completion=True,
                 node_id=node.node_id,
                 flow_id=self.flow_id,
             )
-            node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref
+            node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref)
             return flowfile_table
 
         def schema_callback():
@@ -439,11 +433,11 @@ class FlowGraph:
 
     def add_formula(self, function_settings: input_schema.NodeFormula):
         error = ""
-        if function_settings.function.field.data_type
-            output_type =
+        if function_settings.function.field.data_type not in (None, "Auto"):
+            output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
         else:
             output_type = None
-        if output_type
+        if output_type not in (None, "Auto"):
             new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
                                                  data_type=str(output_type))]
         else:
@@ -485,7 +479,8 @@ class FlowGraph:
                             function=_func,
                             input_columns=[],
                             node_type='cross_join',
-                            setting_input=cross_join_settings
+                            setting_input=cross_join_settings,
+                            input_node_ids=cross_join_settings.depending_on_ids)
         return self
 
     def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
@@ -587,6 +582,8 @@ class FlowGraph:
         input_cols = set(f.name for f in table.schema)
         ids_to_remove = []
         for i, select_col in enumerate(select_cols):
+            if select_col.data_type is None:
+                select_col.data_type = table.get_schema_column(select_col.old_name).data_type
             if select_col.old_name not in input_cols:
                 select_col.is_available = False
             if not select_col.keep:
@@ -900,9 +897,6 @@ class FlowGraph:
         if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
             logger.info('Using provided schema in the node')
 
-    def add_google_sheet(self, external_source_input: input_schema.NodeExternalSource):
-        logger.info('Adding google sheet reader')
-        self.add_external_source(external_source_input)
 
     def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
         logger.info('Adding sql source')
@@ -1044,11 +1038,10 @@ class FlowGraph:
         return self
 
     def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput):
-
         if isinstance(input_file, input_schema.NodeManualInput):
-
+            _handle_raw_data(input_file)
+            input_data = FlowDataEngine(input_file.raw_data_format)
             ref = 'manual_input'
-
         else:
             input_data = FlowDataEngine(path_ref=input_file.file_ref)
             ref = 'datasource'
@@ -1061,7 +1054,9 @@ class FlowGraph:
 
         if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
             self._flow_starts.append(node)
+
         else:
+            input_data.collect()
             node = FlowNode(input_file.node_id, function=input_data,
                             setting_input=input_file,
                             name=ref, node_type=ref, parent_uuid=self.uuid)
@@ -1083,7 +1078,7 @@ class FlowGraph:
         self._output_cols += cols_available
 
     @property
-    def input_data_columns(self) -> List[str]:
+    def input_data_columns(self) -> List[str] | None:
        if self._input_cols:
            return list(set([col for col in self._input_cols if
                             col in [table_col.name for table_col in self._input_data.schema]]))
@@ -1102,7 +1097,7 @@ class FlowGraph:
         return implicit_starting_nodes
 
     @execution_mode.setter
-    def execution_mode(self, mode:
+    def execution_mode(self, mode: schemas.ExecutionModeLiteral):
         self.flow_settings.execution_mode = mode
 
     @property
@@ -1158,13 +1153,13 @@ class FlowGraph:
                     continue
                 node_result.success = node.results.errors is None
                 node_result.end_timestamp = time()
-                node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
                 node_result.is_running = False
             except Exception as e:
                 node_result.error = 'Node did not run'
                 node_result.success = False
                 node_result.end_timestamp = time()
-                node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
                 node_result.is_running = False
                 node_logger.error(f'Error in node {node.node_id}: {e}')
             if not node_result.success:
@@ -1352,6 +1347,66 @@ class FlowGraph:
         getattr(self, f"add_{node_type}")(combined_settings)
 
 
+def combine_flow_graphs(*flow_graphs: FlowGraph) -> FlowGraph:
+    """
+    Combine multiple flow graphs into a single graph, ensuring node IDs don't overlap.
+
+    Args:
+        *flow_graphs: Multiple FlowGraph instances to combine
+
+    Returns:
+        A new FlowGraph containing all nodes and edges from the input graphs with remapped IDs
+
+    Raises:
+        ValueError: If any flow_ids overlap
+    """
+    # Validate flow IDs are unique
+    _validate_unique_flow_ids(flow_graphs)
+
+    # Create ID mapping for all nodes
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+
+    # Remap and combine nodes
+    all_nodes = _remap_nodes(flow_graphs, node_id_mapping)
+
+    # Create a new combined flow graph
+    combined_flow_id = hash(tuple(fg.flow_id for fg in flow_graphs))
+    # return FlowGraph(flow_id=combined_flow_id, nodes=all_nodes, edges=all_edges)
+
+
+def _validate_unique_flow_ids(flow_graphs: Tuple[FlowGraph, ...]) -> None:
+    """Ensure all flow graphs have unique flow_ids."""
+    all_flow_ids = [fg.flow_id for fg in flow_graphs]
+    if len(all_flow_ids) != len(set(all_flow_ids)):
+        raise ValueError("Cannot combine overlapping graphs, make sure the graphs have a unique identifier")
+
+
+def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[int, Dict[int, int]]:
+    """Create a mapping from original node IDs to new unique node IDs."""
+    node_id_mapping: Dict[int, Dict[int, int]] = {}
+    next_node_id = 0
+
+    for fg in flow_graphs:
+        node_id_mapping[fg.flow_id] = {}
+        for node in fg.nodes:
+            node_id_mapping[fg.flow_id][node.node_id] = next_node_id
+            next_node_id += 1
+
+    return node_id_mapping
+
+
+def _remap_nodes(flow_graphs: Tuple[FlowGraph, ...],
+                 node_id_mapping: Dict[int, Dict[int, int]]) -> List:
+    """Create new nodes with remapped IDs."""
+    all_nodes = []
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            new_node = copy.deepcopy(node)
+            new_node.node_id = node_id_mapping[fg.flow_id][node.node_id]
+            all_nodes.append(new_node)
+    return all_nodes
+
+
 def combine_existing_settings_and_new_settings(setting_input: Any, new_settings: input_schema.NodePromise) -> Any:
     """Combine excopy_nodeisting settings with new settings from a NodePromise."""
     copied_setting_input = deepcopy(setting_input)
```
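The helper trio behind `combine_flow_graphs` gives every node a fresh sequential ID keyed by `(flow_id, original node_id)`; note the public function's return is still commented out in this release. A standalone toy illustration of the mapping scheme, with plain ints standing in for FlowNode objects:

```python
# flow_id -> original node ids (stand-ins for FlowGraph.nodes)
graphs = {101: [1, 2], 202: [1, 3]}

node_id_mapping, next_node_id = {}, 0
for flow_id, node_ids in graphs.items():
    node_id_mapping[flow_id] = {}
    for node_id in node_ids:
        node_id_mapping[flow_id][node_id] = next_node_id  # globally unique, sequential
        next_node_id += 1

print(node_id_mapping)  # {101: {1: 0, 2: 1}, 202: {1: 2, 3: 3}}
```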
```diff
--- a/flowfile_core/flowfile/database_connection_manager/db_connections.py
+++ b/flowfile_core/flowfile/database_connection_manager/db_connections.py
@@ -1,7 +1,7 @@
 from flowfile_core.schemas.input_schema import FullDatabaseConnection, FullDatabaseConnectionInterface
 from sqlalchemy.orm import Session
 from flowfile_core.database.models import DatabaseConnection as DBConnectionModel, Secret
-from flowfile_core.
+from flowfile_core.secret_manager.secret_manager import store_secret, SecretInput, decrypt_secret
 from flowfile_core.database.connection import get_db_context
 
 
```
```diff
--- a/flowfile_core/flowfile/flow_data_engine/flow_data_engine.py
+++ b/flowfile_core/flowfile/flow_data_engine/flow_data_engine.py
@@ -17,6 +17,7 @@ from pyarrow.parquet import ParquetFile
 # Local imports - Core
 from flowfile_core.configs import logger
 from flowfile_core.configs.flow_logger import NodeLogger
+from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
 from flowfile_core.schemas import (
     input_schema,
     transform_schema as transform_schemas
@@ -29,7 +30,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import (
     FlowfileColumn,
     convert_stats_to_column_info
 )
-from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
 from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.prepare_for_fuzzy_match import prepare_for_fuzzy_match
 from flowfile_core.flowfile.flow_data_engine.join import (
     verify_join_select_integrity,
@@ -109,7 +110,7 @@ class FlowDataEngine:
     # flow_id: int = None  # TODO: Implement flow_id
 
     def __init__(self,
-                 raw_data: Union[List[Dict], List[Any], 'ParquetFile', pl.DataFrame, pl.LazyFrame] = None,
+                 raw_data: Union[List[Dict], List[Any], 'ParquetFile', pl.DataFrame, pl.LazyFrame, input_schema.RawData] = None,
                  path_ref: str = None,
                  name: str = None,
                  optimize_memory: bool = True,
@@ -147,7 +148,10 @@ class FlowDataEngine:
 
     def _handle_raw_data(self, raw_data, number_of_records, optimize_memory):
         """Process different types of input data."""
-
+
+        if isinstance(raw_data, input_schema.RawData):
+            self._handle_raw_data_format(raw_data)
+        elif isinstance(raw_data, pl.DataFrame):
             self._handle_polars_dataframe(raw_data, number_of_records)
         elif isinstance(raw_data, pl.LazyFrame):
             self._handle_polars_lazy_frame(raw_data, number_of_records, optimize_memory)
@@ -190,6 +194,20 @@ class FlowDataEngine:
             self.number_of_records = 1
             self.data_frame = pl.DataFrame([data])
 
+    def _handle_raw_data_format(self, raw_data: input_schema.RawData):
+        """Create a FlowDataEngine from a RawData object."""
+        flowfile_schema = list(FlowfileColumn.create_from_minimal_field_info(c) for c in raw_data.columns)
+        polars_schema = pl.Schema([(flowfile_column.column_name, flowfile_column.get_polars_type().pl_datatype)
+                                   for flowfile_column in flowfile_schema])
+        try:
+            df = pl.DataFrame(raw_data.data, polars_schema)
+        except TypeError as e:
+            logger.warning(f"Could not parse the data with the schema:\n{e}")
+            df = pl.DataFrame(raw_data.data)
+        self.number_of_records = len(df)
+        self.data_frame = df.lazy()
+        self.lazy = True
+
     def _handle_list_input(self, data: List):
         """Handle list input."""
         number_of_records = len(data)
```
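`_handle_raw_data_format` tries the declared schema first and falls back to Polars' own inference when the data does not fit. The same try/fallback pattern in isolation (the real code catches `TypeError`; the exact exception raised by a schema mismatch can vary by Polars version, so this sketch catches broadly):

```python
import polars as pl

data = {"a": ["not", "ints"]}
declared = pl.Schema([("a", pl.Int64())])

try:
    df = pl.DataFrame(data, declared)   # strict: data must match the declared schema
except Exception as e:                  # the diff catches TypeError specifically
    print(f"Could not parse the data with the schema:\n{e}")
    df = pl.DataFrame(data)             # fall back to schema inference
lf = df.lazy()                          # the engine then continues lazily
```

The remaining flow_data_engine.py hunks follow.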
```diff
@@ -462,6 +480,9 @@ class FlowDataEngine:
             return self.data_frame.collect(engine="streaming" if self._streamable else "auto").to_dicts()
         return self.data_frame.to_dicts()
 
+    def to_dict(self) -> Dict[str, List]:
+        return self.data_frame.collect(engine="streaming" if self._streamable else "auto").to_dict(as_series=False)
+
     @classmethod
     def create_from_external_source(cls, external_source: ExternalDataSource) -> "FlowDataEngine":
         """Create a FlowDataEngine from an external data source."""
@@ -484,7 +505,7 @@ class FlowDataEngine:
         """Create a FlowDataEngine from a schema definition."""
         pl_schema = []
         for i, flow_file_column in enumerate(schema):
-            pl_schema.append((flow_file_column.name,
+            pl_schema.append((flow_file_column.name, cast_str_to_polars_type(flow_file_column.data_type)))
             schema[i].col_index = i
         df = pl.LazyFrame(schema=pl_schema)
         return cls(df, schema=schema, calculate_schema_stats=False, number_of_records=0)
@@ -824,7 +845,7 @@ class FlowDataEngine:
         Returns:
             FlowDataEngine: New instance with sampled data
         """
-        n_records = min(n_rows, self.
+        n_records = min(n_rows, self.get_number_of_records(calculate_in_worker_process=True))
         logging.info(f'Getting sample of {n_rows} rows')
 
         if random:
@@ -1158,14 +1179,25 @@ class FlowDataEngine:
         self.number_of_records = 0
         self._lazy = True
 
-    def
+    def _calculate_number_of_records_in_worker(self) -> int:
+        number_of_records = ExternalDfFetcher(
+            lf=self.data_frame,
+            operation_type="calculate_number_of_records",
+            flow_id=-1,
+            node_id=-1,
+            wait_on_completion=True
+        ).result
+        return number_of_records
+
+    def get_number_of_records(self, warn: bool = False, force_calculate: bool = False,
+                              calculate_in_worker_process: bool = False) -> int:
         """
         Get the total number of records in the DataFrame.
 
         Args:
             warn: Whether to warn about expensive operations
             force_calculate: Whether to force recalculation
-
+            calculate_in_worker_process: Whether to offload compute to the worker process
         Returns:
             int: Number of records
 
@@ -1174,22 +1206,24 @@ class FlowDataEngine:
         """
         if self.is_future and not self.is_collected:
             return -1
-
+        calculate_in_worker_process = False if not OFFLOAD_TO_WORKER.value else calculate_in_worker_process
         if self.number_of_records is None or self.number_of_records < 0 or force_calculate:
             if self._number_of_records_callback is not None:
                 self._number_of_records_callback(self)
 
             if self.lazy:
-                if
-
-
-
-
-
-
+                if calculate_in_worker_process:
+                    self.number_of_records = self._calculate_number_of_records_in_worker()
+                else:
+                    if warn:
+                        logger.warning('Calculating the number of records this can be expensive on a lazy frame')
+                    try:
+                        self.number_of_records = self.data_frame.select(pl.len()).collect(
+                            engine="streaming" if self._streamable else "auto")[0, 0]
+                    except Exception:
+                        raise ValueError('Could not get number of records')
             else:
                 self.number_of_records = self.data_frame.__len__()
-
         return self.number_of_records
 
     # Properties
@@ -1345,7 +1379,7 @@ class FlowDataEngine:
             FlowDataEngine: New instance with added column
         """
         expr = to_expr(func)
-        if output_data_type
+        if output_data_type not in (None, "Auto"):
             df = self.data_frame.with_columns(expr.cast(output_data_type).alias(col_name))
         else:
             df = self.data_frame.with_columns(expr.alias(col_name))
@@ -1518,4 +1552,7 @@ def execute_polars_code(*flowfile_tables: "FlowDataEngine", code: str) -> "FlowD
         kwargs = {'input_df': flowfile_tables[0].data_frame}
     else:
         kwargs = {f'input_df_{i+1}': flowfile_table.data_frame for i, flowfile_table in enumerate(flowfile_tables)}
-
+    df = polars_executable(**kwargs)
+    if isinstance(df, pl.DataFrame):
+        logger.warning("Got a non lazy DataFrame, possibly harming performance, if possible, try to use a lazy method")
+    return FlowDataEngine(df)
```
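When offloading is disabled (the new `OFFLOAD_TO_WORKER` flag vetoes `calculate_in_worker_process` per call), `get_number_of_records` counts locally on the lazy frame instead of dispatching to the worker. That local path, in isolation:

```python
import polars as pl

lf = pl.LazyFrame({"a": [1, 2, 3]})
# Same expression the fallback branch uses: a cheap pl.len() aggregation,
# collected (optionally with the streaming engine) and read from cell [0, 0].
n = lf.select(pl.len()).collect()[0, 0]
assert n == 3
```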
```diff
--- a/flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py
+++ b/flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from typing import Optional, Any, List, Dict, Literal
 from flowfile_core.schemas import input_schema
-from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
 from polars import datatypes
 import polars as pl
@@ -9,6 +9,37 @@ import polars as pl
 DataTypeGroup = Literal['numeric', 'str', 'date']
 
 
+def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
+    if isinstance(pl_type, pl.List):
+        inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
+        return f"pl.List({inner_str})"
+    elif isinstance(pl_type, pl.Array):
+        inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
+        return f"pl.Array({inner_str})"
+    elif isinstance(pl_type, pl.Decimal):
+        precision = pl_type.precision if hasattr(pl_type, 'precision') else None
+        scale = pl_type.scale if hasattr(pl_type, 'scale') else None
+        if precision is not None and scale is not None:
+            return f"pl.Decimal({precision}, {scale})"
+        elif precision is not None:
+            return f"pl.Decimal({precision})"
+        else:
+            return "pl.Decimal()"
+    elif isinstance(pl_type, pl.Struct):
+        # Handle Struct with field definitions
+        fields = []
+        if hasattr(pl_type, 'fields'):
+            for field in pl_type.fields:
+                field_name = field.name
+                field_type = convert_pl_type_to_string(field.dtype, inner=True)
+                fields.append(f'pl.Field("{field_name}", {field_type})')
+        field_str = ", ".join(fields)
+        return f"pl.Struct([{field_str}])"
+    else:
+        # For base types, we want the full pl.TypeName format
+        return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
+
+
 @dataclass
 class FlowfileColumn:
     column_name: str
@@ -28,7 +59,7 @@ class FlowfileColumn:
     __perc_unique: Optional[float]
 
     def __init__(self, polars_type: PlType):
-        self.data_type =
+        self.data_type = convert_pl_type_to_string(polars_type.pl_datatype)
         self.size = polars_type.count - polars_type.null_count
         self.max_value = polars_type.max
         self.min_value = polars_type.min
@@ -53,7 +84,7 @@ class FlowfileColumn:
 
     @classmethod
     def from_input(cls, column_name: str, data_type: str, **kwargs) -> "FlowfileColumn":
-        pl_type =
+        pl_type = cast_str_to_polars_type(data_type)
         if pl_type is not None:
             data_type = pl_type
         return cls(PlType(column_name=column_name, pl_datatype=data_type, **kwargs))
@@ -129,12 +160,9 @@ class FlowfileColumn:
         return 'date'
 
     def get_polars_type(self) -> PlType:
-
-
-
-        pl_datatype = None
-
-        return PlType(pl_datatype=pl_datatype, **self.__dict__)
+        pl_datatype = cast_str_to_polars_type(self.data_type)
+        pl_type = PlType(pl_datatype=pl_datatype, **self.__dict__)
+        return pl_type
 
     def update_type_from_polars_type(self, pl_type: PlType):
         self.data_type = str(pl_type.pl_datatype.base_type())
@@ -142,3 +170,8 @@ class FlowfileColumn:
 
 def convert_stats_to_column_info(stats: List[Dict]) -> List[FlowfileColumn]:
     return [FlowfileColumn.create_from_polars_type(PlType(**c)) for c in stats]
+
+
+def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) -> List[input_schema.MinimalFieldInfo]:
+    return [FlowfileColumn.create_from_polars_type(PlType(column_name=k, pl_datatype=v)).get_minimal_field_info()
+            for k, v in pl_schema.items()]
```
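Following the branches of `convert_pl_type_to_string` above: top-level scalar types render via `base_type()` with no `pl.` prefix, while nested types are prefixed so they survive a later `safe_eval_pl_type` round-trip. Expected outputs, assuming the package is importable:

```python
import polars as pl
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import convert_pl_type_to_string

assert convert_pl_type_to_string(pl.Int64()) == "Int64"                       # scalar: base_type()
assert convert_pl_type_to_string(pl.List(pl.Int64())) == "pl.List(pl.Int64)"  # nested: pl.-prefixed
assert convert_pl_type_to_string(pl.Decimal(10, 2)) == "pl.Decimal(10, 2)"    # precision and scale kept
```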
```diff
--- a/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py
+++ b/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py
@@ -18,10 +18,45 @@ dtype_to_pl = {
     'time': pl.Time,
 }
 
+
+def safe_eval_pl_type(type_string: str):
+    """
+    Safely evaluate a Polars type string with restricted namespace.
+    Only allows Polars types and basic Python literals.
+    """
+    # Define allowed names in the evaluation namespace
+    safe_dict = {
+        # Polars module and types
+        'pl': pl,
+
+        # Basic Python built-ins for literals
+        'int': int,
+        'str': str,
+        'float': float,
+        'bool': bool,
+        'list': list,
+        'dict': dict,
+        'tuple': tuple,
+
+        # Disable dangerous built-ins
+        '__builtins__': {},
+    }
+
+    try:
+        return eval(type_string, safe_dict, {})
+    except Exception as e:
+        raise ValueError(f"Failed to safely evaluate type string '{type_string}': {e}")
+
+
 dtype_to_pl_str = {k: v.__name__ for k, v in dtype_to_pl.items()}
 
 
-def
+def get_polars_type(dtype: str):
+    if 'pl.' in dtype:
+        try:
+            return safe_eval_pl_type(dtype)
+        except Exception as e:
+            return pl.String
     pl_datetype = dtype_to_pl.get(dtype.lower())
     if pl_datetype is not None:
         return pl_datetype
@@ -31,6 +66,10 @@ def type_to_polars(dtype: str):
     return pl.String
 
 
-def
-
+def cast_str_to_polars_type(dtype: str) -> pl.DataType:
+    pl_type = get_polars_type(dtype)
+    if hasattr(pl_type, '__call__'):
+        return pl_type()
+    else:
+        return pl_type
 
```
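Together with `convert_pl_type_to_string` above, this gives a string↔dtype round-trip: `pl.`-prefixed strings are evaluated in the restricted namespace (silently degrading to `pl.String` on failure), bare names go through the `dtype_to_pl` lookup, and bare type classes are instantiated. Assuming the package is importable:

```python
import polars as pl
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type

assert cast_str_to_polars_type("pl.Int64") == pl.Int64()                   # class string -> instantiated
assert cast_str_to_polars_type("pl.List(pl.Int64)") == pl.List(pl.Int64)   # parametrised -> returned as-is
assert cast_str_to_polars_type("pl.DoesNotExist") == pl.String()           # eval failure degrades to String
```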
```diff
--- a/flowfile_core/flowfile/flow_data_engine/polars_code_parser.py
+++ b/flowfile_core/flowfile/flow_data_engine/polars_code_parser.py
@@ -3,6 +3,7 @@ from typing import Dict, Any, Callable
 import textwrap
 import ast
 import time
+from io import BytesIO
 
 
 def remove_comments_and_docstrings(source: str) -> str:
@@ -126,6 +127,37 @@ class PolarsCodeParser:
             'col': pl.col,
             'lit': pl.lit,
             'expr': pl.expr,
+
+            # Polars datatypes - added directly
+            'Int8': pl.Int8,
+            'Int16': pl.Int16,
+            'Int32': pl.Int32,
+            'Int64': pl.Int64,
+            'Int128': pl.Int128,
+            'UInt8': pl.UInt8,
+            'UInt16': pl.UInt16,
+            'UInt32': pl.UInt32,
+            'UInt64': pl.UInt64,
+            'Float32': pl.Float32,
+            'Float64': pl.Float64,
+            'Boolean': pl.Boolean,
+            'String': pl.String,
+            'Utf8': pl.Utf8,
+            'Binary': pl.Binary,
+            'Null': pl.Null,
+            'List': pl.List,
+            'Array': pl.Array,
+            'Struct': pl.Struct,
+            'Object': pl.Object,
+            'Date': pl.Date,
+            'Time': pl.Time,
+            'Datetime': pl.Datetime,
+            'Duration': pl.Duration,
+            'Categorical': pl.Categorical,
+            'Decimal': pl.Decimal,
+            'Enum': pl.Enum,
+            'Unknown': pl.Unknown,
+
             # Basic Python built-ins
             'print': print,
             'len': len,
@@ -142,7 +174,8 @@ class PolarsCodeParser:
             'True': True,
             'False': False,
             'None': None,
-            'time': time
+            'time': time,
+            'BytesIO': BytesIO
         }
 
     @staticmethod
@@ -225,7 +258,6 @@ class PolarsCodeParser:
 
         # Wrap the code in a function
         wrapped_code = self._wrap_in_function(code, num_inputs)
-
        try:
             # Create namespace for execution
             local_namespace: Dict[str, Any] = {}
```