Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Literal
|
|
2
|
+
from pydantic import BaseModel, Field, model_validator
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GeoRole(BaseModel):
    """Placeholder model for the geographic role of a field."""

    # Placeholder for geo role specifics
    role_type: str  # Example attribute
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Expression(BaseModel):
    """Placeholder model wrapping a single expression string."""

    # Placeholder for expression specifics
    expression: str  # Example attribute
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
AnalyticTypeLit = Literal['measure', 'dimension']
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class IField(BaseModel):
    """Description of a single field, identified by ``fid`` and ``name``.

    ``analyticType`` is restricted to ``'measure'`` or ``'dimension'``.
    Measures that arrive without an aggregation name get ``"sum"``.
    """

    fid: str
    name: str
    basename: Optional[str] = None
    semanticType: str
    analyticType: AnalyticTypeLit
    cmp: Optional[str] = None
    geoRole: Optional[GeoRole] = None
    computed: Optional[bool] = None
    expression: Optional[str] = None
    timeUnit: Optional[str] = None
    path: Optional[List[str]] = None
    offset: Optional[int] = None
    aggName: Optional[str] = None
    aggregated: Optional[bool] = None

    @model_validator(mode='after')
    def set_default_aggname(self):
        """Fill in ``aggName`` with ``"sum"`` for measures that did not set one."""
        needs_default = self.analyticType == 'measure' and self.aggName is None
        if needs_default:
            self.aggName = "sum"
        return self

    def model_dump_dict(self):
        """Return the model as a dict without ``None`` values, except ``offset``.

        ``offset`` is always present in the result and always ``None``,
        whatever value the model holds.
        """
        return {**self.model_dump(exclude_none=True), 'offset': None}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ViewField(IField):
    """An ``IField`` extended with an optional ``sort`` setting."""

    sort: Optional[str] = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class FilterField(ViewField):
    """A ``ViewField`` that additionally carries a filter ``rule``."""

    # The rule is accepted as-is; no structure is enforced on it here.
    rule: Any
    enableAgg: Optional[bool] = False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DraggableFieldState(BaseModel):
    """Lists of fields grouped per encoding channel.

    Every channel is required; all hold ``ViewField`` items except
    ``filters``, which holds ``FilterField`` items.
    """

    dimensions: List[ViewField]
    measures: List[ViewField]
    rows: List[ViewField]
    columns: List[ViewField]
    color: List[ViewField]
    opacity: List[ViewField]
    size: List[ViewField]
    shape: List[ViewField]
    theta: List[ViewField]
    radius: List[ViewField]
    longitude: List[ViewField]
    latitude: List[ViewField]
    geoId: List[ViewField]
    details: List[ViewField]
    filters: List[FilterField]
    text: List[ViewField]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ConfigScale(BaseModel):
    """Range and domain bounds of a scale.

    NOTE(review): none of these fields declares a default, so under
    pydantic v2 each must be supplied explicitly (``None`` is accepted).
    Confirm that is intended.
    """

    rangeMax: Optional[int]
    rangeMin: Optional[int]
    domainMin: Optional[int]
    domainMax: Optional[int]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class MutField(BaseModel):
    """Field description used in ``DataModel.fields``.

    Only ``fid``, ``semanticType`` and ``analyticType`` are required.
    """

    fid: str
    key: Optional[str] = None
    name: Optional[str] = None
    basename: Optional[str] = None
    disable: Optional[bool] = False
    semanticType: str
    analyticType: AnalyticTypeLit
    path: Optional[List[str]] = None
    offset: Optional[int] = None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class DataModel(BaseModel):
    """Records (as plain dicts) together with the fields describing them."""

    data: List[Dict[str, Any]]
    fields: List[MutField]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class IVisualConfigNew(BaseModel):
    """Visual configuration attached to a ``Chart``.

    ``coordSystem`` has no default and must be supplied (``None`` is
    accepted). ``limit`` is optional; it was previously annotated as a
    plain ``int`` while defaulting to ``None``, so passing ``None``
    explicitly was rejected even though it was the default.
    """

    defaultAggregated: bool
    geoms: List[str]
    coordSystem: Optional[str]
    limit: Optional[int] = None
    # A factory gives each instance its own list without relying on
    # pydantic copying a shared mutable default.
    folds: Optional[List[str]] = Field(default_factory=list)
    timezoneDisplayOffset: Optional[int] = None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class Chart(BaseModel):
    """A chart: its id, name, field encodings and visual configuration.

    ``name`` has no default, so it must be passed (``None`` is accepted).
    """

    visId: str
    name: Optional[str]
    encodings: DraggableFieldState
    config: IVisualConfigNew
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class GraphicWalkerInput(BaseModel):
    """Data model, initial-state flag and optional spec list.

    Without an explicit ``dataModel`` a new empty ``DataModel`` (no data,
    no fields) is created for each instance.
    """

    dataModel: DataModel = Field(default_factory=lambda: DataModel(data=[], fields=[]))
    is_initial: bool = True
    specList: Optional[List[Any]] = None
|
|
118
|
+
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from typing import TypeAlias, Optional, Dict, Any, Literal
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
config_options: TypeAlias = Literal["in_line", "key_vault"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AirbyteConfig(BaseModel):
    """Configuration identifying an Airbyte source and how it is configured.

    ``config_mode`` is either ``"in_line"`` or ``"key_vault"``.
    """

    source_name: str
    selected_stream: Optional[str] = None
    config_mode: config_options = "in_line"
    mapped_config_spec: Optional[Dict[str, Any]] = Field(default_factory=dict)
    parsed_config: Optional[Any] = None
    connection_name: Optional[str] = None
    version: Optional[str] = None

    @property
    def full_source_name(self) -> str:
        """Return ``source_name`` prefixed with ``"source-"``."""
        prefix = "source-"
        return f"{prefix}{self.source_name}"
|
|
20
|
+
|
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
from typing import List, Optional, Literal
|
|
2
|
+
from flowfile_core.schemas import transform_schema
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import os
|
|
5
|
+
from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
|
|
6
|
+
from flowfile_core.schemas.external_sources.airbyte_schemas import AirbyteConfig
|
|
7
|
+
from pydantic import BaseModel, Field, model_validator, SecretStr, ConfigDict
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
OutputConnectionClass = Literal['output-0', 'output-1', 'output-2', 'output-3', 'output-4',
|
|
11
|
+
'output-5', 'output-6', 'output-7', 'output-8', 'output-9']
|
|
12
|
+
|
|
13
|
+
InputConnectionClass = Literal['input-0', 'input-1', 'input-2', 'input-3', 'input-4',
|
|
14
|
+
'input-5', 'input-6', 'input-7', 'input-8', 'input-9']
|
|
15
|
+
|
|
16
|
+
InputType = Literal["main", "left", "right"]
|
|
17
|
+
|
|
18
|
+
class NewDirectory(BaseModel):
    """A directory name paired with a source path.

    Presumably the directory to create and its parent location; confirm
    against the consumer of this model.
    """

    source_path: str
    dir_name: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RemoveItem(BaseModel):
    """One item to remove, addressed by ``path``; ``id`` defaults to -1."""

    path: str
    id: int = -1
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RemoveItemsInput(BaseModel):
    """A batch of ``RemoveItem`` entries plus a ``source_path``."""

    paths: List[RemoveItem]
    source_path: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class MinimalFieldInfo(BaseModel):
    """Minimal description of a field: its name and data type (as a string)."""

    name: str
    data_type: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ReceivedTableBase(BaseModel):
    """Common description of an incoming file-based table.

    ``name`` must be supplied but may be ``None``. After validation,
    ``abs_file_path`` is filled in from ``path`` and ``name`` unless a
    value was already provided.
    """

    id: Optional[int] = None
    name: Optional[str]
    path: str  # This can be an absolute or relative path
    directory: Optional[str] = None
    analysis_file_available: bool = False
    status: Optional[str] = None
    file_type: Optional[str] = None
    fields: List[MinimalFieldInfo] = Field(default_factory=list)
    abs_file_path: Optional[str] = None

    @classmethod
    def create_from_path(cls, path: str):
        """Build an instance from ``path``, using its final component as ``name``."""
        filename = Path(path).name
        return cls(name=filename, path=path)

    @property
    def file_path(self) -> str:
        """Return ``path``, with ``name`` appended when ``path`` lacks it.

        A missing (``None``) or empty ``name`` returns ``path`` unchanged.
        The old ``not self.name in self.path`` test raised ``TypeError``
        for ``None``, which the field type allows.
        """
        if self.name and self.name not in self.path:
            return os.path.join(self.path, self.name)
        return self.path

    def set_absolute_filepath(self):
        """Compute ``abs_file_path`` from ``path`` and ``name`` and store it."""
        base_path = Path(self.path)
        # Check if the path is relative, resolve it with the current working directory
        if not base_path.is_absolute():
            base_path = Path.cwd() / base_path

        # Append the name only when the last path component does not contain it.
        if self.name and self.name not in base_path.name:
            base_path = base_path / self.name

        self.abs_file_path = str(base_path.resolve())

    @model_validator(mode='after')
    def populate_abs_file_path(self):
        """Populate ``abs_file_path`` after validation when it is unset or empty."""
        if not self.abs_file_path:
            self.set_absolute_filepath()
        return self
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ReceivedCsvTable(ReceivedTableBase):
    """Incoming table with CSV reading options; ``file_type`` defaults to ``'csv'``."""

    file_type: str = 'csv'
    reference: str = ''
    starting_from_line: int = 0
    delimiter: str = ','
    has_headers: bool = True
    encoding: Optional[str] = 'utf-8'
    parquet_ref: Optional[str] = None
    row_delimiter: str = '\n'
    quote_char: str = '"'
    infer_schema_length: int = 10_000
    truncate_ragged_lines: bool = False
    ignore_errors: bool = False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class ReceivedJsonTable(ReceivedCsvTable):
    """JSON variant; adds nothing to ``ReceivedCsvTable`` (defaults included)."""

    pass
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ReceivedParquetTable(BaseModel):
    """Parquet marker model; derives from ``BaseModel``, not ``ReceivedTableBase``."""

    file_type: str = 'parquet'
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class ReceivedExcelTable(ReceivedTableBase):
    """Incoming table with Excel sheet and cell-range options."""

    sheet_name: Optional[str] = None
    start_row: int = 0  # optional
    start_column: int = 0  # optional
    end_row: int = 0  # optional
    end_column: int = 0  # optional
    has_headers: bool = True  # optional
    type_inference: bool = False  # optional

    def validate_range_values(self):
        """Check the row/column range for consistency.

        Not registered as a pydantic validator; it runs only when called.

        Raises:
            ValueError: if any bound is negative or not an int, or if a
                start exceeds its non-zero end.
        """
        bounds = (self.start_row, self.start_column, self.end_row, self.end_column)
        if any(not isinstance(bound, int) or bound < 0 for bound in bounds):
            raise ValueError("Row and column indices must be non-negative integers")

        # An end value of 0 means "not specified", so only non-zero ends are compared.
        rows_inverted = self.end_row > 0 and self.start_row > self.end_row
        columns_inverted = self.end_column > 0 and self.start_column > self.end_column
        if rows_inverted or columns_inverted:
            raise ValueError("Start row/column must not be greater than end row/column if specified")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
    """Combined table model inheriting the Excel, CSV and Parquet options."""

    ...
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OutputCsvTable(BaseModel):
    """CSV output options: delimiter and encoding."""

    file_type: str = 'csv'
    delimiter: str = ','
    encoding: str = 'utf-8'
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class OutputParquetTable(BaseModel):
    """Parquet output options; only carries the ``file_type`` tag."""

    file_type: str = 'parquet'
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class OutputExcelTable(BaseModel):
    """Excel output options: the target sheet name."""

    file_type: str = 'excel'
    sheet_name: str = 'Sheet1'
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class OutputSettings(BaseModel):
    """Where and how a result is written.

    Holds the target ``directory`` and file ``name``, the ``file_type``,
    and one option model per supported format. ``abs_file_path`` is
    derived after validation unless a value was provided.
    """

    name: str
    directory: str
    file_type: str
    fields: Optional[List[str]] = Field(default_factory=list)
    write_mode: str = 'overwrite'
    output_csv_table: OutputCsvTable
    output_parquet_table: OutputParquetTable
    output_excel_table: OutputExcelTable
    abs_file_path: Optional[str] = None

    def set_absolute_filepath(self):
        """Compute ``abs_file_path`` from ``directory`` and ``name`` and store it."""
        target = Path(self.directory)
        # Relative directories are anchored at the current working directory.
        if not target.is_absolute():
            target = Path.cwd() / target

        # Append the name only when the last path component does not contain it.
        name_missing = bool(self.name) and self.name not in target.name
        if name_missing:
            target = target / self.name

        self.abs_file_path = str(target.resolve())

    @model_validator(mode='after')
    def populate_abs_file_path(self):
        """Populate ``abs_file_path`` after validation when it is unset or empty."""
        if self.abs_file_path:
            return self
        self.set_absolute_filepath()
        return self
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class NodeBase(BaseModel):
    """Fields shared by all node settings models.

    Every node is identified by ``flow_id`` and ``node_id``; the remaining
    fields (position, description, flags, user) are optional.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)
    flow_id: int
    node_id: int
    cache_results: Optional[bool] = False
    pos_x: Optional[float] = 0
    pos_y: Optional[float] = 0
    is_setup: Optional[bool] = True
    description: Optional[str] = ''
    user_id: Optional[int] = None

    @classmethod
    def overridden_hash(cls):
        """Return True when ``cls`` uses a ``__hash__`` other than ``BaseModel``'s.

        A falsy ``__hash__`` (for example ``None``) yields False.
        """
        own_hash = getattr(cls, '__hash__')
        if not own_hash:
            return False
        return own_hash is not BaseModel.__hash__
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class NodeSingleInput(NodeBase):
    """Base for nodes with one upstream node, referenced by ``depending_on_id``."""

    # -1 is the default when no upstream id is given.
    depending_on_id: Optional[int] = -1
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class NodeMultiInput(NodeBase):
    """Base for nodes with several upstream nodes, listed in ``depending_on_ids``."""

    # [-1] is the default when no upstream ids are given.
    depending_on_ids: Optional[List[int]] = [-1]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class NodeSelect(NodeSingleInput):
    """Settings for a select node: the column selections and an ordering option."""

    keep_missing: bool = True
    select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
    sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class NodeFilter(NodeSingleInput):
    """Settings for a filter node; ``filter_input`` is required."""

    filter_input: transform_schema.FilterInput
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class NodeSort(NodeSingleInput):
    """Settings for a sort node: a list of sort-by entries, empty by default."""

    sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class NodeTextToRows(NodeSingleInput):
    """Settings for a text-to-rows node; ``text_to_rows_input`` is required."""

    text_to_rows_input: transform_schema.TextToRowsInput
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class NodeSample(NodeSingleInput):
    """Settings for a sample node; ``sample_size`` defaults to 1000."""

    sample_size: int = 1000
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class NodeRecordId(NodeSingleInput):
    """Settings for a record-id node; ``record_id_input`` is required."""

    record_id_input: transform_schema.RecordIdInput
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class NodeJoin(NodeMultiInput):
    """Settings for a join node: the join definition plus boolean options.

    All flags default to True; how they are interpreted is decided by the
    code consuming this model, which is not part of this module.
    """

    auto_generate_selection: bool = True
    verify_integrity: bool = True
    join_input: transform_schema.JoinInput
    auto_keep_all: bool = True
    auto_keep_right: bool = True
    auto_keep_left: bool = True
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class NodeCrossJoin(NodeMultiInput):
    """Settings for a cross-join node; same flags as ``NodeJoin``."""

    auto_generate_selection: bool = True
    verify_integrity: bool = True
    cross_join_input: transform_schema.CrossJoinInput
    auto_keep_all: bool = True
    auto_keep_right: bool = True
    auto_keep_left: bool = True
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class NodeFuzzyMatch(NodeJoin):
    """``NodeJoin`` variant whose ``join_input`` is a ``FuzzyMatchInput``."""

    # Overrides the type of the inherited join_input field.
    join_input: transform_schema.FuzzyMatchInput
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class NodeDatasource(NodeBase):
    """Settings for a datasource node, referencing a file by ``file_ref``."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    file_ref: Optional[str] = None
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class NodeManualInput(NodeBase):
    """Settings for a manual-input node holding its data in ``raw_data``."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    raw_data: Optional[List] = None
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class NodeRead(NodeBase):
    """Settings for a read node; ``received_file`` describes the file to read."""

    received_file: ReceivedTable
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class DatabaseConnection(BaseModel):
    """Inline database connection details.

    The password is not stored here; ``password_ref`` holds a reference
    to it (presumably the name of a stored secret — confirm).
    """

    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: Optional[str] = None
    password_ref: Optional[str] = None
    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None
    url: Optional[str] = None
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class FullDatabaseConnection(BaseModel):
    """Named database connection including its password as a ``SecretStr``."""

    connection_name: str
    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: str
    password: SecretStr
    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None
    ssl_enabled: Optional[bool] = False
    url: Optional[str] = None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class FullDatabaseConnectionInterface(BaseModel):
    """Same fields as ``FullDatabaseConnection`` but without the password."""

    connection_name: str
    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: str
    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None
    ssl_enabled: Optional[bool] = False
    url: Optional[str] = None
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class DatabaseSettings(BaseModel):
    """What to read from a database and how to connect to it.

    ``connection_mode`` selects between an inline ``database_connection``
    and a stored connection referenced by ``database_connection_name``.

    Raises (on validation):
        ValueError: if the connection information required by
            ``connection_mode`` is missing.
    """

    connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
    database_connection: Optional[DatabaseConnection] = None
    database_connection_name: Optional[str] = None
    schema_name: Optional[str] = None
    table_name: Optional[str] = None
    query: Optional[str] = None
    query_mode: Literal['query', 'table', 'reference'] = 'table'

    # An earlier validator with the same name used to precede this one. The
    # later definition replaced it in the class body, so it never ran and has
    # been removed.
    @model_validator(mode='after')
    def validate_table_or_query(self):
        """Validate table/query presence and the connection information."""
        # Validate that either table_name or query is provided
        # NOTE(review): 'inline' is not an allowed query_mode value, so this
        # check cannot fire on a validated model. It may have been meant to
        # test connection_mode or a different query_mode; left unchanged so
        # that inputs accepted today are not rejected.
        if (not self.table_name and not self.query) and self.query_mode == 'inline':
            raise ValueError("Either 'table_name' or 'query' must be provided")

        # Validate correct connection information based on connection_mode
        if self.connection_mode == 'inline' and self.database_connection is None:
            raise ValueError("When 'connection_mode' is 'inline', 'database_connection' must be provided")

        if self.connection_mode == 'reference' and not self.database_connection_name:
            raise ValueError("When 'connection_mode' is 'reference', 'database_connection_name' must be provided")

        return self
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class DatabaseWriteSettings(BaseModel):
    """Target table and connection information for writing to a database.

    Unlike ``DatabaseSettings``, no validator checks here that the
    connection fields match ``connection_mode``.
    """

    connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
    database_connection: Optional[DatabaseConnection] = None
    database_connection_name: Optional[str] = None
    table_name: str
    schema_name: Optional[str] = None
    if_exists: Optional[Literal['append', 'replace', 'fail']] = 'append'
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class NodeDatabaseReader(NodeBase):
    """Settings for a database-reader node, with an optional field list."""

    database_settings: DatabaseSettings
    fields: Optional[List[MinimalFieldInfo]] = None
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class NodeDatabaseWriter(NodeSingleInput):
    """Settings for a database-writer node fed by a single upstream node."""

    database_write_settings: DatabaseWriteSettings
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class ExternalSource(BaseModel):
    """Base settings for external sources: orientation and optional field list."""

    orientation: str = 'row'
    fields: Optional[List[MinimalFieldInfo]] = None
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class SampleUsers(ExternalSource):
    """External-source settings for the sample-users source."""

    # Required flag; presumably used to tell this source type apart when
    # parsing a union of source settings (confirm).
    SAMPLE_USERS: bool
    class_name: str = "sample_users"
    size: int = 100
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class GoogleSheet(ExternalSource):
    """External-source settings for reading a Google Sheet."""

    # Required flag; presumably used to tell this source type apart when
    # parsing a union of source settings (confirm).
    GOOGLE_SHEET: bool
    class_name: str = "google_sheet"
    # Annotated Optional to match the None default; explicit None is now accepted too.
    access_token: Optional[SecretStr] = None
    sheet_id: str
    worksheet_name: str
    sheet_name: str
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
class AirbyteReader(AirbyteConfig):
    """``AirbyteConfig`` extended with a class name and an optional field list."""

    class_name: Optional[str] = "airbyte_reader"
    fields: Optional[List[MinimalFieldInfo]] = None
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
class AccessToken(BaseModel):
    """An access token associated with a user id."""

    user_id: str
    # Annotated Optional to match the None default; explicit None is now accepted too.
    access_token: Optional[SecretStr] = None
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
class NodeExternalSource(NodeBase):
    """Settings for an external-source node.

    ``source_settings`` is either ``SampleUsers`` or ``GoogleSheet``.
    """

    identifier: str
    source_settings: SampleUsers | GoogleSheet
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class NodeAirbyteReader(NodeExternalSource):
    """External-source node specialised for Airbyte.

    Overrides ``identifier`` with a default of ``'airbyte'`` and narrows
    ``source_settings`` to ``AirbyteReader``.
    """

    identifier: str = 'airbyte'
    source_settings: AirbyteReader
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class NodeFormula(NodeSingleInput):
    """Settings for a formula node; ``function`` may be unset."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    function: Optional[transform_schema.FunctionInput] = None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class NodeGroupBy(NodeSingleInput):
    """Settings for a group-by node; ``groupby_input`` may be unset."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    groupby_input: Optional[transform_schema.GroupByInput] = None
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class NodePromise(NodeBase):
    """A node of a given ``node_type`` that is not set up yet.

    Overrides ``is_setup`` so that it defaults to False.
    """

    is_setup: bool = False
    node_type: str
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class NodeInputConnection(BaseModel):
    """The receiving end of a connection: a node id and one of its input handles."""

    node_id: int
    connection_class: InputConnectionClass

    def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
        """Translate the input handle into its logical input type.

        Returns:
            ``'main'`` for ``input-0``, ``'right'`` for ``input-1`` and
            ``'left'`` for ``input-2``.

        Raises:
            ValueError: for any other handle.
        """
        if self.connection_class == 'input-0':
            return 'main'
        if self.connection_class == 'input-1':
            return 'right'
        if self.connection_class == 'input-2':
            return 'left'
        raise ValueError(f"Unexpected connection_class: {self.connection_class}")
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class NodePivot(NodeSingleInput):
    """Settings for a pivot node, with an optional list of output fields."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    pivot_input: Optional[transform_schema.PivotInput] = None
    output_fields: Optional[List[MinimalFieldInfo]] = None
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
class NodeUnpivot(NodeSingleInput):
    """Settings for an unpivot node; ``unpivot_input`` may be unset."""

    # Annotated Optional to match the None default; explicit None is now accepted too.
    unpivot_input: Optional[transform_schema.UnpivotInput] = None
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class NodeUnion(NodeMultiInput):
    """Settings for a union node; ``union_input`` defaults to a fresh ``UnionInput``."""

    union_input: transform_schema.UnionInput = Field(default_factory=transform_schema.UnionInput)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
class NodeOutput(NodeSingleInput):
    """Settings for an output node; ``output_settings`` says where and how to write."""

    output_settings: OutputSettings
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class NodeOutputConnection(BaseModel):
    """The sending end of a connection: a node id and one of its output handles."""

    node_id: int
    connection_class: OutputConnectionClass
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
class NodeConnection(BaseModel):
|
|
432
|
+
input_connection: NodeInputConnection
|
|
433
|
+
output_connection: NodeOutputConnection
|
|
434
|
+
|
|
435
|
+
@classmethod
|
|
436
|
+
def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
|
|
437
|
+
|
|
438
|
+
match input_type:
|
|
439
|
+
case "main":
|
|
440
|
+
connection_class: InputConnectionClass = "input-0"
|
|
441
|
+
case "right":
|
|
442
|
+
connection_class: InputConnectionClass = "input-1"
|
|
443
|
+
case "left":
|
|
444
|
+
connection_class: InputConnectionClass = "input-2"
|
|
445
|
+
case _:
|
|
446
|
+
connection_class: InputConnectionClass = "input-0"
|
|
447
|
+
node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
|
|
448
|
+
node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
|
|
449
|
+
return cls(input_connection=node_input, output_connection=node_output)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
class NodeDescription(BaseModel):
    """A node description string, empty by default."""

    description: str = ''
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class NodeExploreData(NodeBase):
    """Settings for an explore-data node with optional Graphic Walker input."""

    graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
    # Underscore-prefixed, so not a regular model field; its use is not
    # visible in this module.
    _hash_overrule: int = 0

    def __hash__(self):
        # Every instance hashes to the same constant, whatever its settings.
        return 0
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
class NodeGraphSolver(NodeSingleInput):
    """Settings for a graph-solver node; ``graph_solver_input`` is required."""

    graph_solver_input: transform_schema.GraphSolverInput
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
class NodeUnique(NodeSingleInput):
    """Settings for a unique node; ``unique_input`` is required."""

    unique_input: transform_schema.UniqueInput
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
class NodeRecordCount(NodeSingleInput):
    """Settings for a record-count node; adds nothing to ``NodeSingleInput``."""

    pass
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
class NodePolarsCode(NodeMultiInput):
    """Settings for a Polars-code node; ``polars_code_input`` is required."""

    polars_code_input: transform_schema.PolarsCodeInput
|