Flowfile-0.3.5-py3-none-any.whl → Flowfile-0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
|
@@ -3,8 +3,11 @@ from flowfile_core.schemas import transform_schema
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
import os
|
|
5
5
|
from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
|
|
6
|
-
from flowfile_core.schemas.
|
|
6
|
+
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageReadSettings, CloudStorageWriteSettings
|
|
7
|
+
from flowfile_core.schemas.schemas import SecretRef
|
|
8
|
+
from flowfile_core.utils.utils import ensure_similarity_dicts, standardize_col_dtype
|
|
7
9
|
from pydantic import BaseModel, Field, model_validator, SecretStr, ConfigDict
|
|
10
|
+
import polars as pl
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
OutputConnectionClass = Literal['output-0', 'output-1', 'output-2', 'output-3', 'output-4',
|
|
@@ -17,26 +20,31 @@ InputType = Literal["main", "left", "right"]
|
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
class NewDirectory(BaseModel):
|
|
23
|
+
"""Defines the information required to create a new directory."""
|
|
20
24
|
source_path: str
|
|
21
25
|
dir_name: str
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class RemoveItem(BaseModel):
|
|
29
|
+
"""Represents a single item to be removed from a directory or list."""
|
|
25
30
|
path: str
|
|
26
31
|
id: int = -1
|
|
27
32
|
|
|
28
33
|
|
|
29
34
|
class RemoveItemsInput(BaseModel):
|
|
35
|
+
"""Defines a list of items to be removed."""
|
|
30
36
|
paths: List[RemoveItem]
|
|
31
37
|
source_path: str
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class MinimalFieldInfo(BaseModel):
|
|
41
|
+
"""Represents the most basic information about a data field (column)."""
|
|
35
42
|
name: str
|
|
36
|
-
data_type: str
|
|
43
|
+
data_type: str = "String"
|
|
37
44
|
|
|
38
45
|
|
|
39
46
|
class ReceivedTableBase(BaseModel):
|
|
47
|
+
"""Base model for defining a table received from an external source."""
|
|
40
48
|
id: Optional[int] = None
|
|
41
49
|
name: Optional[str]
|
|
42
50
|
path: str # This can be an absolute or relative path
|
|
@@ -49,36 +57,37 @@ class ReceivedTableBase(BaseModel):
|
|
|
49
57
|
|
|
50
58
|
@classmethod
|
|
51
59
|
def create_from_path(cls, path: str):
|
|
60
|
+
"""Creates an instance from a file path string."""
|
|
52
61
|
filename = Path(path).name
|
|
53
|
-
# Create an instance of ReceivedTableBase with the extracted filename and path
|
|
54
62
|
return cls(name=filename, path=path)
|
|
55
63
|
|
|
56
64
|
@property
|
|
57
65
|
def file_path(self) -> str:
|
|
66
|
+
"""Constructs the full file path from the directory and name."""
|
|
58
67
|
if not self.name in self.path:
|
|
59
68
|
return os.path.join(self.path, self.name)
|
|
60
69
|
else:
|
|
61
70
|
return self.path
|
|
62
71
|
|
|
63
72
|
def set_absolute_filepath(self):
|
|
73
|
+
"""Resolves the path to an absolute file path."""
|
|
64
74
|
base_path = Path(self.path).expanduser()
|
|
65
|
-
# Check if the path is relative, resolve it with the current working directory
|
|
66
75
|
if not base_path.is_absolute():
|
|
67
76
|
base_path = Path.cwd() / base_path
|
|
68
|
-
|
|
69
77
|
if self.name and self.name not in base_path.name:
|
|
70
78
|
base_path = base_path / self.name
|
|
71
|
-
|
|
72
79
|
self.abs_file_path = str(base_path.resolve())
|
|
73
80
|
|
|
74
81
|
@model_validator(mode='after')
|
|
75
82
|
def populate_abs_file_path(self):
|
|
83
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
76
84
|
if not self.abs_file_path:
|
|
77
85
|
self.set_absolute_filepath()
|
|
78
86
|
return self
|
|
79
87
|
|
|
80
88
|
|
|
81
89
|
class ReceivedCsvTable(ReceivedTableBase):
|
|
90
|
+
"""Defines settings for reading a CSV file."""
|
|
82
91
|
file_type: str = 'csv'
|
|
83
92
|
reference: str = ''
|
|
84
93
|
starting_from_line: int = 0
|
|
@@ -94,82 +103,88 @@ class ReceivedCsvTable(ReceivedTableBase):
|
|
|
94
103
|
|
|
95
104
|
|
|
96
105
|
class ReceivedJsonTable(ReceivedCsvTable):
|
|
106
|
+
"""Defines settings for reading a JSON file (inherits from CSV settings)."""
|
|
97
107
|
pass
|
|
98
108
|
|
|
99
109
|
|
|
100
110
|
class ReceivedParquetTable(ReceivedTableBase):
|
|
111
|
+
"""Defines settings for reading a Parquet file."""
|
|
101
112
|
file_type: str = 'parquet'
|
|
102
113
|
|
|
103
114
|
|
|
104
115
|
class ReceivedExcelTable(ReceivedTableBase):
|
|
116
|
+
"""Defines settings for reading an Excel file."""
|
|
105
117
|
sheet_name: Optional[str] = None
|
|
106
|
-
start_row: int = 0
|
|
107
|
-
start_column: int = 0
|
|
108
|
-
end_row: int = 0
|
|
109
|
-
end_column: int = 0
|
|
110
|
-
has_headers: bool = True
|
|
111
|
-
type_inference: bool = False
|
|
118
|
+
start_row: int = 0
|
|
119
|
+
start_column: int = 0
|
|
120
|
+
end_row: int = 0
|
|
121
|
+
end_column: int = 0
|
|
122
|
+
has_headers: bool = True
|
|
123
|
+
type_inference: bool = False
|
|
112
124
|
|
|
113
125
|
def validate_range_values(self):
|
|
114
|
-
|
|
126
|
+
"""Validates that the Excel cell range is logical."""
|
|
115
127
|
for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
|
|
116
128
|
if not isinstance(attribute, int) or attribute < 0:
|
|
117
129
|
raise ValueError("Row and column indices must be non-negative integers")
|
|
118
|
-
|
|
119
|
-
# Validate that start is before end if end is specified (non-zero)
|
|
120
130
|
if (self.end_row > 0 and self.start_row > self.end_row) or \
|
|
121
|
-
|
|
122
|
-
raise ValueError("Start row/column must not be greater than end row/column
|
|
131
|
+
(self.end_column > 0 and self.start_column > self.end_column):
|
|
132
|
+
raise ValueError("Start row/column must not be greater than end row/column")
|
|
123
133
|
|
|
124
134
|
|
|
125
135
|
class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
|
|
136
|
+
"""A comprehensive model that can represent any type of received table."""
|
|
126
137
|
...
|
|
127
138
|
|
|
128
139
|
|
|
129
140
|
class OutputCsvTable(BaseModel):
|
|
141
|
+
"""Defines settings for writing a CSV file."""
|
|
130
142
|
file_type: str = 'csv'
|
|
131
143
|
delimiter: str = ','
|
|
132
144
|
encoding: str = 'utf-8'
|
|
133
145
|
|
|
134
146
|
|
|
135
147
|
class OutputParquetTable(BaseModel):
|
|
148
|
+
"""Defines settings for writing a Parquet file."""
|
|
136
149
|
file_type: str = 'parquet'
|
|
137
150
|
|
|
138
151
|
|
|
139
152
|
class OutputExcelTable(BaseModel):
|
|
153
|
+
"""Defines settings for writing an Excel file."""
|
|
140
154
|
file_type: str = 'excel'
|
|
141
155
|
sheet_name: str = 'Sheet1'
|
|
142
156
|
|
|
143
157
|
|
|
144
158
|
class OutputSettings(BaseModel):
|
|
159
|
+
"""Defines the complete settings for an output node."""
|
|
145
160
|
name: str
|
|
146
161
|
directory: str
|
|
147
162
|
file_type: str
|
|
148
163
|
fields: Optional[List[str]] = Field(default_factory=list)
|
|
149
164
|
write_mode: str = 'overwrite'
|
|
150
|
-
output_csv_table: OutputCsvTable
|
|
151
|
-
output_parquet_table: OutputParquetTable
|
|
152
|
-
output_excel_table: OutputExcelTable
|
|
165
|
+
output_csv_table: Optional[OutputCsvTable] = Field(default_factory=OutputCsvTable)
|
|
166
|
+
output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
|
|
167
|
+
output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
|
|
153
168
|
abs_file_path: Optional[str] = None
|
|
154
169
|
|
|
155
170
|
def set_absolute_filepath(self):
|
|
171
|
+
"""Resolves the output directory and name into an absolute path."""
|
|
156
172
|
base_path = Path(self.directory)
|
|
157
|
-
|
|
158
173
|
if not base_path.is_absolute():
|
|
159
174
|
base_path = Path.cwd() / base_path
|
|
160
|
-
|
|
161
175
|
if self.name and self.name not in base_path.name:
|
|
162
176
|
base_path = base_path / self.name
|
|
163
|
-
|
|
164
177
|
self.abs_file_path = str(base_path.resolve())
|
|
165
178
|
|
|
166
179
|
@model_validator(mode='after')
|
|
167
180
|
def populate_abs_file_path(self):
|
|
181
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
168
182
|
self.set_absolute_filepath()
|
|
169
183
|
return self
|
|
170
184
|
|
|
171
185
|
|
|
172
186
|
class NodeBase(BaseModel):
|
|
187
|
+
"""Base model for all nodes in a FlowGraph. Contains common metadata."""
|
|
173
188
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
174
189
|
flow_id: int
|
|
175
190
|
node_id: int
|
|
@@ -181,48 +196,51 @@ class NodeBase(BaseModel):
|
|
|
181
196
|
user_id: Optional[int] = None
|
|
182
197
|
is_flow_output: Optional[bool] = False
|
|
183
198
|
|
|
184
|
-
@classmethod
|
|
185
|
-
def overridden_hash(cls):
|
|
186
|
-
if getattr(cls, '__hash__'):
|
|
187
|
-
return BaseModel.__hash__ is not getattr(cls, '__hash__')
|
|
188
|
-
return False
|
|
189
|
-
|
|
190
199
|
|
|
191
200
|
class NodeSingleInput(NodeBase):
|
|
201
|
+
"""A base model for any node that takes a single data input."""
|
|
192
202
|
depending_on_id: Optional[int] = -1
|
|
193
203
|
|
|
194
204
|
|
|
195
205
|
class NodeMultiInput(NodeBase):
|
|
206
|
+
"""A base model for any node that takes multiple data inputs."""
|
|
196
207
|
depending_on_ids: Optional[List[int]] = [-1]
|
|
197
208
|
|
|
198
209
|
|
|
199
210
|
class NodeSelect(NodeSingleInput):
|
|
211
|
+
"""Settings for a node that selects, renames, and reorders columns."""
|
|
200
212
|
keep_missing: bool = True
|
|
201
213
|
select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
|
|
202
214
|
sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
|
|
203
215
|
|
|
204
216
|
|
|
205
217
|
class NodeFilter(NodeSingleInput):
|
|
218
|
+
"""Settings for a node that filters rows based on a condition."""
|
|
206
219
|
filter_input: transform_schema.FilterInput
|
|
207
220
|
|
|
208
221
|
|
|
209
222
|
class NodeSort(NodeSingleInput):
|
|
223
|
+
"""Settings for a node that sorts the data by one or more columns."""
|
|
210
224
|
sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
|
|
211
225
|
|
|
212
226
|
|
|
213
227
|
class NodeTextToRows(NodeSingleInput):
|
|
228
|
+
"""Settings for a node that splits a text column into multiple rows."""
|
|
214
229
|
text_to_rows_input: transform_schema.TextToRowsInput
|
|
215
230
|
|
|
216
231
|
|
|
217
232
|
class NodeSample(NodeSingleInput):
|
|
233
|
+
"""Settings for a node that samples a subset of the data."""
|
|
218
234
|
sample_size: int = 1000
|
|
219
235
|
|
|
220
236
|
|
|
221
237
|
class NodeRecordId(NodeSingleInput):
|
|
238
|
+
"""Settings for a node that adds a unique record ID column."""
|
|
222
239
|
record_id_input: transform_schema.RecordIdInput
|
|
223
240
|
|
|
224
241
|
|
|
225
242
|
class NodeJoin(NodeMultiInput):
|
|
243
|
+
"""Settings for a node that performs a standard SQL-style join."""
|
|
226
244
|
auto_generate_selection: bool = True
|
|
227
245
|
verify_integrity: bool = True
|
|
228
246
|
join_input: transform_schema.JoinInput
|
|
@@ -232,6 +250,7 @@ class NodeJoin(NodeMultiInput):
|
|
|
232
250
|
|
|
233
251
|
|
|
234
252
|
class NodeCrossJoin(NodeMultiInput):
|
|
253
|
+
"""Settings for a node that performs a cross join."""
|
|
235
254
|
auto_generate_selection: bool = True
|
|
236
255
|
verify_integrity: bool = True
|
|
237
256
|
cross_join_input: transform_schema.CrossJoinInput
|
|
@@ -241,31 +260,52 @@ class NodeCrossJoin(NodeMultiInput):
|
|
|
241
260
|
|
|
242
261
|
|
|
243
262
|
class NodeFuzzyMatch(NodeJoin):
|
|
263
|
+
"""Settings for a node that performs a fuzzy join based on string similarity."""
|
|
244
264
|
join_input: transform_schema.FuzzyMatchInput
|
|
245
265
|
|
|
246
266
|
|
|
247
267
|
class NodeDatasource(NodeBase):
|
|
268
|
+
"""Base settings for a node that acts as a data source."""
|
|
248
269
|
file_ref: str = None
|
|
249
270
|
|
|
250
271
|
|
|
251
272
|
class RawData(BaseModel):
|
|
273
|
+
"""Represents data in a raw, columnar format for manual input."""
|
|
252
274
|
columns: List[MinimalFieldInfo] = None
|
|
253
|
-
data: List[List]
|
|
275
|
+
data: List[List]
|
|
276
|
+
|
|
277
|
+
@classmethod
|
|
278
|
+
def from_pylist(cls, pylist: List[dict]):
|
|
279
|
+
"""Creates a RawData object from a list of Python dictionaries."""
|
|
280
|
+
if len(pylist) == 0:
|
|
281
|
+
return cls(columns=[], data=[])
|
|
282
|
+
pylist = ensure_similarity_dicts(pylist)
|
|
283
|
+
values = [standardize_col_dtype([vv for vv in c]) for c in
|
|
284
|
+
zip(*(r.values() for r in pylist))]
|
|
285
|
+
data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
|
|
286
|
+
columns = [MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in pylist[0].keys()]
|
|
287
|
+
return cls(columns=columns, data=values)
|
|
288
|
+
|
|
289
|
+
def to_pylist(self) -> List[dict]:
|
|
290
|
+
"""Converts the RawData object back into a list of Python dictionaries."""
|
|
291
|
+
return [{c.name: self.data[ci][ri] for ci, c in enumerate(self.columns)} for ri in range(len(self.data[0]))]
|
|
254
292
|
|
|
255
293
|
|
|
256
294
|
class NodeManualInput(NodeBase):
|
|
257
|
-
|
|
295
|
+
"""Settings for a node that allows direct data entry in the UI."""
|
|
258
296
|
raw_data_format: Optional[RawData] = None
|
|
259
297
|
|
|
260
298
|
|
|
261
299
|
class NodeRead(NodeBase):
|
|
300
|
+
"""Settings for a node that reads data from a file."""
|
|
262
301
|
received_file: ReceivedTable
|
|
263
302
|
|
|
264
303
|
|
|
265
304
|
class DatabaseConnection(BaseModel):
|
|
266
|
-
|
|
305
|
+
"""Defines the connection parameters for a database."""
|
|
306
|
+
database_type: str = "postgresql"
|
|
267
307
|
username: Optional[str] = None
|
|
268
|
-
password_ref: Optional[
|
|
308
|
+
password_ref: Optional[SecretRef] = None
|
|
269
309
|
host: Optional[str] = None
|
|
270
310
|
port: Optional[int] = None
|
|
271
311
|
database: Optional[str] = None
|
|
@@ -273,8 +313,9 @@ class DatabaseConnection(BaseModel):
|
|
|
273
313
|
|
|
274
314
|
|
|
275
315
|
class FullDatabaseConnection(BaseModel):
|
|
316
|
+
"""A complete database connection model including the secret password."""
|
|
276
317
|
connection_name: str
|
|
277
|
-
database_type: str = "postgresql"
|
|
318
|
+
database_type: str = "postgresql"
|
|
278
319
|
username: str
|
|
279
320
|
password: SecretStr
|
|
280
321
|
host: Optional[str] = None
|
|
@@ -285,8 +326,9 @@ class FullDatabaseConnection(BaseModel):
|
|
|
285
326
|
|
|
286
327
|
|
|
287
328
|
class FullDatabaseConnectionInterface(BaseModel):
|
|
329
|
+
"""A database connection model intended for UI display, omitting the password."""
|
|
288
330
|
connection_name: str
|
|
289
|
-
database_type: str = "postgresql"
|
|
331
|
+
database_type: str = "postgresql"
|
|
290
332
|
username: str
|
|
291
333
|
host: Optional[str] = None
|
|
292
334
|
port: Optional[int] = None
|
|
@@ -296,6 +338,7 @@ class FullDatabaseConnectionInterface(BaseModel):
|
|
|
296
338
|
|
|
297
339
|
|
|
298
340
|
class DatabaseSettings(BaseModel):
|
|
341
|
+
"""Defines settings for reading from a database, either via table or query."""
|
|
299
342
|
connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
|
|
300
343
|
database_connection: Optional[DatabaseConnection] = None
|
|
301
344
|
database_connection_name: Optional[str] = None
|
|
@@ -321,6 +364,7 @@ class DatabaseSettings(BaseModel):
|
|
|
321
364
|
|
|
322
365
|
|
|
323
366
|
class DatabaseWriteSettings(BaseModel):
|
|
367
|
+
"""Defines settings for writing data to a database table."""
|
|
324
368
|
connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
|
|
325
369
|
database_connection: Optional[DatabaseConnection] = None
|
|
326
370
|
database_connection_name: Optional[str] = None
|
|
@@ -330,140 +374,146 @@ class DatabaseWriteSettings(BaseModel):
|
|
|
330
374
|
|
|
331
375
|
|
|
332
376
|
class NodeDatabaseReader(NodeBase):
|
|
377
|
+
"""Settings for a node that reads from a database."""
|
|
333
378
|
database_settings: DatabaseSettings
|
|
334
379
|
fields: Optional[List[MinimalFieldInfo]] = None
|
|
335
380
|
|
|
336
381
|
|
|
337
382
|
class NodeDatabaseWriter(NodeSingleInput):
|
|
383
|
+
"""Settings for a node that writes data to a database."""
|
|
338
384
|
database_write_settings: DatabaseWriteSettings
|
|
339
385
|
|
|
340
386
|
|
|
387
|
+
class NodeCloudStorageReader(NodeBase):
|
|
388
|
+
"""Settings for a node that reads from a cloud storage service (S3, GCS, etc.)."""
|
|
389
|
+
cloud_storage_settings: CloudStorageReadSettings
|
|
390
|
+
fields: Optional[List[MinimalFieldInfo]] = None
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class NodeCloudStorageWriter(NodeSingleInput):
|
|
394
|
+
"""Settings for a node that writes to a cloud storage service."""
|
|
395
|
+
cloud_storage_settings: CloudStorageWriteSettings
|
|
396
|
+
|
|
397
|
+
|
|
341
398
|
class ExternalSource(BaseModel):
|
|
399
|
+
"""Base model for data coming from a predefined external source."""
|
|
342
400
|
orientation: str = 'row'
|
|
343
401
|
fields: Optional[List[MinimalFieldInfo]] = None
|
|
344
402
|
|
|
345
403
|
|
|
346
404
|
class SampleUsers(ExternalSource):
|
|
405
|
+
"""Settings for generating a sample dataset of users."""
|
|
347
406
|
SAMPLE_USERS: bool
|
|
348
407
|
class_name: str = "sample_users"
|
|
349
408
|
size: int = 100
|
|
350
409
|
|
|
351
410
|
|
|
352
|
-
class AirbyteReader(AirbyteConfig):
|
|
353
|
-
class_name: Optional[str] = "airbyte_reader"
|
|
354
|
-
fields: Optional[List[MinimalFieldInfo]] = None
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
class AccessToken(BaseModel):
|
|
358
|
-
user_id: str
|
|
359
|
-
access_token: SecretStr = None
|
|
360
|
-
|
|
361
|
-
|
|
362
411
|
class NodeExternalSource(NodeBase):
|
|
412
|
+
"""Settings for a node that connects to a registered external data source."""
|
|
363
413
|
identifier: str
|
|
364
414
|
source_settings: SampleUsers
|
|
365
415
|
|
|
366
416
|
|
|
367
|
-
class NodeAirbyteReader(NodeExternalSource):
|
|
368
|
-
identifier: str = 'airbyte'
|
|
369
|
-
source_settings: AirbyteReader
|
|
370
|
-
|
|
371
|
-
|
|
372
417
|
class NodeFormula(NodeSingleInput):
|
|
418
|
+
"""Settings for a node that applies a formula to create/modify a column."""
|
|
373
419
|
function: transform_schema.FunctionInput = None
|
|
374
420
|
|
|
375
421
|
|
|
376
422
|
class NodeGroupBy(NodeSingleInput):
|
|
423
|
+
"""Settings for a node that performs a group-by and aggregation operation."""
|
|
377
424
|
groupby_input: transform_schema.GroupByInput = None
|
|
378
425
|
|
|
379
426
|
|
|
380
427
|
class NodePromise(NodeBase):
|
|
428
|
+
"""A placeholder node for an operation that has not yet been configured."""
|
|
381
429
|
is_setup: bool = False
|
|
382
430
|
node_type: str
|
|
383
431
|
|
|
384
432
|
|
|
385
433
|
class NodeInputConnection(BaseModel):
|
|
434
|
+
"""Represents the input side of a connection between two nodes."""
|
|
386
435
|
node_id: int
|
|
387
436
|
connection_class: InputConnectionClass
|
|
388
437
|
|
|
389
438
|
def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
|
|
439
|
+
"""Determines the semantic type of the input (e.g., for a join)."""
|
|
390
440
|
match self.connection_class:
|
|
391
|
-
case 'input-0':
|
|
392
|
-
|
|
393
|
-
case 'input-
|
|
394
|
-
|
|
395
|
-
case 'input-2':
|
|
396
|
-
return 'left'
|
|
397
|
-
case _:
|
|
398
|
-
raise ValueError(f"Unexpected connection_class: {self.connection_class}")
|
|
441
|
+
case 'input-0': return 'main'
|
|
442
|
+
case 'input-1': return 'right'
|
|
443
|
+
case 'input-2': return 'left'
|
|
444
|
+
case _: raise ValueError(f"Unexpected connection_class: {self.connection_class}")
|
|
399
445
|
|
|
400
446
|
|
|
401
447
|
class NodePivot(NodeSingleInput):
|
|
448
|
+
"""Settings for a node that pivots data from a long to a wide format."""
|
|
402
449
|
pivot_input: transform_schema.PivotInput = None
|
|
403
450
|
output_fields: Optional[List[MinimalFieldInfo]] = None
|
|
404
451
|
|
|
405
452
|
|
|
406
453
|
class NodeUnpivot(NodeSingleInput):
|
|
454
|
+
"""Settings for a node that unpivots data from a wide to a long format."""
|
|
407
455
|
unpivot_input: transform_schema.UnpivotInput = None
|
|
408
456
|
|
|
409
457
|
|
|
410
458
|
class NodeUnion(NodeMultiInput):
|
|
459
|
+
"""Settings for a node that concatenates multiple data inputs."""
|
|
411
460
|
union_input: transform_schema.UnionInput = Field(default_factory=transform_schema.UnionInput)
|
|
412
461
|
|
|
413
462
|
|
|
414
463
|
class NodeOutput(NodeSingleInput):
|
|
464
|
+
"""Settings for a node that writes its input to a file."""
|
|
415
465
|
output_settings: OutputSettings
|
|
416
466
|
|
|
417
467
|
|
|
418
468
|
class NodeOutputConnection(BaseModel):
|
|
469
|
+
"""Represents the output side of a connection between two nodes."""
|
|
419
470
|
node_id: int
|
|
420
471
|
connection_class: OutputConnectionClass
|
|
421
472
|
|
|
422
473
|
|
|
423
474
|
class NodeConnection(BaseModel):
|
|
475
|
+
"""Represents a connection (edge) between two nodes in the graph."""
|
|
424
476
|
input_connection: NodeInputConnection
|
|
425
477
|
output_connection: NodeOutputConnection
|
|
426
478
|
|
|
427
479
|
@classmethod
|
|
428
480
|
def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
|
|
429
|
-
|
|
481
|
+
"""Creates a standard connection between two nodes."""
|
|
430
482
|
match input_type:
|
|
431
|
-
case "main":
|
|
432
|
-
|
|
433
|
-
case "
|
|
434
|
-
|
|
435
|
-
case "left":
|
|
436
|
-
connection_class: InputConnectionClass = "input-2"
|
|
437
|
-
case _:
|
|
438
|
-
connection_class: InputConnectionClass = "input-0"
|
|
483
|
+
case "main": connection_class: InputConnectionClass = "input-0"
|
|
484
|
+
case "right": connection_class: InputConnectionClass = "input-1"
|
|
485
|
+
case "left": connection_class: InputConnectionClass = "input-2"
|
|
486
|
+
case _: connection_class: InputConnectionClass = "input-0"
|
|
439
487
|
node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
|
|
440
488
|
node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
|
|
441
489
|
return cls(input_connection=node_input, output_connection=node_output)
|
|
442
490
|
|
|
443
491
|
|
|
444
492
|
class NodeDescription(BaseModel):
|
|
493
|
+
"""A simple model for updating a node's description text."""
|
|
445
494
|
description: str = ''
|
|
446
495
|
|
|
447
496
|
|
|
448
497
|
class NodeExploreData(NodeBase):
|
|
498
|
+
"""Settings for a node that provides an interactive data exploration interface."""
|
|
449
499
|
graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
|
|
450
|
-
_hash_overrule: int = 0
|
|
451
|
-
|
|
452
|
-
def __hash__(self):
|
|
453
|
-
return 0
|
|
454
500
|
|
|
455
501
|
|
|
456
502
|
class NodeGraphSolver(NodeSingleInput):
|
|
503
|
+
"""Settings for a node that solves graph-based problems (e.g., connected components)."""
|
|
457
504
|
graph_solver_input: transform_schema.GraphSolverInput
|
|
458
505
|
|
|
459
506
|
|
|
460
507
|
class NodeUnique(NodeSingleInput):
|
|
508
|
+
"""Settings for a node that returns the unique rows from the data."""
|
|
461
509
|
unique_input: transform_schema.UniqueInput
|
|
462
510
|
|
|
463
511
|
|
|
464
512
|
class NodeRecordCount(NodeSingleInput):
|
|
513
|
+
"""Settings for a node that counts the number of records."""
|
|
465
514
|
pass
|
|
466
515
|
|
|
467
516
|
|
|
468
517
|
class NodePolarsCode(NodeMultiInput):
|
|
518
|
+
"""Settings for a node that executes arbitrary user-provided Polars code."""
|
|
469
519
|
polars_code_input: transform_schema.PolarsCodeInput
|
|
@@ -5,8 +5,9 @@ import time
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class NodeResult(BaseModel):
|
|
8
|
+
"""Represents the execution result of a single node in a FlowGraph run."""
|
|
8
9
|
node_id: int
|
|
9
|
-
node_name: str = None
|
|
10
|
+
node_name: Optional[str] = None
|
|
10
11
|
start_timestamp: float = Field(default_factory=time.time)
|
|
11
12
|
end_timestamp: float = 0
|
|
12
13
|
success: Optional[bool] = None
|
|
@@ -16,6 +17,7 @@ class NodeResult(BaseModel):
|
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class RunInformation(BaseModel):
|
|
20
|
+
"""Contains summary information about a complete FlowGraph execution."""
|
|
19
21
|
flow_id: int
|
|
20
22
|
start_time: Optional[datetime] = Field(default_factory=datetime.now)
|
|
21
23
|
end_time: Optional[datetime] = None
|
|
@@ -26,6 +28,7 @@ class RunInformation(BaseModel):
|
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
class BaseItem(BaseModel):
|
|
31
|
+
"""A base model for any item in a file system, like a file or directory."""
|
|
29
32
|
name: str
|
|
30
33
|
path: str
|
|
31
34
|
size: Optional[int] = None
|
|
@@ -37,6 +40,7 @@ class BaseItem(BaseModel):
|
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
class FileColumn(BaseModel):
|
|
43
|
+
"""Represents detailed schema and statistics for a single column (field)."""
|
|
40
44
|
name: str
|
|
41
45
|
data_type: str
|
|
42
46
|
is_unique: bool
|
|
@@ -49,6 +53,7 @@ class FileColumn(BaseModel):
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
class TableExample(BaseModel):
|
|
56
|
+
"""Represents a preview of a table, including schema and sample data."""
|
|
52
57
|
node_id: int
|
|
53
58
|
number_of_records: int
|
|
54
59
|
number_of_columns: int
|
|
@@ -59,6 +64,10 @@ class TableExample(BaseModel):
|
|
|
59
64
|
|
|
60
65
|
|
|
61
66
|
class NodeData(BaseModel):
|
|
67
|
+
"""A comprehensive model holding the complete state and data for a single node.
|
|
68
|
+
|
|
69
|
+
This includes its input/output data previews, settings, and run status.
|
|
70
|
+
"""
|
|
62
71
|
flow_id: int
|
|
63
72
|
node_id: int
|
|
64
73
|
flow_type: str
|
|
@@ -74,19 +83,23 @@ class NodeData(BaseModel):
|
|
|
74
83
|
|
|
75
84
|
|
|
76
85
|
class OutputFile(BaseItem):
|
|
86
|
+
"""Represents a single file in an output directory, extending BaseItem."""
|
|
77
87
|
ext: Optional[str] = None
|
|
78
88
|
mimetype: Optional[str] = None
|
|
79
89
|
|
|
80
90
|
|
|
81
91
|
class OutputFiles(BaseItem):
|
|
92
|
+
"""Represents a collection of files, typically within a directory."""
|
|
82
93
|
files: List[OutputFile] = Field(default_factory=list)
|
|
83
94
|
|
|
84
95
|
|
|
85
96
|
class OutputTree(OutputFiles):
|
|
97
|
+
"""Represents a directory tree, including subdirectories."""
|
|
86
98
|
directories: List[OutputFiles] = Field(default_factory=list)
|
|
87
99
|
|
|
88
100
|
|
|
89
101
|
class ItemInfo(OutputFile):
|
|
102
|
+
"""Provides detailed information about a single item in an output directory."""
|
|
90
103
|
id: int = -1
|
|
91
104
|
type: str
|
|
92
105
|
analysis_file_available: bool = False
|
|
@@ -95,21 +108,24 @@ class ItemInfo(OutputFile):
|
|
|
95
108
|
|
|
96
109
|
|
|
97
110
|
class OutputDir(BaseItem):
|
|
111
|
+
"""Represents the contents of a single output directory."""
|
|
98
112
|
all_items: List[str]
|
|
99
113
|
items: List[ItemInfo]
|
|
100
114
|
|
|
101
115
|
|
|
102
116
|
class ExpressionRef(BaseModel):
|
|
117
|
+
"""A reference to a single Polars expression, including its name and docstring."""
|
|
103
118
|
name: str
|
|
104
119
|
doc: Optional[str]
|
|
105
120
|
|
|
106
121
|
|
|
107
122
|
class ExpressionsOverview(BaseModel):
|
|
123
|
+
"""Represents a categorized list of available Polars expressions."""
|
|
108
124
|
expression_type: str
|
|
109
125
|
expressions: List[ExpressionRef]
|
|
110
126
|
|
|
111
127
|
|
|
112
128
|
class InstantFuncResult(BaseModel):
|
|
129
|
+
"""Represents the result of a function that is expected to execute instantly."""
|
|
113
130
|
success: Optional[bool] = None
|
|
114
|
-
result: str
|
|
115
|
-
|
|
131
|
+
result: str
|