Flowfile 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
- flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
- flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
- flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
- flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
- flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
- flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
- flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
- flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
- flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
- flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/METADATA +2 -2
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/RECORD +96 -94
- flowfile_core/__init__.py +1 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +1 -0
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/flowfile/code_generator/code_generator.py +71 -0
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +597 -309
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_graph.py +619 -191
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +500 -89
- flowfile_core/flowfile/flow_node/models.py +125 -20
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +36 -5
- flowfile_core/main.py +32 -13
- flowfile_core/routes/cloud_connections.py +7 -11
- flowfile_core/routes/logs.py +2 -6
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +127 -51
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/input_schema.py +92 -64
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +144 -11
- flowfile_core/schemas/transform_schema.py +82 -17
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/__init__.py +0 -0
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +232 -110
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +150 -12
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- test_utils/s3/data_generator.py +1 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +6 -1
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/models.py +0 -193
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +0 -0
|
@@ -20,26 +20,31 @@ InputType = Literal["main", "left", "right"]
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class NewDirectory(BaseModel):
|
|
23
|
+
"""Defines the information required to create a new directory."""
|
|
23
24
|
source_path: str
|
|
24
25
|
dir_name: str
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class RemoveItem(BaseModel):
|
|
29
|
+
"""Represents a single item to be removed from a directory or list."""
|
|
28
30
|
path: str
|
|
29
31
|
id: int = -1
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
class RemoveItemsInput(BaseModel):
|
|
35
|
+
"""Defines a list of items to be removed."""
|
|
33
36
|
paths: List[RemoveItem]
|
|
34
37
|
source_path: str
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
class MinimalFieldInfo(BaseModel):
|
|
41
|
+
"""Represents the most basic information about a data field (column)."""
|
|
38
42
|
name: str
|
|
39
43
|
data_type: str = "String"
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
class ReceivedTableBase(BaseModel):
|
|
47
|
+
"""Base model for defining a table received from an external source."""
|
|
43
48
|
id: Optional[int] = None
|
|
44
49
|
name: Optional[str]
|
|
45
50
|
path: str # This can be an absolute or relative path
|
|
@@ -52,36 +57,37 @@ class ReceivedTableBase(BaseModel):
|
|
|
52
57
|
|
|
53
58
|
@classmethod
|
|
54
59
|
def create_from_path(cls, path: str):
|
|
60
|
+
"""Creates an instance from a file path string."""
|
|
55
61
|
filename = Path(path).name
|
|
56
|
-
# Create an instance of ReceivedTableBase with the extracted filename and path
|
|
57
62
|
return cls(name=filename, path=path)
|
|
58
63
|
|
|
59
64
|
@property
|
|
60
65
|
def file_path(self) -> str:
|
|
66
|
+
"""Constructs the full file path from the directory and name."""
|
|
61
67
|
if not self.name in self.path:
|
|
62
68
|
return os.path.join(self.path, self.name)
|
|
63
69
|
else:
|
|
64
70
|
return self.path
|
|
65
71
|
|
|
66
72
|
def set_absolute_filepath(self):
|
|
73
|
+
"""Resolves the path to an absolute file path."""
|
|
67
74
|
base_path = Path(self.path).expanduser()
|
|
68
|
-
# Check if the path is relative, resolve it with the current working directory
|
|
69
75
|
if not base_path.is_absolute():
|
|
70
76
|
base_path = Path.cwd() / base_path
|
|
71
|
-
|
|
72
77
|
if self.name and self.name not in base_path.name:
|
|
73
78
|
base_path = base_path / self.name
|
|
74
|
-
|
|
75
79
|
self.abs_file_path = str(base_path.resolve())
|
|
76
80
|
|
|
77
81
|
@model_validator(mode='after')
|
|
78
82
|
def populate_abs_file_path(self):
|
|
83
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
79
84
|
if not self.abs_file_path:
|
|
80
85
|
self.set_absolute_filepath()
|
|
81
86
|
return self
|
|
82
87
|
|
|
83
88
|
|
|
84
89
|
class ReceivedCsvTable(ReceivedTableBase):
|
|
90
|
+
"""Defines settings for reading a CSV file."""
|
|
85
91
|
file_type: str = 'csv'
|
|
86
92
|
reference: str = ''
|
|
87
93
|
starting_from_line: int = 0
|
|
@@ -97,82 +103,88 @@ class ReceivedCsvTable(ReceivedTableBase):
|
|
|
97
103
|
|
|
98
104
|
|
|
99
105
|
class ReceivedJsonTable(ReceivedCsvTable):
|
|
106
|
+
"""Defines settings for reading a JSON file (inherits from CSV settings)."""
|
|
100
107
|
pass
|
|
101
108
|
|
|
102
109
|
|
|
103
110
|
class ReceivedParquetTable(ReceivedTableBase):
|
|
111
|
+
"""Defines settings for reading a Parquet file."""
|
|
104
112
|
file_type: str = 'parquet'
|
|
105
113
|
|
|
106
114
|
|
|
107
115
|
class ReceivedExcelTable(ReceivedTableBase):
|
|
116
|
+
"""Defines settings for reading an Excel file."""
|
|
108
117
|
sheet_name: Optional[str] = None
|
|
109
|
-
start_row: int = 0
|
|
110
|
-
start_column: int = 0
|
|
111
|
-
end_row: int = 0
|
|
112
|
-
end_column: int = 0
|
|
113
|
-
has_headers: bool = True
|
|
114
|
-
type_inference: bool = False
|
|
118
|
+
start_row: int = 0
|
|
119
|
+
start_column: int = 0
|
|
120
|
+
end_row: int = 0
|
|
121
|
+
end_column: int = 0
|
|
122
|
+
has_headers: bool = True
|
|
123
|
+
type_inference: bool = False
|
|
115
124
|
|
|
116
125
|
def validate_range_values(self):
|
|
117
|
-
|
|
126
|
+
"""Validates that the Excel cell range is logical."""
|
|
118
127
|
for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
|
|
119
128
|
if not isinstance(attribute, int) or attribute < 0:
|
|
120
129
|
raise ValueError("Row and column indices must be non-negative integers")
|
|
121
|
-
|
|
122
|
-
# Validate that start is before end if end is specified (non-zero)
|
|
123
130
|
if (self.end_row > 0 and self.start_row > self.end_row) or \
|
|
124
|
-
|
|
125
|
-
raise ValueError("Start row/column must not be greater than end row/column
|
|
131
|
+
(self.end_column > 0 and self.start_column > self.end_column):
|
|
132
|
+
raise ValueError("Start row/column must not be greater than end row/column")
|
|
126
133
|
|
|
127
134
|
|
|
128
135
|
class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
|
|
136
|
+
"""A comprehensive model that can represent any type of received table."""
|
|
129
137
|
...
|
|
130
138
|
|
|
131
139
|
|
|
132
140
|
class OutputCsvTable(BaseModel):
|
|
141
|
+
"""Defines settings for writing a CSV file."""
|
|
133
142
|
file_type: str = 'csv'
|
|
134
143
|
delimiter: str = ','
|
|
135
144
|
encoding: str = 'utf-8'
|
|
136
145
|
|
|
137
146
|
|
|
138
147
|
class OutputParquetTable(BaseModel):
|
|
148
|
+
"""Defines settings for writing a Parquet file."""
|
|
139
149
|
file_type: str = 'parquet'
|
|
140
150
|
|
|
141
151
|
|
|
142
152
|
class OutputExcelTable(BaseModel):
|
|
153
|
+
"""Defines settings for writing an Excel file."""
|
|
143
154
|
file_type: str = 'excel'
|
|
144
155
|
sheet_name: str = 'Sheet1'
|
|
145
156
|
|
|
146
157
|
|
|
147
158
|
class OutputSettings(BaseModel):
|
|
159
|
+
"""Defines the complete settings for an output node."""
|
|
148
160
|
name: str
|
|
149
161
|
directory: str
|
|
150
162
|
file_type: str
|
|
151
163
|
fields: Optional[List[str]] = Field(default_factory=list)
|
|
152
164
|
write_mode: str = 'overwrite'
|
|
153
|
-
output_csv_table: OutputCsvTable
|
|
154
|
-
output_parquet_table: OutputParquetTable
|
|
155
|
-
output_excel_table: OutputExcelTable
|
|
165
|
+
output_csv_table: Optional[OutputCsvTable] = Field(default_factory=OutputCsvTable)
|
|
166
|
+
output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
|
|
167
|
+
output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
|
|
156
168
|
abs_file_path: Optional[str] = None
|
|
157
169
|
|
|
158
170
|
def set_absolute_filepath(self):
|
|
171
|
+
"""Resolves the output directory and name into an absolute path."""
|
|
159
172
|
base_path = Path(self.directory)
|
|
160
|
-
|
|
161
173
|
if not base_path.is_absolute():
|
|
162
174
|
base_path = Path.cwd() / base_path
|
|
163
|
-
|
|
164
175
|
if self.name and self.name not in base_path.name:
|
|
165
176
|
base_path = base_path / self.name
|
|
166
|
-
|
|
167
177
|
self.abs_file_path = str(base_path.resolve())
|
|
168
178
|
|
|
169
179
|
@model_validator(mode='after')
|
|
170
180
|
def populate_abs_file_path(self):
|
|
181
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
171
182
|
self.set_absolute_filepath()
|
|
172
183
|
return self
|
|
173
184
|
|
|
174
185
|
|
|
175
186
|
class NodeBase(BaseModel):
|
|
187
|
+
"""Base model for all nodes in a FlowGraph. Contains common metadata."""
|
|
176
188
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
177
189
|
flow_id: int
|
|
178
190
|
node_id: int
|
|
@@ -184,48 +196,51 @@ class NodeBase(BaseModel):
|
|
|
184
196
|
user_id: Optional[int] = None
|
|
185
197
|
is_flow_output: Optional[bool] = False
|
|
186
198
|
|
|
187
|
-
@classmethod
|
|
188
|
-
def overridden_hash(cls):
|
|
189
|
-
if getattr(cls, '__hash__'):
|
|
190
|
-
return BaseModel.__hash__ is not getattr(cls, '__hash__')
|
|
191
|
-
return False
|
|
192
|
-
|
|
193
199
|
|
|
194
200
|
class NodeSingleInput(NodeBase):
|
|
201
|
+
"""A base model for any node that takes a single data input."""
|
|
195
202
|
depending_on_id: Optional[int] = -1
|
|
196
203
|
|
|
197
204
|
|
|
198
205
|
class NodeMultiInput(NodeBase):
|
|
206
|
+
"""A base model for any node that takes multiple data inputs."""
|
|
199
207
|
depending_on_ids: Optional[List[int]] = [-1]
|
|
200
208
|
|
|
201
209
|
|
|
202
210
|
class NodeSelect(NodeSingleInput):
|
|
211
|
+
"""Settings for a node that selects, renames, and reorders columns."""
|
|
203
212
|
keep_missing: bool = True
|
|
204
213
|
select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
|
|
205
214
|
sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
|
|
206
215
|
|
|
207
216
|
|
|
208
217
|
class NodeFilter(NodeSingleInput):
|
|
218
|
+
"""Settings for a node that filters rows based on a condition."""
|
|
209
219
|
filter_input: transform_schema.FilterInput
|
|
210
220
|
|
|
211
221
|
|
|
212
222
|
class NodeSort(NodeSingleInput):
|
|
223
|
+
"""Settings for a node that sorts the data by one or more columns."""
|
|
213
224
|
sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
|
|
214
225
|
|
|
215
226
|
|
|
216
227
|
class NodeTextToRows(NodeSingleInput):
|
|
228
|
+
"""Settings for a node that splits a text column into multiple rows."""
|
|
217
229
|
text_to_rows_input: transform_schema.TextToRowsInput
|
|
218
230
|
|
|
219
231
|
|
|
220
232
|
class NodeSample(NodeSingleInput):
|
|
233
|
+
"""Settings for a node that samples a subset of the data."""
|
|
221
234
|
sample_size: int = 1000
|
|
222
235
|
|
|
223
236
|
|
|
224
237
|
class NodeRecordId(NodeSingleInput):
|
|
238
|
+
"""Settings for a node that adds a unique record ID column."""
|
|
225
239
|
record_id_input: transform_schema.RecordIdInput
|
|
226
240
|
|
|
227
241
|
|
|
228
242
|
class NodeJoin(NodeMultiInput):
|
|
243
|
+
"""Settings for a node that performs a standard SQL-style join."""
|
|
229
244
|
auto_generate_selection: bool = True
|
|
230
245
|
verify_integrity: bool = True
|
|
231
246
|
join_input: transform_schema.JoinInput
|
|
@@ -235,6 +250,7 @@ class NodeJoin(NodeMultiInput):
|
|
|
235
250
|
|
|
236
251
|
|
|
237
252
|
class NodeCrossJoin(NodeMultiInput):
|
|
253
|
+
"""Settings for a node that performs a cross join."""
|
|
238
254
|
auto_generate_selection: bool = True
|
|
239
255
|
verify_integrity: bool = True
|
|
240
256
|
cross_join_input: transform_schema.CrossJoinInput
|
|
@@ -244,47 +260,50 @@ class NodeCrossJoin(NodeMultiInput):
|
|
|
244
260
|
|
|
245
261
|
|
|
246
262
|
class NodeFuzzyMatch(NodeJoin):
|
|
263
|
+
"""Settings for a node that performs a fuzzy join based on string similarity."""
|
|
247
264
|
join_input: transform_schema.FuzzyMatchInput
|
|
248
265
|
|
|
249
266
|
|
|
250
267
|
class NodeDatasource(NodeBase):
|
|
268
|
+
"""Base settings for a node that acts as a data source."""
|
|
251
269
|
file_ref: str = None
|
|
252
270
|
|
|
253
271
|
|
|
254
272
|
class RawData(BaseModel):
|
|
273
|
+
"""Represents data in a raw, columnar format for manual input."""
|
|
255
274
|
columns: List[MinimalFieldInfo] = None
|
|
256
275
|
data: List[List]
|
|
257
276
|
|
|
258
|
-
@classmethod
|
|
259
|
-
def from_columns(cls, columns: List[str], data: List[List]):
|
|
260
|
-
return cls(columns=[MinimalFieldInfo(name=column) for column in columns], data=data)
|
|
261
|
-
|
|
262
277
|
@classmethod
|
|
263
278
|
def from_pylist(cls, pylist: List[dict]):
|
|
279
|
+
"""Creates a RawData object from a list of Python dictionaries."""
|
|
264
280
|
if len(pylist) == 0:
|
|
265
281
|
return cls(columns=[], data=[])
|
|
266
282
|
pylist = ensure_similarity_dicts(pylist)
|
|
267
283
|
values = [standardize_col_dtype([vv for vv in c]) for c in
|
|
268
284
|
zip(*(r.values() for r in pylist))]
|
|
269
|
-
|
|
270
285
|
data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
|
|
271
286
|
columns = [MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in pylist[0].keys()]
|
|
272
287
|
return cls(columns=columns, data=values)
|
|
273
288
|
|
|
274
|
-
def to_pylist(self):
|
|
289
|
+
def to_pylist(self) -> List[dict]:
|
|
290
|
+
"""Converts the RawData object back into a list of Python dictionaries."""
|
|
275
291
|
return [{c.name: self.data[ci][ri] for ci, c in enumerate(self.columns)} for ri in range(len(self.data[0]))]
|
|
276
292
|
|
|
277
293
|
|
|
278
294
|
class NodeManualInput(NodeBase):
|
|
295
|
+
"""Settings for a node that allows direct data entry in the UI."""
|
|
279
296
|
raw_data_format: Optional[RawData] = None
|
|
280
297
|
|
|
281
298
|
|
|
282
299
|
class NodeRead(NodeBase):
|
|
300
|
+
"""Settings for a node that reads data from a file."""
|
|
283
301
|
received_file: ReceivedTable
|
|
284
302
|
|
|
285
303
|
|
|
286
304
|
class DatabaseConnection(BaseModel):
|
|
287
|
-
|
|
305
|
+
"""Defines the connection parameters for a database."""
|
|
306
|
+
database_type: str = "postgresql"
|
|
288
307
|
username: Optional[str] = None
|
|
289
308
|
password_ref: Optional[SecretRef] = None
|
|
290
309
|
host: Optional[str] = None
|
|
@@ -294,8 +313,9 @@ class DatabaseConnection(BaseModel):
|
|
|
294
313
|
|
|
295
314
|
|
|
296
315
|
class FullDatabaseConnection(BaseModel):
|
|
316
|
+
"""A complete database connection model including the secret password."""
|
|
297
317
|
connection_name: str
|
|
298
|
-
database_type: str = "postgresql"
|
|
318
|
+
database_type: str = "postgresql"
|
|
299
319
|
username: str
|
|
300
320
|
password: SecretStr
|
|
301
321
|
host: Optional[str] = None
|
|
@@ -306,8 +326,9 @@ class FullDatabaseConnection(BaseModel):
|
|
|
306
326
|
|
|
307
327
|
|
|
308
328
|
class FullDatabaseConnectionInterface(BaseModel):
|
|
329
|
+
"""A database connection model intended for UI display, omitting the password."""
|
|
309
330
|
connection_name: str
|
|
310
|
-
database_type: str = "postgresql"
|
|
331
|
+
database_type: str = "postgresql"
|
|
311
332
|
username: str
|
|
312
333
|
host: Optional[str] = None
|
|
313
334
|
port: Optional[int] = None
|
|
@@ -317,6 +338,7 @@ class FullDatabaseConnectionInterface(BaseModel):
|
|
|
317
338
|
|
|
318
339
|
|
|
319
340
|
class DatabaseSettings(BaseModel):
|
|
341
|
+
"""Defines settings for reading from a database, either via table or query."""
|
|
320
342
|
connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
|
|
321
343
|
database_connection: Optional[DatabaseConnection] = None
|
|
322
344
|
database_connection_name: Optional[str] = None
|
|
@@ -342,6 +364,7 @@ class DatabaseSettings(BaseModel):
|
|
|
342
364
|
|
|
343
365
|
|
|
344
366
|
class DatabaseWriteSettings(BaseModel):
|
|
367
|
+
"""Defines settings for writing data to a database table."""
|
|
345
368
|
connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
|
|
346
369
|
database_connection: Optional[DatabaseConnection] = None
|
|
347
370
|
database_connection_name: Optional[str] = None
|
|
@@ -351,141 +374,146 @@ class DatabaseWriteSettings(BaseModel):
|
|
|
351
374
|
|
|
352
375
|
|
|
353
376
|
class NodeDatabaseReader(NodeBase):
|
|
377
|
+
"""Settings for a node that reads from a database."""
|
|
354
378
|
database_settings: DatabaseSettings
|
|
355
379
|
fields: Optional[List[MinimalFieldInfo]] = None
|
|
356
380
|
|
|
357
381
|
|
|
358
382
|
class NodeDatabaseWriter(NodeSingleInput):
|
|
383
|
+
"""Settings for a node that writes data to a database."""
|
|
359
384
|
database_write_settings: DatabaseWriteSettings
|
|
360
385
|
|
|
361
386
|
|
|
362
387
|
class NodeCloudStorageReader(NodeBase):
|
|
363
|
-
"""
|
|
388
|
+
"""Settings for a node that reads from a cloud storage service (S3, GCS, etc.)."""
|
|
364
389
|
cloud_storage_settings: CloudStorageReadSettings
|
|
365
390
|
fields: Optional[List[MinimalFieldInfo]] = None
|
|
366
391
|
|
|
367
392
|
|
|
368
393
|
class NodeCloudStorageWriter(NodeSingleInput):
|
|
369
|
-
"""
|
|
394
|
+
"""Settings for a node that writes to a cloud storage service."""
|
|
370
395
|
cloud_storage_settings: CloudStorageWriteSettings
|
|
371
396
|
|
|
372
397
|
|
|
373
398
|
class ExternalSource(BaseModel):
|
|
399
|
+
"""Base model for data coming from a predefined external source."""
|
|
374
400
|
orientation: str = 'row'
|
|
375
401
|
fields: Optional[List[MinimalFieldInfo]] = None
|
|
376
402
|
|
|
377
403
|
|
|
378
404
|
class SampleUsers(ExternalSource):
|
|
405
|
+
"""Settings for generating a sample dataset of users."""
|
|
379
406
|
SAMPLE_USERS: bool
|
|
380
407
|
class_name: str = "sample_users"
|
|
381
408
|
size: int = 100
|
|
382
409
|
|
|
383
410
|
|
|
384
|
-
class AccessToken(BaseModel):
|
|
385
|
-
user_id: str
|
|
386
|
-
access_token: SecretStr = None
|
|
387
|
-
|
|
388
|
-
|
|
389
411
|
class NodeExternalSource(NodeBase):
|
|
412
|
+
"""Settings for a node that connects to a registered external data source."""
|
|
390
413
|
identifier: str
|
|
391
414
|
source_settings: SampleUsers
|
|
392
415
|
|
|
393
416
|
|
|
394
417
|
class NodeFormula(NodeSingleInput):
|
|
418
|
+
"""Settings for a node that applies a formula to create/modify a column."""
|
|
395
419
|
function: transform_schema.FunctionInput = None
|
|
396
420
|
|
|
397
421
|
|
|
398
422
|
class NodeGroupBy(NodeSingleInput):
|
|
423
|
+
"""Settings for a node that performs a group-by and aggregation operation."""
|
|
399
424
|
groupby_input: transform_schema.GroupByInput = None
|
|
400
425
|
|
|
401
426
|
|
|
402
427
|
class NodePromise(NodeBase):
|
|
428
|
+
"""A placeholder node for an operation that has not yet been configured."""
|
|
403
429
|
is_setup: bool = False
|
|
404
430
|
node_type: str
|
|
405
431
|
|
|
406
432
|
|
|
407
433
|
class NodeInputConnection(BaseModel):
|
|
434
|
+
"""Represents the input side of a connection between two nodes."""
|
|
408
435
|
node_id: int
|
|
409
436
|
connection_class: InputConnectionClass
|
|
410
437
|
|
|
411
438
|
def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
|
|
439
|
+
"""Determines the semantic type of the input (e.g., for a join)."""
|
|
412
440
|
match self.connection_class:
|
|
413
|
-
case 'input-0':
|
|
414
|
-
|
|
415
|
-
case 'input-
|
|
416
|
-
|
|
417
|
-
case 'input-2':
|
|
418
|
-
return 'left'
|
|
419
|
-
case _:
|
|
420
|
-
raise ValueError(f"Unexpected connection_class: {self.connection_class}")
|
|
441
|
+
case 'input-0': return 'main'
|
|
442
|
+
case 'input-1': return 'right'
|
|
443
|
+
case 'input-2': return 'left'
|
|
444
|
+
case _: raise ValueError(f"Unexpected connection_class: {self.connection_class}")
|
|
421
445
|
|
|
422
446
|
|
|
423
447
|
class NodePivot(NodeSingleInput):
|
|
448
|
+
"""Settings for a node that pivots data from a long to a wide format."""
|
|
424
449
|
pivot_input: transform_schema.PivotInput = None
|
|
425
450
|
output_fields: Optional[List[MinimalFieldInfo]] = None
|
|
426
451
|
|
|
427
452
|
|
|
428
453
|
class NodeUnpivot(NodeSingleInput):
|
|
454
|
+
"""Settings for a node that unpivots data from a wide to a long format."""
|
|
429
455
|
unpivot_input: transform_schema.UnpivotInput = None
|
|
430
456
|
|
|
431
457
|
|
|
432
458
|
class NodeUnion(NodeMultiInput):
|
|
459
|
+
"""Settings for a node that concatenates multiple data inputs."""
|
|
433
460
|
union_input: transform_schema.UnionInput = Field(default_factory=transform_schema.UnionInput)
|
|
434
461
|
|
|
435
462
|
|
|
436
463
|
class NodeOutput(NodeSingleInput):
|
|
464
|
+
"""Settings for a node that writes its input to a file."""
|
|
437
465
|
output_settings: OutputSettings
|
|
438
466
|
|
|
439
467
|
|
|
440
468
|
class NodeOutputConnection(BaseModel):
|
|
469
|
+
"""Represents the output side of a connection between two nodes."""
|
|
441
470
|
node_id: int
|
|
442
471
|
connection_class: OutputConnectionClass
|
|
443
472
|
|
|
444
473
|
|
|
445
474
|
class NodeConnection(BaseModel):
|
|
475
|
+
"""Represents a connection (edge) between two nodes in the graph."""
|
|
446
476
|
input_connection: NodeInputConnection
|
|
447
477
|
output_connection: NodeOutputConnection
|
|
448
478
|
|
|
449
479
|
@classmethod
|
|
450
480
|
def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
|
|
451
|
-
|
|
481
|
+
"""Creates a standard connection between two nodes."""
|
|
452
482
|
match input_type:
|
|
453
|
-
case "main":
|
|
454
|
-
|
|
455
|
-
case "
|
|
456
|
-
|
|
457
|
-
case "left":
|
|
458
|
-
connection_class: InputConnectionClass = "input-2"
|
|
459
|
-
case _:
|
|
460
|
-
connection_class: InputConnectionClass = "input-0"
|
|
483
|
+
case "main": connection_class: InputConnectionClass = "input-0"
|
|
484
|
+
case "right": connection_class: InputConnectionClass = "input-1"
|
|
485
|
+
case "left": connection_class: InputConnectionClass = "input-2"
|
|
486
|
+
case _: connection_class: InputConnectionClass = "input-0"
|
|
461
487
|
node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
|
|
462
488
|
node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
|
|
463
489
|
return cls(input_connection=node_input, output_connection=node_output)
|
|
464
490
|
|
|
465
491
|
|
|
466
492
|
class NodeDescription(BaseModel):
|
|
493
|
+
"""A simple model for updating a node's description text."""
|
|
467
494
|
description: str = ''
|
|
468
495
|
|
|
469
496
|
|
|
470
497
|
class NodeExploreData(NodeBase):
|
|
498
|
+
"""Settings for a node that provides an interactive data exploration interface."""
|
|
471
499
|
graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
|
|
472
|
-
_hash_overrule: int = 0
|
|
473
|
-
|
|
474
|
-
def __hash__(self):
|
|
475
|
-
return 0
|
|
476
500
|
|
|
477
501
|
|
|
478
502
|
class NodeGraphSolver(NodeSingleInput):
|
|
503
|
+
"""Settings for a node that solves graph-based problems (e.g., connected components)."""
|
|
479
504
|
graph_solver_input: transform_schema.GraphSolverInput
|
|
480
505
|
|
|
481
506
|
|
|
482
507
|
class NodeUnique(NodeSingleInput):
|
|
508
|
+
"""Settings for a node that returns the unique rows from the data."""
|
|
483
509
|
unique_input: transform_schema.UniqueInput
|
|
484
510
|
|
|
485
511
|
|
|
486
512
|
class NodeRecordCount(NodeSingleInput):
|
|
513
|
+
"""Settings for a node that counts the number of records."""
|
|
487
514
|
pass
|
|
488
515
|
|
|
489
516
|
|
|
490
517
|
class NodePolarsCode(NodeMultiInput):
|
|
518
|
+
"""Settings for a node that executes arbitrary user-provided Polars code."""
|
|
491
519
|
polars_code_input: transform_schema.PolarsCodeInput
|
|
@@ -5,8 +5,9 @@ import time
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class NodeResult(BaseModel):
|
|
8
|
+
"""Represents the execution result of a single node in a FlowGraph run."""
|
|
8
9
|
node_id: int
|
|
9
|
-
node_name: str = None
|
|
10
|
+
node_name: Optional[str] = None
|
|
10
11
|
start_timestamp: float = Field(default_factory=time.time)
|
|
11
12
|
end_timestamp: float = 0
|
|
12
13
|
success: Optional[bool] = None
|
|
@@ -16,6 +17,7 @@ class NodeResult(BaseModel):
|
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class RunInformation(BaseModel):
|
|
20
|
+
"""Contains summary information about a complete FlowGraph execution."""
|
|
19
21
|
flow_id: int
|
|
20
22
|
start_time: Optional[datetime] = Field(default_factory=datetime.now)
|
|
21
23
|
end_time: Optional[datetime] = None
|
|
@@ -26,6 +28,7 @@ class RunInformation(BaseModel):
|
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
class BaseItem(BaseModel):
|
|
31
|
+
"""A base model for any item in a file system, like a file or directory."""
|
|
29
32
|
name: str
|
|
30
33
|
path: str
|
|
31
34
|
size: Optional[int] = None
|
|
@@ -37,6 +40,7 @@ class BaseItem(BaseModel):
|
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
class FileColumn(BaseModel):
|
|
43
|
+
"""Represents detailed schema and statistics for a single column (field)."""
|
|
40
44
|
name: str
|
|
41
45
|
data_type: str
|
|
42
46
|
is_unique: bool
|
|
@@ -49,6 +53,7 @@ class FileColumn(BaseModel):
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
class TableExample(BaseModel):
|
|
56
|
+
"""Represents a preview of a table, including schema and sample data."""
|
|
52
57
|
node_id: int
|
|
53
58
|
number_of_records: int
|
|
54
59
|
number_of_columns: int
|
|
@@ -59,6 +64,10 @@ class TableExample(BaseModel):
|
|
|
59
64
|
|
|
60
65
|
|
|
61
66
|
class NodeData(BaseModel):
|
|
67
|
+
"""A comprehensive model holding the complete state and data for a single node.
|
|
68
|
+
|
|
69
|
+
This includes its input/output data previews, settings, and run status.
|
|
70
|
+
"""
|
|
62
71
|
flow_id: int
|
|
63
72
|
node_id: int
|
|
64
73
|
flow_type: str
|
|
@@ -74,19 +83,23 @@ class NodeData(BaseModel):
|
|
|
74
83
|
|
|
75
84
|
|
|
76
85
|
class OutputFile(BaseItem):
|
|
86
|
+
"""Represents a single file in an output directory, extending BaseItem."""
|
|
77
87
|
ext: Optional[str] = None
|
|
78
88
|
mimetype: Optional[str] = None
|
|
79
89
|
|
|
80
90
|
|
|
81
91
|
class OutputFiles(BaseItem):
|
|
92
|
+
"""Represents a collection of files, typically within a directory."""
|
|
82
93
|
files: List[OutputFile] = Field(default_factory=list)
|
|
83
94
|
|
|
84
95
|
|
|
85
96
|
class OutputTree(OutputFiles):
|
|
97
|
+
"""Represents a directory tree, including subdirectories."""
|
|
86
98
|
directories: List[OutputFiles] = Field(default_factory=list)
|
|
87
99
|
|
|
88
100
|
|
|
89
101
|
class ItemInfo(OutputFile):
|
|
102
|
+
"""Provides detailed information about a single item in an output directory."""
|
|
90
103
|
id: int = -1
|
|
91
104
|
type: str
|
|
92
105
|
analysis_file_available: bool = False
|
|
@@ -95,21 +108,24 @@ class ItemInfo(OutputFile):
|
|
|
95
108
|
|
|
96
109
|
|
|
97
110
|
class OutputDir(BaseItem):
|
|
111
|
+
"""Represents the contents of a single output directory."""
|
|
98
112
|
all_items: List[str]
|
|
99
113
|
items: List[ItemInfo]
|
|
100
114
|
|
|
101
115
|
|
|
102
116
|
class ExpressionRef(BaseModel):
|
|
117
|
+
"""A reference to a single Polars expression, including its name and docstring."""
|
|
103
118
|
name: str
|
|
104
119
|
doc: Optional[str]
|
|
105
120
|
|
|
106
121
|
|
|
107
122
|
class ExpressionsOverview(BaseModel):
|
|
123
|
+
"""Represents a categorized list of available Polars expressions."""
|
|
108
124
|
expression_type: str
|
|
109
125
|
expressions: List[ExpressionRef]
|
|
110
126
|
|
|
111
127
|
|
|
112
128
|
class InstantFuncResult(BaseModel):
|
|
129
|
+
"""Represents the result of a function that is expected to execute instantly."""
|
|
113
130
|
success: Optional[bool] = None
|
|
114
|
-
result: str
|
|
115
|
-
|
|
131
|
+
result: str
|