Flowfile 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (98) hide show
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
  5. flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
  6. flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
  7. flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
  8. flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  9. flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
  10. flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
  11. flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
  12. flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
  13. flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
  14. flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
  15. flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
  16. flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
  17. flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
  18. flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
  19. flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
  20. flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
  21. flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
  22. flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
  23. flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
  24. flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
  26. flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
  27. flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
  28. flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
  29. flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
  31. flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
  33. flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
  34. flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
  35. flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
  37. flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
  38. flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
  39. flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
  40. flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
  41. flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
  42. flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
  43. flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
  44. flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
  45. flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
  49. flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/METADATA +2 -2
  52. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/RECORD +96 -94
  53. flowfile_core/__init__.py +1 -0
  54. flowfile_core/auth/jwt.py +39 -0
  55. flowfile_core/configs/node_store/nodes.py +1 -0
  56. flowfile_core/configs/settings.py +6 -5
  57. flowfile_core/flowfile/code_generator/code_generator.py +71 -0
  58. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -1
  59. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +597 -309
  60. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  61. flowfile_core/flowfile/flow_graph.py +619 -191
  62. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  63. flowfile_core/flowfile/flow_node/flow_node.py +500 -89
  64. flowfile_core/flowfile/flow_node/models.py +125 -20
  65. flowfile_core/flowfile/handler.py +2 -33
  66. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  67. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  68. flowfile_core/flowfile/utils.py +36 -5
  69. flowfile_core/main.py +32 -13
  70. flowfile_core/routes/cloud_connections.py +7 -11
  71. flowfile_core/routes/logs.py +2 -6
  72. flowfile_core/routes/public.py +1 -0
  73. flowfile_core/routes/routes.py +127 -51
  74. flowfile_core/routes/secrets.py +72 -14
  75. flowfile_core/schemas/__init__.py +8 -0
  76. flowfile_core/schemas/input_schema.py +92 -64
  77. flowfile_core/schemas/output_model.py +19 -3
  78. flowfile_core/schemas/schemas.py +144 -11
  79. flowfile_core/schemas/transform_schema.py +82 -17
  80. flowfile_frame/__init__.py +9 -1
  81. flowfile_frame/cloud_storage/__init__.py +0 -0
  82. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  83. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  84. flowfile_frame/expr.py +28 -1
  85. flowfile_frame/expr.pyi +76 -61
  86. flowfile_frame/flow_frame.py +232 -110
  87. flowfile_frame/flow_frame.pyi +140 -91
  88. flowfile_frame/flow_frame_methods.py +150 -12
  89. flowfile_frame/group_frame.py +3 -0
  90. flowfile_frame/utils.py +25 -3
  91. test_utils/s3/data_generator.py +1 -0
  92. test_utils/s3/demo_data_generator.py +186 -0
  93. test_utils/s3/fixtures.py +6 -1
  94. flowfile_core/schemas/defaults.py +0 -9
  95. flowfile_core/schemas/models.py +0 -193
  96. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  97. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  98. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +0 -0
@@ -20,26 +20,31 @@ InputType = Literal["main", "left", "right"]
20
20
 
21
21
 
22
22
  class NewDirectory(BaseModel):
23
+ """Defines the information required to create a new directory."""
23
24
  source_path: str
24
25
  dir_name: str
25
26
 
26
27
 
27
28
  class RemoveItem(BaseModel):
29
+ """Represents a single item to be removed from a directory or list."""
28
30
  path: str
29
31
  id: int = -1
30
32
 
31
33
 
32
34
  class RemoveItemsInput(BaseModel):
35
+ """Defines a list of items to be removed."""
33
36
  paths: List[RemoveItem]
34
37
  source_path: str
35
38
 
36
39
 
37
40
  class MinimalFieldInfo(BaseModel):
41
+ """Represents the most basic information about a data field (column)."""
38
42
  name: str
39
43
  data_type: str = "String"
40
44
 
41
45
 
42
46
  class ReceivedTableBase(BaseModel):
47
+ """Base model for defining a table received from an external source."""
43
48
  id: Optional[int] = None
44
49
  name: Optional[str]
45
50
  path: str # This can be an absolute or relative path
@@ -52,36 +57,37 @@ class ReceivedTableBase(BaseModel):
52
57
 
53
58
  @classmethod
54
59
  def create_from_path(cls, path: str):
60
+ """Creates an instance from a file path string."""
55
61
  filename = Path(path).name
56
- # Create an instance of ReceivedTableBase with the extracted filename and path
57
62
  return cls(name=filename, path=path)
58
63
 
59
64
  @property
60
65
  def file_path(self) -> str:
66
+ """Constructs the full file path from the directory and name."""
61
67
  if not self.name in self.path:
62
68
  return os.path.join(self.path, self.name)
63
69
  else:
64
70
  return self.path
65
71
 
66
72
  def set_absolute_filepath(self):
73
+ """Resolves the path to an absolute file path."""
67
74
  base_path = Path(self.path).expanduser()
68
- # Check if the path is relative, resolve it with the current working directory
69
75
  if not base_path.is_absolute():
70
76
  base_path = Path.cwd() / base_path
71
-
72
77
  if self.name and self.name not in base_path.name:
73
78
  base_path = base_path / self.name
74
-
75
79
  self.abs_file_path = str(base_path.resolve())
76
80
 
77
81
  @model_validator(mode='after')
78
82
  def populate_abs_file_path(self):
83
+ """Ensures the absolute file path is populated after validation."""
79
84
  if not self.abs_file_path:
80
85
  self.set_absolute_filepath()
81
86
  return self
82
87
 
83
88
 
84
89
  class ReceivedCsvTable(ReceivedTableBase):
90
+ """Defines settings for reading a CSV file."""
85
91
  file_type: str = 'csv'
86
92
  reference: str = ''
87
93
  starting_from_line: int = 0
@@ -97,82 +103,88 @@ class ReceivedCsvTable(ReceivedTableBase):
97
103
 
98
104
 
99
105
  class ReceivedJsonTable(ReceivedCsvTable):
106
+ """Defines settings for reading a JSON file (inherits from CSV settings)."""
100
107
  pass
101
108
 
102
109
 
103
110
  class ReceivedParquetTable(ReceivedTableBase):
111
+ """Defines settings for reading a Parquet file."""
104
112
  file_type: str = 'parquet'
105
113
 
106
114
 
107
115
  class ReceivedExcelTable(ReceivedTableBase):
116
+ """Defines settings for reading an Excel file."""
108
117
  sheet_name: Optional[str] = None
109
- start_row: int = 0 # optional
110
- start_column: int = 0 # optional
111
- end_row: int = 0 # optional
112
- end_column: int = 0 # optional
113
- has_headers: bool = True # optional
114
- type_inference: bool = False # optional
118
+ start_row: int = 0
119
+ start_column: int = 0
120
+ end_row: int = 0
121
+ end_column: int = 0
122
+ has_headers: bool = True
123
+ type_inference: bool = False
115
124
 
116
125
  def validate_range_values(self):
117
- # Validate that start and end rows/columns are non-negative integers
126
+ """Validates that the Excel cell range is logical."""
118
127
  for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
119
128
  if not isinstance(attribute, int) or attribute < 0:
120
129
  raise ValueError("Row and column indices must be non-negative integers")
121
-
122
- # Validate that start is before end if end is specified (non-zero)
123
130
  if (self.end_row > 0 and self.start_row > self.end_row) or \
124
- (self.end_column > 0 and self.start_column > self.end_column):
125
- raise ValueError("Start row/column must not be greater than end row/column if specified")
131
+ (self.end_column > 0 and self.start_column > self.end_column):
132
+ raise ValueError("Start row/column must not be greater than end row/column")
126
133
 
127
134
 
128
135
  class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
136
+ """A comprehensive model that can represent any type of received table."""
129
137
  ...
130
138
 
131
139
 
132
140
  class OutputCsvTable(BaseModel):
141
+ """Defines settings for writing a CSV file."""
133
142
  file_type: str = 'csv'
134
143
  delimiter: str = ','
135
144
  encoding: str = 'utf-8'
136
145
 
137
146
 
138
147
  class OutputParquetTable(BaseModel):
148
+ """Defines settings for writing a Parquet file."""
139
149
  file_type: str = 'parquet'
140
150
 
141
151
 
142
152
  class OutputExcelTable(BaseModel):
153
+ """Defines settings for writing an Excel file."""
143
154
  file_type: str = 'excel'
144
155
  sheet_name: str = 'Sheet1'
145
156
 
146
157
 
147
158
  class OutputSettings(BaseModel):
159
+ """Defines the complete settings for an output node."""
148
160
  name: str
149
161
  directory: str
150
162
  file_type: str
151
163
  fields: Optional[List[str]] = Field(default_factory=list)
152
164
  write_mode: str = 'overwrite'
153
- output_csv_table: OutputCsvTable
154
- output_parquet_table: OutputParquetTable
155
- output_excel_table: OutputExcelTable
165
+ output_csv_table: Optional[OutputCsvTable] = Field(default_factory=OutputCsvTable)
166
+ output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
167
+ output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
156
168
  abs_file_path: Optional[str] = None
157
169
 
158
170
  def set_absolute_filepath(self):
171
+ """Resolves the output directory and name into an absolute path."""
159
172
  base_path = Path(self.directory)
160
-
161
173
  if not base_path.is_absolute():
162
174
  base_path = Path.cwd() / base_path
163
-
164
175
  if self.name and self.name not in base_path.name:
165
176
  base_path = base_path / self.name
166
-
167
177
  self.abs_file_path = str(base_path.resolve())
168
178
 
169
179
  @model_validator(mode='after')
170
180
  def populate_abs_file_path(self):
181
+ """Ensures the absolute file path is populated after validation."""
171
182
  self.set_absolute_filepath()
172
183
  return self
173
184
 
174
185
 
175
186
  class NodeBase(BaseModel):
187
+ """Base model for all nodes in a FlowGraph. Contains common metadata."""
176
188
  model_config = ConfigDict(arbitrary_types_allowed=True)
177
189
  flow_id: int
178
190
  node_id: int
@@ -184,48 +196,51 @@ class NodeBase(BaseModel):
184
196
  user_id: Optional[int] = None
185
197
  is_flow_output: Optional[bool] = False
186
198
 
187
- @classmethod
188
- def overridden_hash(cls):
189
- if getattr(cls, '__hash__'):
190
- return BaseModel.__hash__ is not getattr(cls, '__hash__')
191
- return False
192
-
193
199
 
194
200
  class NodeSingleInput(NodeBase):
201
+ """A base model for any node that takes a single data input."""
195
202
  depending_on_id: Optional[int] = -1
196
203
 
197
204
 
198
205
  class NodeMultiInput(NodeBase):
206
+ """A base model for any node that takes multiple data inputs."""
199
207
  depending_on_ids: Optional[List[int]] = [-1]
200
208
 
201
209
 
202
210
  class NodeSelect(NodeSingleInput):
211
+ """Settings for a node that selects, renames, and reorders columns."""
203
212
  keep_missing: bool = True
204
213
  select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
205
214
  sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
206
215
 
207
216
 
208
217
  class NodeFilter(NodeSingleInput):
218
+ """Settings for a node that filters rows based on a condition."""
209
219
  filter_input: transform_schema.FilterInput
210
220
 
211
221
 
212
222
  class NodeSort(NodeSingleInput):
223
+ """Settings for a node that sorts the data by one or more columns."""
213
224
  sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
214
225
 
215
226
 
216
227
  class NodeTextToRows(NodeSingleInput):
228
+ """Settings for a node that splits a text column into multiple rows."""
217
229
  text_to_rows_input: transform_schema.TextToRowsInput
218
230
 
219
231
 
220
232
  class NodeSample(NodeSingleInput):
233
+ """Settings for a node that samples a subset of the data."""
221
234
  sample_size: int = 1000
222
235
 
223
236
 
224
237
  class NodeRecordId(NodeSingleInput):
238
+ """Settings for a node that adds a unique record ID column."""
225
239
  record_id_input: transform_schema.RecordIdInput
226
240
 
227
241
 
228
242
  class NodeJoin(NodeMultiInput):
243
+ """Settings for a node that performs a standard SQL-style join."""
229
244
  auto_generate_selection: bool = True
230
245
  verify_integrity: bool = True
231
246
  join_input: transform_schema.JoinInput
@@ -235,6 +250,7 @@ class NodeJoin(NodeMultiInput):
235
250
 
236
251
 
237
252
  class NodeCrossJoin(NodeMultiInput):
253
+ """Settings for a node that performs a cross join."""
238
254
  auto_generate_selection: bool = True
239
255
  verify_integrity: bool = True
240
256
  cross_join_input: transform_schema.CrossJoinInput
@@ -244,47 +260,50 @@ class NodeCrossJoin(NodeMultiInput):
244
260
 
245
261
 
246
262
  class NodeFuzzyMatch(NodeJoin):
263
+ """Settings for a node that performs a fuzzy join based on string similarity."""
247
264
  join_input: transform_schema.FuzzyMatchInput
248
265
 
249
266
 
250
267
  class NodeDatasource(NodeBase):
268
+ """Base settings for a node that acts as a data source."""
251
269
  file_ref: str = None
252
270
 
253
271
 
254
272
  class RawData(BaseModel):
273
+ """Represents data in a raw, columnar format for manual input."""
255
274
  columns: List[MinimalFieldInfo] = None
256
275
  data: List[List]
257
276
 
258
- @classmethod
259
- def from_columns(cls, columns: List[str], data: List[List]):
260
- return cls(columns=[MinimalFieldInfo(name=column) for column in columns], data=data)
261
-
262
277
  @classmethod
263
278
  def from_pylist(cls, pylist: List[dict]):
279
+ """Creates a RawData object from a list of Python dictionaries."""
264
280
  if len(pylist) == 0:
265
281
  return cls(columns=[], data=[])
266
282
  pylist = ensure_similarity_dicts(pylist)
267
283
  values = [standardize_col_dtype([vv for vv in c]) for c in
268
284
  zip(*(r.values() for r in pylist))]
269
-
270
285
  data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
271
286
  columns = [MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in pylist[0].keys()]
272
287
  return cls(columns=columns, data=values)
273
288
 
274
- def to_pylist(self):
289
+ def to_pylist(self) -> List[dict]:
290
+ """Converts the RawData object back into a list of Python dictionaries."""
275
291
  return [{c.name: self.data[ci][ri] for ci, c in enumerate(self.columns)} for ri in range(len(self.data[0]))]
276
292
 
277
293
 
278
294
  class NodeManualInput(NodeBase):
295
+ """Settings for a node that allows direct data entry in the UI."""
279
296
  raw_data_format: Optional[RawData] = None
280
297
 
281
298
 
282
299
  class NodeRead(NodeBase):
300
+ """Settings for a node that reads data from a file."""
283
301
  received_file: ReceivedTable
284
302
 
285
303
 
286
304
  class DatabaseConnection(BaseModel):
287
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
305
+ """Defines the connection parameters for a database."""
306
+ database_type: str = "postgresql"
288
307
  username: Optional[str] = None
289
308
  password_ref: Optional[SecretRef] = None
290
309
  host: Optional[str] = None
@@ -294,8 +313,9 @@ class DatabaseConnection(BaseModel):
294
313
 
295
314
 
296
315
  class FullDatabaseConnection(BaseModel):
316
+ """A complete database connection model including the secret password."""
297
317
  connection_name: str
298
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
318
+ database_type: str = "postgresql"
299
319
  username: str
300
320
  password: SecretStr
301
321
  host: Optional[str] = None
@@ -306,8 +326,9 @@ class FullDatabaseConnection(BaseModel):
306
326
 
307
327
 
308
328
  class FullDatabaseConnectionInterface(BaseModel):
329
+ """A database connection model intended for UI display, omitting the password."""
309
330
  connection_name: str
310
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
331
+ database_type: str = "postgresql"
311
332
  username: str
312
333
  host: Optional[str] = None
313
334
  port: Optional[int] = None
@@ -317,6 +338,7 @@ class FullDatabaseConnectionInterface(BaseModel):
317
338
 
318
339
 
319
340
  class DatabaseSettings(BaseModel):
341
+ """Defines settings for reading from a database, either via table or query."""
320
342
  connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
321
343
  database_connection: Optional[DatabaseConnection] = None
322
344
  database_connection_name: Optional[str] = None
@@ -342,6 +364,7 @@ class DatabaseSettings(BaseModel):
342
364
 
343
365
 
344
366
  class DatabaseWriteSettings(BaseModel):
367
+ """Defines settings for writing data to a database table."""
345
368
  connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
346
369
  database_connection: Optional[DatabaseConnection] = None
347
370
  database_connection_name: Optional[str] = None
@@ -351,141 +374,146 @@ class DatabaseWriteSettings(BaseModel):
351
374
 
352
375
 
353
376
  class NodeDatabaseReader(NodeBase):
377
+ """Settings for a node that reads from a database."""
354
378
  database_settings: DatabaseSettings
355
379
  fields: Optional[List[MinimalFieldInfo]] = None
356
380
 
357
381
 
358
382
  class NodeDatabaseWriter(NodeSingleInput):
383
+ """Settings for a node that writes data to a database."""
359
384
  database_write_settings: DatabaseWriteSettings
360
385
 
361
386
 
362
387
  class NodeCloudStorageReader(NodeBase):
363
- """Cloud storage source node"""
388
+ """Settings for a node that reads from a cloud storage service (S3, GCS, etc.)."""
364
389
  cloud_storage_settings: CloudStorageReadSettings
365
390
  fields: Optional[List[MinimalFieldInfo]] = None
366
391
 
367
392
 
368
393
  class NodeCloudStorageWriter(NodeSingleInput):
369
- """Cloud storage destination node"""
394
+ """Settings for a node that writes to a cloud storage service."""
370
395
  cloud_storage_settings: CloudStorageWriteSettings
371
396
 
372
397
 
373
398
  class ExternalSource(BaseModel):
399
+ """Base model for data coming from a predefined external source."""
374
400
  orientation: str = 'row'
375
401
  fields: Optional[List[MinimalFieldInfo]] = None
376
402
 
377
403
 
378
404
  class SampleUsers(ExternalSource):
405
+ """Settings for generating a sample dataset of users."""
379
406
  SAMPLE_USERS: bool
380
407
  class_name: str = "sample_users"
381
408
  size: int = 100
382
409
 
383
410
 
384
- class AccessToken(BaseModel):
385
- user_id: str
386
- access_token: SecretStr = None
387
-
388
-
389
411
  class NodeExternalSource(NodeBase):
412
+ """Settings for a node that connects to a registered external data source."""
390
413
  identifier: str
391
414
  source_settings: SampleUsers
392
415
 
393
416
 
394
417
  class NodeFormula(NodeSingleInput):
418
+ """Settings for a node that applies a formula to create/modify a column."""
395
419
  function: transform_schema.FunctionInput = None
396
420
 
397
421
 
398
422
  class NodeGroupBy(NodeSingleInput):
423
+ """Settings for a node that performs a group-by and aggregation operation."""
399
424
  groupby_input: transform_schema.GroupByInput = None
400
425
 
401
426
 
402
427
  class NodePromise(NodeBase):
428
+ """A placeholder node for an operation that has not yet been configured."""
403
429
  is_setup: bool = False
404
430
  node_type: str
405
431
 
406
432
 
407
433
  class NodeInputConnection(BaseModel):
434
+ """Represents the input side of a connection between two nodes."""
408
435
  node_id: int
409
436
  connection_class: InputConnectionClass
410
437
 
411
438
  def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
439
+ """Determines the semantic type of the input (e.g., for a join)."""
412
440
  match self.connection_class:
413
- case 'input-0':
414
- return 'main'
415
- case 'input-1':
416
- return 'right'
417
- case 'input-2':
418
- return 'left'
419
- case _:
420
- raise ValueError(f"Unexpected connection_class: {self.connection_class}")
441
+ case 'input-0': return 'main'
442
+ case 'input-1': return 'right'
443
+ case 'input-2': return 'left'
444
+ case _: raise ValueError(f"Unexpected connection_class: {self.connection_class}")
421
445
 
422
446
 
423
447
  class NodePivot(NodeSingleInput):
448
+ """Settings for a node that pivots data from a long to a wide format."""
424
449
  pivot_input: transform_schema.PivotInput = None
425
450
  output_fields: Optional[List[MinimalFieldInfo]] = None
426
451
 
427
452
 
428
453
  class NodeUnpivot(NodeSingleInput):
454
+ """Settings for a node that unpivots data from a wide to a long format."""
429
455
  unpivot_input: transform_schema.UnpivotInput = None
430
456
 
431
457
 
432
458
  class NodeUnion(NodeMultiInput):
459
+ """Settings for a node that concatenates multiple data inputs."""
433
460
  union_input: transform_schema.UnionInput = Field(default_factory=transform_schema.UnionInput)
434
461
 
435
462
 
436
463
  class NodeOutput(NodeSingleInput):
464
+ """Settings for a node that writes its input to a file."""
437
465
  output_settings: OutputSettings
438
466
 
439
467
 
440
468
  class NodeOutputConnection(BaseModel):
469
+ """Represents the output side of a connection between two nodes."""
441
470
  node_id: int
442
471
  connection_class: OutputConnectionClass
443
472
 
444
473
 
445
474
  class NodeConnection(BaseModel):
475
+ """Represents a connection (edge) between two nodes in the graph."""
446
476
  input_connection: NodeInputConnection
447
477
  output_connection: NodeOutputConnection
448
478
 
449
479
  @classmethod
450
480
  def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
451
-
481
+ """Creates a standard connection between two nodes."""
452
482
  match input_type:
453
- case "main":
454
- connection_class: InputConnectionClass = "input-0"
455
- case "right":
456
- connection_class: InputConnectionClass = "input-1"
457
- case "left":
458
- connection_class: InputConnectionClass = "input-2"
459
- case _:
460
- connection_class: InputConnectionClass = "input-0"
483
+ case "main": connection_class: InputConnectionClass = "input-0"
484
+ case "right": connection_class: InputConnectionClass = "input-1"
485
+ case "left": connection_class: InputConnectionClass = "input-2"
486
+ case _: connection_class: InputConnectionClass = "input-0"
461
487
  node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
462
488
  node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
463
489
  return cls(input_connection=node_input, output_connection=node_output)
464
490
 
465
491
 
466
492
  class NodeDescription(BaseModel):
493
+ """A simple model for updating a node's description text."""
467
494
  description: str = ''
468
495
 
469
496
 
470
497
  class NodeExploreData(NodeBase):
498
+ """Settings for a node that provides an interactive data exploration interface."""
471
499
  graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
472
- _hash_overrule: int = 0
473
-
474
- def __hash__(self):
475
- return 0
476
500
 
477
501
 
478
502
  class NodeGraphSolver(NodeSingleInput):
503
+ """Settings for a node that solves graph-based problems (e.g., connected components)."""
479
504
  graph_solver_input: transform_schema.GraphSolverInput
480
505
 
481
506
 
482
507
  class NodeUnique(NodeSingleInput):
508
+ """Settings for a node that returns the unique rows from the data."""
483
509
  unique_input: transform_schema.UniqueInput
484
510
 
485
511
 
486
512
  class NodeRecordCount(NodeSingleInput):
513
+ """Settings for a node that counts the number of records."""
487
514
  pass
488
515
 
489
516
 
490
517
  class NodePolarsCode(NodeMultiInput):
518
+ """Settings for a node that executes arbitrary user-provided Polars code."""
491
519
  polars_code_input: transform_schema.PolarsCodeInput
@@ -5,8 +5,9 @@ import time
5
5
 
6
6
 
7
7
  class NodeResult(BaseModel):
8
+ """Represents the execution result of a single node in a FlowGraph run."""
8
9
  node_id: int
9
- node_name: str = None
10
+ node_name: Optional[str] = None
10
11
  start_timestamp: float = Field(default_factory=time.time)
11
12
  end_timestamp: float = 0
12
13
  success: Optional[bool] = None
@@ -16,6 +17,7 @@ class NodeResult(BaseModel):
16
17
 
17
18
 
18
19
  class RunInformation(BaseModel):
20
+ """Contains summary information about a complete FlowGraph execution."""
19
21
  flow_id: int
20
22
  start_time: Optional[datetime] = Field(default_factory=datetime.now)
21
23
  end_time: Optional[datetime] = None
@@ -26,6 +28,7 @@ class RunInformation(BaseModel):
26
28
 
27
29
 
28
30
  class BaseItem(BaseModel):
31
+ """A base model for any item in a file system, like a file or directory."""
29
32
  name: str
30
33
  path: str
31
34
  size: Optional[int] = None
@@ -37,6 +40,7 @@ class BaseItem(BaseModel):
37
40
 
38
41
 
39
42
  class FileColumn(BaseModel):
43
+ """Represents detailed schema and statistics for a single column (field)."""
40
44
  name: str
41
45
  data_type: str
42
46
  is_unique: bool
@@ -49,6 +53,7 @@ class FileColumn(BaseModel):
49
53
 
50
54
 
51
55
  class TableExample(BaseModel):
56
+ """Represents a preview of a table, including schema and sample data."""
52
57
  node_id: int
53
58
  number_of_records: int
54
59
  number_of_columns: int
@@ -59,6 +64,10 @@ class TableExample(BaseModel):
59
64
 
60
65
 
61
66
  class NodeData(BaseModel):
67
+ """A comprehensive model holding the complete state and data for a single node.
68
+
69
+ This includes its input/output data previews, settings, and run status.
70
+ """
62
71
  flow_id: int
63
72
  node_id: int
64
73
  flow_type: str
@@ -74,19 +83,23 @@ class NodeData(BaseModel):
74
83
 
75
84
 
76
85
  class OutputFile(BaseItem):
86
+ """Represents a single file in an output directory, extending BaseItem."""
77
87
  ext: Optional[str] = None
78
88
  mimetype: Optional[str] = None
79
89
 
80
90
 
81
91
  class OutputFiles(BaseItem):
92
+ """Represents a collection of files, typically within a directory."""
82
93
  files: List[OutputFile] = Field(default_factory=list)
83
94
 
84
95
 
85
96
  class OutputTree(OutputFiles):
97
+ """Represents a directory tree, including subdirectories."""
86
98
  directories: List[OutputFiles] = Field(default_factory=list)
87
99
 
88
100
 
89
101
  class ItemInfo(OutputFile):
102
+ """Provides detailed information about a single item in an output directory."""
90
103
  id: int = -1
91
104
  type: str
92
105
  analysis_file_available: bool = False
@@ -95,21 +108,24 @@ class ItemInfo(OutputFile):
95
108
 
96
109
 
97
110
  class OutputDir(BaseItem):
111
+ """Represents the contents of a single output directory."""
98
112
  all_items: List[str]
99
113
  items: List[ItemInfo]
100
114
 
101
115
 
102
116
  class ExpressionRef(BaseModel):
117
+ """A reference to a single Polars expression, including its name and docstring."""
103
118
  name: str
104
119
  doc: Optional[str]
105
120
 
106
121
 
107
122
  class ExpressionsOverview(BaseModel):
123
+ """Represents a categorized list of available Polars expressions."""
108
124
  expression_type: str
109
125
  expressions: List[ExpressionRef]
110
126
 
111
127
 
112
128
  class InstantFuncResult(BaseModel):
129
+ """Represents the result of a function that is expected to execute instantly."""
113
130
  success: Optional[bool] = None
114
- result: str
115
-
131
+ result: str