Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile has been flagged as potentially problematic. (The original page linked to further details; that link is not preserved in this text.)

Files changed (145)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  5. flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
  6. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  7. flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
  8. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  9. flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
  10. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
  11. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
  14. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
  15. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
  16. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
  17. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
  20. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
  21. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
  22. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
  23. flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
  24. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  25. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
  26. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  27. flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
  28. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
  29. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
  30. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
  31. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
  32. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
  33. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
  34. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
  35. flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
  36. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
  37. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
  38. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
  39. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
  40. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
  41. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
  42. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
  43. flowfile/web/static/assets/api-6ef0dcef.js +80 -0
  44. flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
  45. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  46. flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
  47. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
  48. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
  49. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
  50. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
  51. flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
  52. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
  53. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
  54. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
  55. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
  56. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
  57. flowfile/web/static/index.html +1 -1
  58. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
  59. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
  60. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
  61. flowfile_core/__init__.py +3 -0
  62. flowfile_core/auth/jwt.py +39 -0
  63. flowfile_core/configs/node_store/nodes.py +9 -6
  64. flowfile_core/configs/settings.py +6 -5
  65. flowfile_core/database/connection.py +63 -15
  66. flowfile_core/database/init_db.py +0 -1
  67. flowfile_core/database/models.py +49 -2
  68. flowfile_core/flowfile/code_generator/code_generator.py +472 -17
  69. flowfile_core/flowfile/connection_manager/models.py +1 -1
  70. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  71. flowfile_core/flowfile/extensions.py +1 -1
  72. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  73. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  74. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
  75. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  76. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  77. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  78. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  79. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  80. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  81. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  82. flowfile_core/flowfile/flow_graph.py +718 -253
  83. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  84. flowfile_core/flowfile/flow_node/flow_node.py +563 -117
  85. flowfile_core/flowfile/flow_node/models.py +154 -20
  86. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  87. flowfile_core/flowfile/handler.py +2 -33
  88. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  89. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  90. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  91. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  92. flowfile_core/flowfile/utils.py +35 -26
  93. flowfile_core/main.py +35 -15
  94. flowfile_core/routes/cloud_connections.py +77 -0
  95. flowfile_core/routes/logs.py +2 -7
  96. flowfile_core/routes/public.py +1 -0
  97. flowfile_core/routes/routes.py +130 -90
  98. flowfile_core/routes/secrets.py +72 -14
  99. flowfile_core/schemas/__init__.py +8 -0
  100. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  101. flowfile_core/schemas/input_schema.py +121 -71
  102. flowfile_core/schemas/output_model.py +19 -3
  103. flowfile_core/schemas/schemas.py +150 -12
  104. flowfile_core/schemas/transform_schema.py +175 -35
  105. flowfile_core/utils/utils.py +40 -1
  106. flowfile_core/utils/validate_setup.py +41 -0
  107. flowfile_frame/__init__.py +9 -1
  108. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  109. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  110. flowfile_frame/expr.py +28 -1
  111. flowfile_frame/expr.pyi +76 -61
  112. flowfile_frame/flow_frame.py +481 -208
  113. flowfile_frame/flow_frame.pyi +140 -91
  114. flowfile_frame/flow_frame_methods.py +160 -22
  115. flowfile_frame/group_frame.py +3 -0
  116. flowfile_frame/utils.py +25 -3
  117. flowfile_worker/external_sources/s3_source/main.py +216 -0
  118. flowfile_worker/external_sources/s3_source/models.py +142 -0
  119. flowfile_worker/funcs.py +51 -6
  120. flowfile_worker/models.py +22 -2
  121. flowfile_worker/routes.py +40 -38
  122. flowfile_worker/utils.py +1 -1
  123. test_utils/s3/commands.py +46 -0
  124. test_utils/s3/data_generator.py +292 -0
  125. test_utils/s3/demo_data_generator.py +186 -0
  126. test_utils/s3/fixtures.py +214 -0
  127. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  128. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  129. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  130. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  131. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  132. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  133. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  134. flowfile_core/schemas/defaults.py +0 -9
  135. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  136. flowfile_core/schemas/models.py +0 -193
  137. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  138. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  139. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  140. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  141. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  142. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  143. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
  144. {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  145. {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
@@ -3,8 +3,11 @@ from flowfile_core.schemas import transform_schema
3
3
  from pathlib import Path
4
4
  import os
5
5
  from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
6
- from flowfile_core.schemas.external_sources.airbyte_schemas import AirbyteConfig
6
+ from flowfile_core.schemas.cloud_storage_schemas import CloudStorageReadSettings, CloudStorageWriteSettings
7
+ from flowfile_core.schemas.schemas import SecretRef
8
+ from flowfile_core.utils.utils import ensure_similarity_dicts, standardize_col_dtype
7
9
  from pydantic import BaseModel, Field, model_validator, SecretStr, ConfigDict
10
+ import polars as pl
8
11
 
9
12
 
10
13
  OutputConnectionClass = Literal['output-0', 'output-1', 'output-2', 'output-3', 'output-4',
@@ -17,26 +20,31 @@ InputType = Literal["main", "left", "right"]
17
20
 
18
21
 
19
22
  class NewDirectory(BaseModel):
23
+ """Defines the information required to create a new directory."""
20
24
  source_path: str
21
25
  dir_name: str
22
26
 
23
27
 
24
28
  class RemoveItem(BaseModel):
29
+ """Represents a single item to be removed from a directory or list."""
25
30
  path: str
26
31
  id: int = -1
27
32
 
28
33
 
29
34
  class RemoveItemsInput(BaseModel):
35
+ """Defines a list of items to be removed."""
30
36
  paths: List[RemoveItem]
31
37
  source_path: str
32
38
 
33
39
 
34
40
  class MinimalFieldInfo(BaseModel):
41
+ """Represents the most basic information about a data field (column)."""
35
42
  name: str
36
- data_type: str
43
+ data_type: str = "String"
37
44
 
38
45
 
39
46
  class ReceivedTableBase(BaseModel):
47
+ """Base model for defining a table received from an external source."""
40
48
  id: Optional[int] = None
41
49
  name: Optional[str]
42
50
  path: str # This can be an absolute or relative path
@@ -49,36 +57,37 @@ class ReceivedTableBase(BaseModel):
49
57
 
50
58
  @classmethod
51
59
  def create_from_path(cls, path: str):
60
+ """Creates an instance from a file path string."""
52
61
  filename = Path(path).name
53
- # Create an instance of ReceivedTableBase with the extracted filename and path
54
62
  return cls(name=filename, path=path)
55
63
 
56
64
  @property
57
65
  def file_path(self) -> str:
66
+ """Constructs the full file path from the directory and name."""
58
67
  if not self.name in self.path:
59
68
  return os.path.join(self.path, self.name)
60
69
  else:
61
70
  return self.path
62
71
 
63
72
  def set_absolute_filepath(self):
73
+ """Resolves the path to an absolute file path."""
64
74
  base_path = Path(self.path).expanduser()
65
- # Check if the path is relative, resolve it with the current working directory
66
75
  if not base_path.is_absolute():
67
76
  base_path = Path.cwd() / base_path
68
-
69
77
  if self.name and self.name not in base_path.name:
70
78
  base_path = base_path / self.name
71
-
72
79
  self.abs_file_path = str(base_path.resolve())
73
80
 
74
81
  @model_validator(mode='after')
75
82
  def populate_abs_file_path(self):
83
+ """Ensures the absolute file path is populated after validation."""
76
84
  if not self.abs_file_path:
77
85
  self.set_absolute_filepath()
78
86
  return self
79
87
 
80
88
 
81
89
  class ReceivedCsvTable(ReceivedTableBase):
90
+ """Defines settings for reading a CSV file."""
82
91
  file_type: str = 'csv'
83
92
  reference: str = ''
84
93
  starting_from_line: int = 0
@@ -94,82 +103,88 @@ class ReceivedCsvTable(ReceivedTableBase):
94
103
 
95
104
 
96
105
  class ReceivedJsonTable(ReceivedCsvTable):
106
+ """Defines settings for reading a JSON file (inherits from CSV settings)."""
97
107
  pass
98
108
 
99
109
 
100
110
  class ReceivedParquetTable(ReceivedTableBase):
111
+ """Defines settings for reading a Parquet file."""
101
112
  file_type: str = 'parquet'
102
113
 
103
114
 
104
115
  class ReceivedExcelTable(ReceivedTableBase):
116
+ """Defines settings for reading an Excel file."""
105
117
  sheet_name: Optional[str] = None
106
- start_row: int = 0 # optional
107
- start_column: int = 0 # optional
108
- end_row: int = 0 # optional
109
- end_column: int = 0 # optional
110
- has_headers: bool = True # optional
111
- type_inference: bool = False # optional
118
+ start_row: int = 0
119
+ start_column: int = 0
120
+ end_row: int = 0
121
+ end_column: int = 0
122
+ has_headers: bool = True
123
+ type_inference: bool = False
112
124
 
113
125
  def validate_range_values(self):
114
- # Validate that start and end rows/columns are non-negative integers
126
+ """Validates that the Excel cell range is logical."""
115
127
  for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
116
128
  if not isinstance(attribute, int) or attribute < 0:
117
129
  raise ValueError("Row and column indices must be non-negative integers")
118
-
119
- # Validate that start is before end if end is specified (non-zero)
120
130
  if (self.end_row > 0 and self.start_row > self.end_row) or \
121
- (self.end_column > 0 and self.start_column > self.end_column):
122
- raise ValueError("Start row/column must not be greater than end row/column if specified")
131
+ (self.end_column > 0 and self.start_column > self.end_column):
132
+ raise ValueError("Start row/column must not be greater than end row/column")
123
133
 
124
134
 
125
135
  class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
136
+ """A comprehensive model that can represent any type of received table."""
126
137
  ...
127
138
 
128
139
 
129
140
  class OutputCsvTable(BaseModel):
141
+ """Defines settings for writing a CSV file."""
130
142
  file_type: str = 'csv'
131
143
  delimiter: str = ','
132
144
  encoding: str = 'utf-8'
133
145
 
134
146
 
135
147
  class OutputParquetTable(BaseModel):
148
+ """Defines settings for writing a Parquet file."""
136
149
  file_type: str = 'parquet'
137
150
 
138
151
 
139
152
  class OutputExcelTable(BaseModel):
153
+ """Defines settings for writing an Excel file."""
140
154
  file_type: str = 'excel'
141
155
  sheet_name: str = 'Sheet1'
142
156
 
143
157
 
144
158
  class OutputSettings(BaseModel):
159
+ """Defines the complete settings for an output node."""
145
160
  name: str
146
161
  directory: str
147
162
  file_type: str
148
163
  fields: Optional[List[str]] = Field(default_factory=list)
149
164
  write_mode: str = 'overwrite'
150
- output_csv_table: OutputCsvTable
151
- output_parquet_table: OutputParquetTable
152
- output_excel_table: OutputExcelTable
165
+ output_csv_table: Optional[OutputCsvTable] = Field(default_factory=OutputCsvTable)
166
+ output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
167
+ output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
153
168
  abs_file_path: Optional[str] = None
154
169
 
155
170
  def set_absolute_filepath(self):
171
+ """Resolves the output directory and name into an absolute path."""
156
172
  base_path = Path(self.directory)
157
-
158
173
  if not base_path.is_absolute():
159
174
  base_path = Path.cwd() / base_path
160
-
161
175
  if self.name and self.name not in base_path.name:
162
176
  base_path = base_path / self.name
163
-
164
177
  self.abs_file_path = str(base_path.resolve())
165
178
 
166
179
  @model_validator(mode='after')
167
180
  def populate_abs_file_path(self):
181
+ """Ensures the absolute file path is populated after validation."""
168
182
  self.set_absolute_filepath()
169
183
  return self
170
184
 
171
185
 
172
186
  class NodeBase(BaseModel):
187
+ """Base model for all nodes in a FlowGraph. Contains common metadata."""
173
188
  model_config = ConfigDict(arbitrary_types_allowed=True)
174
189
  flow_id: int
175
190
  node_id: int
@@ -181,48 +196,51 @@ class NodeBase(BaseModel):
181
196
  user_id: Optional[int] = None
182
197
  is_flow_output: Optional[bool] = False
183
198
 
184
- @classmethod
185
- def overridden_hash(cls):
186
- if getattr(cls, '__hash__'):
187
- return BaseModel.__hash__ is not getattr(cls, '__hash__')
188
- return False
189
-
190
199
 
191
200
  class NodeSingleInput(NodeBase):
201
+ """A base model for any node that takes a single data input."""
192
202
  depending_on_id: Optional[int] = -1
193
203
 
194
204
 
195
205
  class NodeMultiInput(NodeBase):
206
+ """A base model for any node that takes multiple data inputs."""
196
207
  depending_on_ids: Optional[List[int]] = [-1]
197
208
 
198
209
 
199
210
  class NodeSelect(NodeSingleInput):
211
+ """Settings for a node that selects, renames, and reorders columns."""
200
212
  keep_missing: bool = True
201
213
  select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
202
214
  sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
203
215
 
204
216
 
205
217
  class NodeFilter(NodeSingleInput):
218
+ """Settings for a node that filters rows based on a condition."""
206
219
  filter_input: transform_schema.FilterInput
207
220
 
208
221
 
209
222
  class NodeSort(NodeSingleInput):
223
+ """Settings for a node that sorts the data by one or more columns."""
210
224
  sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
211
225
 
212
226
 
213
227
  class NodeTextToRows(NodeSingleInput):
228
+ """Settings for a node that splits a text column into multiple rows."""
214
229
  text_to_rows_input: transform_schema.TextToRowsInput
215
230
 
216
231
 
217
232
  class NodeSample(NodeSingleInput):
233
+ """Settings for a node that samples a subset of the data."""
218
234
  sample_size: int = 1000
219
235
 
220
236
 
221
237
  class NodeRecordId(NodeSingleInput):
238
+ """Settings for a node that adds a unique record ID column."""
222
239
  record_id_input: transform_schema.RecordIdInput
223
240
 
224
241
 
225
242
  class NodeJoin(NodeMultiInput):
243
+ """Settings for a node that performs a standard SQL-style join."""
226
244
  auto_generate_selection: bool = True
227
245
  verify_integrity: bool = True
228
246
  join_input: transform_schema.JoinInput
@@ -232,6 +250,7 @@ class NodeJoin(NodeMultiInput):
232
250
 
233
251
 
234
252
  class NodeCrossJoin(NodeMultiInput):
253
+ """Settings for a node that performs a cross join."""
235
254
  auto_generate_selection: bool = True
236
255
  verify_integrity: bool = True
237
256
  cross_join_input: transform_schema.CrossJoinInput
@@ -241,31 +260,52 @@ class NodeCrossJoin(NodeMultiInput):
241
260
 
242
261
 
243
262
  class NodeFuzzyMatch(NodeJoin):
263
+ """Settings for a node that performs a fuzzy join based on string similarity."""
244
264
  join_input: transform_schema.FuzzyMatchInput
245
265
 
246
266
 
247
267
  class NodeDatasource(NodeBase):
268
+ """Base settings for a node that acts as a data source."""
248
269
  file_ref: str = None
249
270
 
250
271
 
251
272
  class RawData(BaseModel):
273
+ """Represents data in a raw, columnar format for manual input."""
252
274
  columns: List[MinimalFieldInfo] = None
253
- data: List[List] # List of list where each inner list is a column of data. This ensures more efficient storage
275
+ data: List[List]
276
+
277
+ @classmethod
278
+ def from_pylist(cls, pylist: List[dict]):
279
+ """Creates a RawData object from a list of Python dictionaries."""
280
+ if len(pylist) == 0:
281
+ return cls(columns=[], data=[])
282
+ pylist = ensure_similarity_dicts(pylist)
283
+ values = [standardize_col_dtype([vv for vv in c]) for c in
284
+ zip(*(r.values() for r in pylist))]
285
+ data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
286
+ columns = [MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in pylist[0].keys()]
287
+ return cls(columns=columns, data=values)
288
+
289
+ def to_pylist(self) -> List[dict]:
290
+ """Converts the RawData object back into a list of Python dictionaries."""
291
+ return [{c.name: self.data[ci][ri] for ci, c in enumerate(self.columns)} for ri in range(len(self.data[0]))]
254
292
 
255
293
 
256
294
  class NodeManualInput(NodeBase):
257
- raw_data: Optional[List] = None
295
+ """Settings for a node that allows direct data entry in the UI."""
258
296
  raw_data_format: Optional[RawData] = None
259
297
 
260
298
 
261
299
  class NodeRead(NodeBase):
300
+ """Settings for a node that reads data from a file."""
262
301
  received_file: ReceivedTable
263
302
 
264
303
 
265
304
  class DatabaseConnection(BaseModel):
266
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
305
+ """Defines the connection parameters for a database."""
306
+ database_type: str = "postgresql"
267
307
  username: Optional[str] = None
268
- password_ref: Optional[str] = None
308
+ password_ref: Optional[SecretRef] = None
269
309
  host: Optional[str] = None
270
310
  port: Optional[int] = None
271
311
  database: Optional[str] = None
@@ -273,8 +313,9 @@ class DatabaseConnection(BaseModel):
273
313
 
274
314
 
275
315
  class FullDatabaseConnection(BaseModel):
316
+ """A complete database connection model including the secret password."""
276
317
  connection_name: str
277
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
318
+ database_type: str = "postgresql"
278
319
  username: str
279
320
  password: SecretStr
280
321
  host: Optional[str] = None
@@ -285,8 +326,9 @@ class FullDatabaseConnection(BaseModel):
285
326
 
286
327
 
287
328
  class FullDatabaseConnectionInterface(BaseModel):
329
+ """A database connection model intended for UI display, omitting the password."""
288
330
  connection_name: str
289
- database_type: str = "postgresql" # Database type (postgresql, mysql, etc.)
331
+ database_type: str = "postgresql"
290
332
  username: str
291
333
  host: Optional[str] = None
292
334
  port: Optional[int] = None
@@ -296,6 +338,7 @@ class FullDatabaseConnectionInterface(BaseModel):
296
338
 
297
339
 
298
340
  class DatabaseSettings(BaseModel):
341
+ """Defines settings for reading from a database, either via table or query."""
299
342
  connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
300
343
  database_connection: Optional[DatabaseConnection] = None
301
344
  database_connection_name: Optional[str] = None
@@ -321,6 +364,7 @@ class DatabaseSettings(BaseModel):
321
364
 
322
365
 
323
366
  class DatabaseWriteSettings(BaseModel):
367
+ """Defines settings for writing data to a database table."""
324
368
  connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
325
369
  database_connection: Optional[DatabaseConnection] = None
326
370
  database_connection_name: Optional[str] = None
@@ -330,140 +374,146 @@ class DatabaseWriteSettings(BaseModel):
330
374
 
331
375
 
332
376
  class NodeDatabaseReader(NodeBase):
377
+ """Settings for a node that reads from a database."""
333
378
  database_settings: DatabaseSettings
334
379
  fields: Optional[List[MinimalFieldInfo]] = None
335
380
 
336
381
 
337
382
  class NodeDatabaseWriter(NodeSingleInput):
383
+ """Settings for a node that writes data to a database."""
338
384
  database_write_settings: DatabaseWriteSettings
339
385
 
340
386
 
387
+ class NodeCloudStorageReader(NodeBase):
388
+ """Settings for a node that reads from a cloud storage service (S3, GCS, etc.)."""
389
+ cloud_storage_settings: CloudStorageReadSettings
390
+ fields: Optional[List[MinimalFieldInfo]] = None
391
+
392
+
393
+ class NodeCloudStorageWriter(NodeSingleInput):
394
+ """Settings for a node that writes to a cloud storage service."""
395
+ cloud_storage_settings: CloudStorageWriteSettings
396
+
397
+
341
398
  class ExternalSource(BaseModel):
399
+ """Base model for data coming from a predefined external source."""
342
400
  orientation: str = 'row'
343
401
  fields: Optional[List[MinimalFieldInfo]] = None
344
402
 
345
403
 
346
404
  class SampleUsers(ExternalSource):
405
+ """Settings for generating a sample dataset of users."""
347
406
  SAMPLE_USERS: bool
348
407
  class_name: str = "sample_users"
349
408
  size: int = 100
350
409
 
351
410
 
352
- class AirbyteReader(AirbyteConfig):
353
- class_name: Optional[str] = "airbyte_reader"
354
- fields: Optional[List[MinimalFieldInfo]] = None
355
-
356
-
357
- class AccessToken(BaseModel):
358
- user_id: str
359
- access_token: SecretStr = None
360
-
361
-
362
411
  class NodeExternalSource(NodeBase):
412
+ """Settings for a node that connects to a registered external data source."""
363
413
  identifier: str
364
414
  source_settings: SampleUsers
365
415
 
366
416
 
367
- class NodeAirbyteReader(NodeExternalSource):
368
- identifier: str = 'airbyte'
369
- source_settings: AirbyteReader
370
-
371
-
372
417
  class NodeFormula(NodeSingleInput):
418
+ """Settings for a node that applies a formula to create/modify a column."""
373
419
  function: transform_schema.FunctionInput = None
374
420
 
375
421
 
376
422
  class NodeGroupBy(NodeSingleInput):
423
+ """Settings for a node that performs a group-by and aggregation operation."""
377
424
  groupby_input: transform_schema.GroupByInput = None
378
425
 
379
426
 
380
427
  class NodePromise(NodeBase):
428
+ """A placeholder node for an operation that has not yet been configured."""
381
429
  is_setup: bool = False
382
430
  node_type: str
383
431
 
384
432
 
385
433
  class NodeInputConnection(BaseModel):
434
+ """Represents the input side of a connection between two nodes."""
386
435
  node_id: int
387
436
  connection_class: InputConnectionClass
388
437
 
389
438
  def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
439
+ """Determines the semantic type of the input (e.g., for a join)."""
390
440
  match self.connection_class:
391
- case 'input-0':
392
- return 'main'
393
- case 'input-1':
394
- return 'right'
395
- case 'input-2':
396
- return 'left'
397
- case _:
398
- raise ValueError(f"Unexpected connection_class: {self.connection_class}")
441
+ case 'input-0': return 'main'
442
+ case 'input-1': return 'right'
443
+ case 'input-2': return 'left'
444
+ case _: raise ValueError(f"Unexpected connection_class: {self.connection_class}")
399
445
 
400
446
 
401
447
  class NodePivot(NodeSingleInput):
448
+ """Settings for a node that pivots data from a long to a wide format."""
402
449
  pivot_input: transform_schema.PivotInput = None
403
450
  output_fields: Optional[List[MinimalFieldInfo]] = None
404
451
 
405
452
 
406
453
  class NodeUnpivot(NodeSingleInput):
454
+ """Settings for a node that unpivots data from a wide to a long format."""
407
455
  unpivot_input: transform_schema.UnpivotInput = None
408
456
 
409
457
 
410
458
  class NodeUnion(NodeMultiInput):
459
+ """Settings for a node that concatenates multiple data inputs."""
411
460
  union_input: transform_schema.UnionInput = Field(default_factory=transform_schema.UnionInput)
412
461
 
413
462
 
414
463
  class NodeOutput(NodeSingleInput):
464
+ """Settings for a node that writes its input to a file."""
415
465
  output_settings: OutputSettings
416
466
 
417
467
 
418
468
  class NodeOutputConnection(BaseModel):
469
+ """Represents the output side of a connection between two nodes."""
419
470
  node_id: int
420
471
  connection_class: OutputConnectionClass
421
472
 
422
473
 
423
474
  class NodeConnection(BaseModel):
475
+ """Represents a connection (edge) between two nodes in the graph."""
424
476
  input_connection: NodeInputConnection
425
477
  output_connection: NodeOutputConnection
426
478
 
427
479
  @classmethod
428
480
  def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
429
-
481
+ """Creates a standard connection between two nodes."""
430
482
  match input_type:
431
- case "main":
432
- connection_class: InputConnectionClass = "input-0"
433
- case "right":
434
- connection_class: InputConnectionClass = "input-1"
435
- case "left":
436
- connection_class: InputConnectionClass = "input-2"
437
- case _:
438
- connection_class: InputConnectionClass = "input-0"
483
+ case "main": connection_class: InputConnectionClass = "input-0"
484
+ case "right": connection_class: InputConnectionClass = "input-1"
485
+ case "left": connection_class: InputConnectionClass = "input-2"
486
+ case _: connection_class: InputConnectionClass = "input-0"
439
487
  node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
440
488
  node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
441
489
  return cls(input_connection=node_input, output_connection=node_output)
442
490
 
443
491
 
444
492
  class NodeDescription(BaseModel):
493
+ """A simple model for updating a node's description text."""
445
494
  description: str = ''
446
495
 
447
496
 
448
497
  class NodeExploreData(NodeBase):
498
+ """Settings for a node that provides an interactive data exploration interface."""
449
499
  graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
450
- _hash_overrule: int = 0
451
-
452
- def __hash__(self):
453
- return 0
454
500
 
455
501
 
456
502
  class NodeGraphSolver(NodeSingleInput):
503
+ """Settings for a node that solves graph-based problems (e.g., connected components)."""
457
504
  graph_solver_input: transform_schema.GraphSolverInput
458
505
 
459
506
 
460
507
  class NodeUnique(NodeSingleInput):
508
+ """Settings for a node that returns the unique rows from the data."""
461
509
  unique_input: transform_schema.UniqueInput
462
510
 
463
511
 
464
512
  class NodeRecordCount(NodeSingleInput):
513
+ """Settings for a node that counts the number of records."""
465
514
  pass
466
515
 
467
516
 
468
517
  class NodePolarsCode(NodeMultiInput):
518
+ """Settings for a node that executes arbitrary user-provided Polars code."""
469
519
  polars_code_input: transform_schema.PolarsCodeInput
@@ -5,8 +5,9 @@ import time
5
5
 
6
6
 
7
7
  class NodeResult(BaseModel):
8
+ """Represents the execution result of a single node in a FlowGraph run."""
8
9
  node_id: int
9
- node_name: str = None
10
+ node_name: Optional[str] = None
10
11
  start_timestamp: float = Field(default_factory=time.time)
11
12
  end_timestamp: float = 0
12
13
  success: Optional[bool] = None
@@ -16,6 +17,7 @@ class NodeResult(BaseModel):
16
17
 
17
18
 
18
19
  class RunInformation(BaseModel):
20
+ """Contains summary information about a complete FlowGraph execution."""
19
21
  flow_id: int
20
22
  start_time: Optional[datetime] = Field(default_factory=datetime.now)
21
23
  end_time: Optional[datetime] = None
@@ -26,6 +28,7 @@ class RunInformation(BaseModel):
26
28
 
27
29
 
28
30
  class BaseItem(BaseModel):
31
+ """A base model for any item in a file system, like a file or directory."""
29
32
  name: str
30
33
  path: str
31
34
  size: Optional[int] = None
@@ -37,6 +40,7 @@ class BaseItem(BaseModel):
37
40
 
38
41
 
39
42
  class FileColumn(BaseModel):
43
+ """Represents detailed schema and statistics for a single column (field)."""
40
44
  name: str
41
45
  data_type: str
42
46
  is_unique: bool
@@ -49,6 +53,7 @@ class FileColumn(BaseModel):
49
53
 
50
54
 
51
55
  class TableExample(BaseModel):
56
+ """Represents a preview of a table, including schema and sample data."""
52
57
  node_id: int
53
58
  number_of_records: int
54
59
  number_of_columns: int
@@ -59,6 +64,10 @@ class TableExample(BaseModel):
59
64
 
60
65
 
61
66
  class NodeData(BaseModel):
67
+ """A comprehensive model holding the complete state and data for a single node.
68
+
69
+ This includes its input/output data previews, settings, and run status.
70
+ """
62
71
  flow_id: int
63
72
  node_id: int
64
73
  flow_type: str
@@ -74,19 +83,23 @@ class NodeData(BaseModel):
74
83
 
75
84
 
76
85
  class OutputFile(BaseItem):
86
+ """Represents a single file in an output directory, extending BaseItem."""
77
87
  ext: Optional[str] = None
78
88
  mimetype: Optional[str] = None
79
89
 
80
90
 
81
91
  class OutputFiles(BaseItem):
92
+ """Represents a collection of files, typically within a directory."""
82
93
  files: List[OutputFile] = Field(default_factory=list)
83
94
 
84
95
 
85
96
  class OutputTree(OutputFiles):
97
+ """Represents a directory tree, including subdirectories."""
86
98
  directories: List[OutputFiles] = Field(default_factory=list)
87
99
 
88
100
 
89
101
  class ItemInfo(OutputFile):
102
+ """Provides detailed information about a single item in an output directory."""
90
103
  id: int = -1
91
104
  type: str
92
105
  analysis_file_available: bool = False
@@ -95,21 +108,24 @@ class ItemInfo(OutputFile):
95
108
 
96
109
 
97
110
  class OutputDir(BaseItem):
111
+ """Represents the contents of a single output directory."""
98
112
  all_items: List[str]
99
113
  items: List[ItemInfo]
100
114
 
101
115
 
102
116
  class ExpressionRef(BaseModel):
117
+ """A reference to a single Polars expression, including its name and docstring."""
103
118
  name: str
104
119
  doc: Optional[str]
105
120
 
106
121
 
107
122
  class ExpressionsOverview(BaseModel):
123
+ """Represents a categorized list of available Polars expressions."""
108
124
  expression_type: str
109
125
  expressions: List[ExpressionRef]
110
126
 
111
127
 
112
128
  class InstantFuncResult(BaseModel):
129
+ """Represents the result of a function that is expected to execute instantly."""
113
130
  success: Optional[bool] = None
114
- result: str
115
-
131
+ result: str