Flowfile 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (171) hide show
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
File without changes
@@ -0,0 +1,118 @@
1
+ from typing import Any, Dict, List, Optional, Literal
2
+ from pydantic import BaseModel, Field, model_validator
3
+
4
+
5
class GeoRole(BaseModel):
    """Geo role of a field (placeholder model)."""

    # Example attribute only; the real geo role specifics are not modelled yet.
    role_type: str
8
+
9
+
10
class Expression(BaseModel):
    """Expression attached to a field (placeholder model)."""

    # Example attribute only; the real expression specifics are not modelled yet.
    expression: str
13
+
14
+
15
# Values accepted for the `analyticType` attribute of the field models below.
AnalyticTypeLit = Literal[
    'measure',
    'dimension',
]
16
+
17
+
18
class IField(BaseModel):
    """Field definition with optional aggregation metadata.

    After validation, a field whose ``analyticType`` is ``'measure'`` and whose
    ``aggName`` was left unset gets ``aggName = "sum"``.
    """

    fid: str
    name: str
    basename: Optional[str] = None
    semanticType: str
    analyticType: AnalyticTypeLit
    cmp: Optional[str] = None
    geoRole: Optional[GeoRole] = None
    computed: Optional[bool] = None
    expression: Optional[str] = None
    timeUnit: Optional[str] = None
    path: Optional[List[str]] = None
    offset: Optional[int] = None
    aggName: Optional[str] = None
    aggregated: Optional[bool] = None

    @model_validator(mode='after')
    def set_default_aggname(self):
        """Default ``aggName`` to ``"sum"`` for measures that do not set one."""
        is_measure_without_agg = self.analyticType == 'measure' and self.aggName is None
        if is_measure_without_agg:
            self.aggName = "sum"
        return self

    def model_dump_dict(self):
        """Dump the model without ``None`` values, but always with ``offset: None``."""
        # The 'offset' key is forced to None regardless of the stored value.
        return {**self.model_dump(exclude_none=True), 'offset': None}
44
+
45
+
46
class ViewField(IField):
    """An ``IField`` extended with an optional ``sort`` value."""

    sort: Optional[str] = None
48
+
49
+
50
class FilterField(ViewField):
    """A ``ViewField`` that additionally carries a filter ``rule``."""

    rule: Any  # structure of the rule is not constrained here
    enableAgg: Optional[bool] = False
53
+
54
+
55
class DraggableFieldState(BaseModel):
    """Lists of fields assigned to each encoding slot.

    Every slot is required; ``filters`` holds ``FilterField`` items, every
    other slot holds ``ViewField`` items.
    """

    dimensions: List[ViewField]
    measures: List[ViewField]
    rows: List[ViewField]
    columns: List[ViewField]
    color: List[ViewField]
    opacity: List[ViewField]
    size: List[ViewField]
    shape: List[ViewField]
    theta: List[ViewField]
    radius: List[ViewField]
    longitude: List[ViewField]
    latitude: List[ViewField]
    geoId: List[ViewField]
    details: List[ViewField]
    filters: List[FilterField]
    text: List[ViewField]
72
+
73
+
74
class ConfigScale(BaseModel):
    """Range and domain bounds of a scale.

    No defaults are declared, so each bound has to be supplied (``None`` is an
    accepted value).
    """

    rangeMax: Optional[int]
    rangeMin: Optional[int]
    domainMin: Optional[int]
    domainMax: Optional[int]
79
+
80
+
81
class MutField(BaseModel):
    """Field entry used in ``DataModel.fields``."""

    fid: str
    key: Optional[str] = None
    name: Optional[str] = None
    basename: Optional[str] = None
    disable: Optional[bool] = False
    semanticType: str
    analyticType: AnalyticTypeLit
    path: Optional[List[str]] = None
    offset: Optional[int] = None
91
+
92
+
93
class DataModel(BaseModel):
    """Records plus the field definitions that accompany them."""

    data: List[Dict[str, Any]]  # one dict per record
    fields: List[MutField]
96
+
97
+
98
class IVisualConfigNew(BaseModel):
    """Visual configuration of a chart.

    ``limit`` defaults to ``None``; it is annotated as optional so that a
    dumped model (which contains ``limit=None``) validates again.
    """

    defaultAggregated: bool
    geoms: List[str]
    coordSystem: Optional[str]
    # Was `int = None`: the default bypassed validation, but an explicit None
    # (e.g. from model_dump()) was rejected.
    limit: Optional[int] = None
    folds: Optional[List[str]] = []
    timezoneDisplayOffset: Optional[int] = None
105
+
106
+
107
class Chart(BaseModel):
    """A chart: identifier, name, encodings and visual configuration."""

    visId: str
    name: Optional[str]  # no default declared, so it must be passed (may be None)
    encodings: DraggableFieldState
    config: IVisualConfigNew
112
+
113
+
114
class GraphicWalkerInput(BaseModel):
    """Input payload holding a data model and an optional list of specs."""

    # Defaults to an empty data model (no records, no fields).
    dataModel: DataModel = Field(
        default_factory=lambda: DataModel(data=[], fields=[]),
    )
    is_initial: bool = True
    specList: Optional[List[Any]] = None
118
+
@@ -0,0 +1,9 @@
1
# Import through the installed package path: the distribution ships
# `flowfile_core.schemas`, not a top-level `schemas` package.
from flowfile_core.schemas import transform_schema
from pydantic import Field, BaseModel


# Factory used to build the default value of `F.f`.
default_union_input = transform_schema.UnionInput


class F(BaseModel):
    """Model with a single ``UnionInput`` field built by ``default_union_input``."""

    f: transform_schema.UnionInput = Field(default_factory=default_union_input)
File without changes
@@ -0,0 +1,20 @@
1
+ from typing import TypeAlias, Optional, Dict, Any, Literal
2
+ from pydantic import BaseModel, Field
3
+
4
+
5
# Values accepted for `AirbyteConfig.config_mode`.
config_options: TypeAlias = Literal[
    "in_line",
    "key_vault",
]
6
+
7
+
8
class AirbyteConfig(BaseModel):
    """Configuration of an Airbyte source."""

    source_name: str
    selected_stream: Optional[str] = None
    config_mode: config_options = "in_line"
    mapped_config_spec: Optional[Dict[str, Any]] = Field(default_factory=dict)
    parsed_config: Optional[Any] = None
    connection_name: Optional[str] = None
    version: Optional[str] = None

    @property
    def full_source_name(self) -> str:
        """``source_name`` prefixed with ``"source-"``."""
        prefixed = f"source-{self.source_name}"
        return prefixed
20
+
@@ -0,0 +1,477 @@
1
+ from typing import List, Optional, Literal
2
+ from flowfile_core.schemas import transform_schema
3
+ from pathlib import Path
4
+ import os
5
+ from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
6
+ from flowfile_core.schemas.external_sources.airbyte_schemas import AirbyteConfig
7
+ from pydantic import BaseModel, Field, model_validator, SecretStr, ConfigDict
8
+
9
+
10
# Identifiers of the output handles a connection can start from.
OutputConnectionClass = Literal[
    'output-0', 'output-1',
    'output-2', 'output-3',
    'output-4', 'output-5',
    'output-6', 'output-7',
    'output-8', 'output-9',
]

# Identifiers of the input handles a connection can end at.
InputConnectionClass = Literal[
    'input-0', 'input-1',
    'input-2', 'input-3',
    'input-4', 'input-5',
    'input-6', 'input-7',
    'input-8', 'input-9',
]

# Named input kinds; NodeConnection.create_from_simple_input maps them to
# 'input-0' (main), 'input-1' (right) and 'input-2' (left).
InputType = Literal["main", "left", "right"]
17
+
18
class NewDirectory(BaseModel):
    """Payload naming a directory (``dir_name``) together with a ``source_path``."""

    source_path: str
    dir_name: str
21
+
22
+
23
class RemoveItem(BaseModel):
    """A single item to remove, identified by ``path``."""

    path: str
    id: int = -1  # -1 is the default when no id is supplied
26
+
27
+
28
class RemoveItemsInput(BaseModel):
    """A batch of ``RemoveItem`` entries together with a ``source_path``."""

    paths: List[RemoveItem]
    source_path: str
31
+
32
+
33
class MinimalFieldInfo(BaseModel):
    """Name and data type of a single field, both as strings."""

    name: str
    data_type: str
36
+
37
+
38
class ReceivedTableBase(BaseModel):
    """Common description of a file-based input table.

    ``abs_file_path`` is filled in after validation when it was not supplied.
    """

    id: Optional[int] = None
    name: Optional[str]
    path: str  # This can be an absolute or relative path
    directory: Optional[str] = None
    analysis_file_available: bool = False
    status: Optional[str] = None
    file_type: Optional[str] = None
    fields: List[MinimalFieldInfo] = Field(default_factory=list)
    abs_file_path: Optional[str] = None

    @classmethod
    def create_from_path(cls, path: str):
        """Create an instance whose ``name`` is the last component of ``path``."""
        filename = Path(path).name
        return cls(name=filename, path=path)

    @property
    def file_path(self) -> str:
        """Return ``path``, with ``name`` appended when ``path`` lacks it.

        ``name`` is optional; when it is ``None`` or empty, ``path`` is
        returned unchanged instead of raising ``TypeError``.
        """
        # Substring test, mirroring the guard in set_absolute_filepath.
        if self.name and self.name not in self.path:
            return os.path.join(self.path, self.name)
        return self.path

    def set_absolute_filepath(self):
        """Compute ``abs_file_path`` from ``path`` (and ``name`` when needed)."""
        base_path = Path(self.path)
        # Relative paths are resolved against the current working directory.
        if not base_path.is_absolute():
            base_path = Path.cwd() / base_path

        # Append the name only when the last path component does not contain it.
        if self.name and self.name not in base_path.name:
            base_path = base_path / self.name

        self.abs_file_path = str(base_path.resolve())

    @model_validator(mode='after')
    def populate_abs_file_path(self):
        """Fill ``abs_file_path`` after validation unless it was provided."""
        if not self.abs_file_path:
            self.set_absolute_filepath()
        return self
78
+
79
+
80
class ReceivedCsvTable(ReceivedTableBase):
    """Received table with CSV reading options; ``file_type`` defaults to ``'csv'``."""

    file_type: str = 'csv'
    reference: str = ''
    starting_from_line: int = 0
    delimiter: str = ','
    has_headers: bool = True
    encoding: str | None = 'utf-8'
    parquet_ref: str | None = None
    row_delimiter: str = '\n'
    quote_char: str = '"'
    infer_schema_length: int = 10_000
    truncate_ragged_lines: bool = False
    ignore_errors: bool = False
93
+
94
+
95
class ReceivedJsonTable(ReceivedCsvTable):
    """JSON variant; adds nothing to ``ReceivedCsvTable``."""
97
+
98
+
99
class ReceivedParquetTable(BaseModel):
    """Parquet marker model; only declares ``file_type`` (default ``'parquet'``)."""

    file_type: str = 'parquet'
101
+
102
+
103
class ReceivedExcelTable(ReceivedTableBase):
    """Received table with Excel sheet and cell-range options."""

    sheet_name: str | None = None
    start_row: int = 0  # optional
    start_column: int = 0  # optional
    end_row: int = 0  # optional
    end_column: int = 0  # optional
    has_headers: bool = True  # optional
    type_inference: bool = False  # optional

    def validate_range_values(self):
        """Check the configured row/column range.

        Raises:
            ValueError: if any index is negative or not an int, or if a start
                index exceeds its non-zero end index.
        """
        indices = (self.start_row, self.start_column, self.end_row, self.end_column)
        if any(not isinstance(value, int) or value < 0 for value in indices):
            raise ValueError("Row and column indices must be non-negative integers")

        # An end value of 0 means "not specified", so only positive ends are compared.
        rows_inverted = self.end_row > 0 and self.start_row > self.end_row
        columns_inverted = self.end_column > 0 and self.start_column > self.end_column
        if rows_inverted or columns_inverted:
            raise ValueError("Start row/column must not be greater than end row/column if specified")
122
+
123
+
124
class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
    """Combination of the Excel, CSV and Parquet table models; adds no fields."""
126
+
127
+
128
class OutputCsvTable(BaseModel):
    """CSV output options (delimiter and encoding)."""

    file_type: str = 'csv'
    delimiter: str = ','
    encoding: str = 'utf-8'
132
+
133
+
134
class OutputParquetTable(BaseModel):
    """Parquet output options; only ``file_type`` is declared."""

    file_type: str = 'parquet'
136
+
137
+
138
class OutputExcelTable(BaseModel):
    """Excel output options (target sheet name)."""

    file_type: str = 'excel'
    sheet_name: str = 'Sheet1'
141
+
142
+
143
class OutputSettings(BaseModel):
    """Where and how an output is written.

    ``abs_file_path`` is derived from ``directory`` and ``name`` after
    validation when it was not supplied.
    """

    name: str
    directory: str
    file_type: str
    fields: Optional[List[str]] = Field(default_factory=list)
    write_mode: str = 'overwrite'
    output_csv_table: OutputCsvTable
    output_parquet_table: OutputParquetTable
    output_excel_table: OutputExcelTable
    abs_file_path: str | None = None

    def set_absolute_filepath(self):
        """Compute ``abs_file_path`` from ``directory`` (and ``name`` when needed)."""
        target = Path(self.directory)
        # Relative directories are anchored at the current working directory.
        if not target.is_absolute():
            target = Path.cwd() / target

        # Append the file name unless the last component already contains it.
        name_missing = bool(self.name) and self.name not in target.name
        if name_missing:
            target = target / self.name

        self.abs_file_path = str(target.resolve())

    @model_validator(mode='after')
    def populate_abs_file_path(self):
        """Fill ``abs_file_path`` after validation unless it was provided."""
        if self.abs_file_path:
            return self
        self.set_absolute_filepath()
        return self
170
+
171
+
172
class NodeBase(BaseModel):
    """Fields shared by every node settings model."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    flow_id: int
    node_id: int
    cache_results: bool | None = False
    pos_x: float | None = 0
    pos_y: float | None = 0
    is_setup: bool | None = True
    description: str | None = ''
    user_id: int | None = None

    @classmethod
    def overridden_hash(cls):
        """Return True when ``cls`` uses a ``__hash__`` other than ``BaseModel.__hash__``."""
        hash_impl = getattr(cls, '__hash__')
        if not hash_impl:
            # A falsy __hash__ (e.g. None) counts as "not overridden".
            return False
        return hash_impl is not BaseModel.__hash__
188
+
189
+
190
class NodeSingleInput(NodeBase):
    """Node settings with a single upstream node id."""

    depending_on_id: int | None = -1  # -1 is the default when nothing is set
192
+
193
+
194
class NodeMultiInput(NodeBase):
    """Node settings with a list of upstream node ids."""

    depending_on_ids: Optional[List[int]] = [-1]  # [-1] is the default when nothing is set
196
+
197
+
198
class NodeSelect(NodeSingleInput):
    """Settings of a select node."""

    keep_missing: bool = True
    select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
    sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
202
+
203
+
204
class NodeFilter(NodeSingleInput):
    """Settings of a filter node; ``filter_input`` is required."""

    filter_input: transform_schema.FilterInput
206
+
207
+
208
class NodeSort(NodeSingleInput):
    """Settings of a sort node; ``sort_input`` defaults to an empty list."""

    sort_input: List[transform_schema.SortByInput] = Field(default_factory=list)
210
+
211
+
212
class NodeTextToRows(NodeSingleInput):
    """Settings of a text-to-rows node; ``text_to_rows_input`` is required."""

    text_to_rows_input: transform_schema.TextToRowsInput
214
+
215
+
216
class NodeSample(NodeSingleInput):
    """Settings of a sample node."""

    sample_size: int = 1000
218
+
219
+
220
class NodeRecordId(NodeSingleInput):
    """Settings of a record-id node; ``record_id_input`` is required."""

    record_id_input: transform_schema.RecordIdInput
222
+
223
+
224
class NodeJoin(NodeMultiInput):
    """Settings of a join node; every flag defaults to ``True``."""

    auto_generate_selection: bool = True
    verify_integrity: bool = True
    join_input: transform_schema.JoinInput
    auto_keep_all: bool = True
    auto_keep_right: bool = True
    auto_keep_left: bool = True
231
+
232
+
233
class NodeCrossJoin(NodeMultiInput):
    """Settings of a cross-join node; every flag defaults to ``True``."""

    auto_generate_selection: bool = True
    verify_integrity: bool = True
    cross_join_input: transform_schema.CrossJoinInput
    auto_keep_all: bool = True
    auto_keep_right: bool = True
    auto_keep_left: bool = True
240
+
241
+
242
class NodeFuzzyMatch(NodeJoin):
    """``NodeJoin`` whose ``join_input`` is narrowed to ``FuzzyMatchInput``."""

    join_input: transform_schema.FuzzyMatchInput
244
+
245
+
246
class NodeDatasource(NodeBase):
    """Settings of a datasource node."""

    # Was `str = None`: the default skipped validation, but an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    file_ref: Optional[str] = None
248
+
249
+
250
class NodeManualInput(NodeBase):
    """Settings of a manual-input node."""

    # Was `List = None`: the default skipped validation, but an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    raw_data: Optional[List] = None
252
+
253
+
254
class NodeRead(NodeBase):
    """Settings of a read node; ``received_file`` is required."""

    received_file: ReceivedTable
256
+
257
+
258
class DatabaseConnection(BaseModel):
    """Database connection details; the password is held as ``password_ref``."""

    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: str | None = None
    password_ref: str | None = None
    host: str | None = None
    port: int | None = None
    database: str | None = None
    url: str | None = None
266
+
267
+
268
class FullDatabaseConnection(BaseModel):
    """Named database connection including the password as a ``SecretStr``."""

    connection_name: str
    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: str
    password: SecretStr
    host: str | None = None
    port: int | None = None
    database: str | None = None
    ssl_enabled: bool | None = False
    url: str | None = None
278
+
279
+
280
class FullDatabaseConnectionInterface(BaseModel):
    """Same fields as ``FullDatabaseConnection`` except that ``password`` is absent."""

    connection_name: str
    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    username: str
    host: str | None = None
    port: int | None = None
    database: str | None = None
    ssl_enabled: bool | None = False
    url: str | None = None
289
+
290
+
291
class DatabaseSettings(BaseModel):
    """Settings for reading from a database.

    The connection is given inline (``database_connection``) or by name
    (``database_connection_name``), depending on ``connection_mode``.
    """

    connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
    database_connection: Optional[DatabaseConnection] = None
    database_connection_name: Optional[str] = None
    schema_name: Optional[str] = None
    table_name: Optional[str] = None
    query: Optional[str] = None
    query_mode: Literal['query', 'table', 'reference'] = 'table'

    # A second validator with this same name used to precede this one; Python
    # kept only the later definition, so the shadowed copy was removed.
    @model_validator(mode='after')
    def validate_table_or_query(self):
        """Validate table/query presence and the connection information.

        Raises:
            ValueError: if the connection information required by
                ``connection_mode`` is missing.
        """
        # NOTE(review): `query_mode` only allows 'query', 'table' or 'reference',
        # so the comparison with 'inline' never matches and this check never
        # fires. Confirm the intended condition before tightening it.
        if (not self.table_name and not self.query) and self.query_mode == 'inline':
            raise ValueError("Either 'table_name' or 'query' must be provided")

        # Validate correct connection information based on connection_mode
        if self.connection_mode == 'inline' and self.database_connection is None:
            raise ValueError("When 'connection_mode' is 'inline', 'database_connection' must be provided")

        if self.connection_mode == 'reference' and not self.database_connection_name:
            raise ValueError("When 'connection_mode' is 'reference', 'database_connection_name' must be provided")

        return self
320
+
321
+
322
class DatabaseWriteSettings(BaseModel):
    """Settings for writing to a database table; ``table_name`` is required."""

    connection_mode: Optional[Literal['inline', 'reference']] = 'inline'
    database_connection: Optional[DatabaseConnection] = None
    database_connection_name: str | None = None
    table_name: str
    schema_name: str | None = None
    if_exists: Optional[Literal['append', 'replace', 'fail']] = 'append'
329
+
330
+
331
class NodeDatabaseReader(NodeBase):
    """Settings of a database reader node."""

    database_settings: DatabaseSettings
    fields: Optional[List[MinimalFieldInfo]] = None
334
+
335
+
336
class NodeDatabaseWriter(NodeSingleInput):
    """Settings of a database writer node; ``database_write_settings`` is required."""

    database_write_settings: DatabaseWriteSettings
338
+
339
+
340
class ExternalSource(BaseModel):
    """Base model for external source settings."""

    orientation: str = 'row'
    fields: Optional[List[MinimalFieldInfo]] = None
343
+
344
+
345
class SampleUsers(ExternalSource):
    """External source settings with ``class_name`` ``"sample_users"``."""

    SAMPLE_USERS: bool  # required flag
    class_name: str = "sample_users"
    size: int = 100
349
+
350
+
351
class GoogleSheet(ExternalSource):
    """External source settings with ``class_name`` ``"google_sheet"``."""

    GOOGLE_SHEET: bool  # required flag
    class_name: str = "google_sheet"
    # Was `SecretStr = None`: the default skipped validation, but an explicit
    # None (e.g. when re-validating a dumped model) was rejected.
    access_token: Optional[SecretStr] = None
    sheet_id: str
    worksheet_name: str
    sheet_name: str
358
+
359
+
360
class AirbyteReader(AirbyteConfig):
    """``AirbyteConfig`` extended with a ``class_name`` and optional ``fields``."""

    class_name: str | None = "airbyte_reader"
    fields: Optional[List[MinimalFieldInfo]] = None
363
+
364
+
365
class AccessToken(BaseModel):
    """Access token associated with a ``user_id``."""

    user_id: str
    # Was `SecretStr = None`: the default skipped validation, but an explicit
    # None (e.g. when re-validating a dumped model) was rejected.
    access_token: Optional[SecretStr] = None
368
+
369
+
370
class NodeExternalSource(NodeBase):
    """Settings of an external source node."""

    identifier: str
    # Either of the two external source settings models is accepted.
    source_settings: SampleUsers | GoogleSheet
373
+
374
+
375
class NodeAirbyteReader(NodeExternalSource):
    """External source node whose settings are an ``AirbyteReader``."""

    identifier: str = 'airbyte'
    source_settings: AirbyteReader
378
+
379
+
380
class NodeFormula(NodeSingleInput):
    """Settings of a formula node."""

    # Was annotated as non-optional with a None default; an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    function: Optional[transform_schema.FunctionInput] = None
382
+
383
+
384
class NodeGroupBy(NodeSingleInput):
    """Settings of a group-by node."""

    # Was annotated as non-optional with a None default; an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    groupby_input: Optional[transform_schema.GroupByInput] = None
386
+
387
+
388
class NodePromise(NodeBase):
    """Node placeholder identified by ``node_type``; ``is_setup`` defaults to False."""

    is_setup: bool = False
    node_type: str
391
+
392
+
393
class NodeInputConnection(BaseModel):
    """Receiving end of a connection: a node id plus its input handle."""

    node_id: int
    connection_class: InputConnectionClass

    def get_node_input_connection_type(self) -> Literal['main', 'right', 'left']:
        """Translate ``connection_class`` into ``'main'``, ``'right'`` or ``'left'``.

        Raises:
            ValueError: for any handle other than 'input-0', 'input-1' or 'input-2'.
        """
        handle = self.connection_class
        if handle == 'input-0':
            return 'main'
        if handle == 'input-1':
            return 'right'
        if handle == 'input-2':
            return 'left'
        raise ValueError(f"Unexpected connection_class: {self.connection_class}")
407
+
408
+
409
class NodePivot(NodeSingleInput):
    """Settings of a pivot node."""

    # Was annotated as non-optional with a None default; an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    pivot_input: Optional[transform_schema.PivotInput] = None
    output_fields: Optional[List[MinimalFieldInfo]] = None
412
+
413
+
414
class NodeUnpivot(NodeSingleInput):
    """Settings of an unpivot node."""

    # Was annotated as non-optional with a None default; an explicit None
    # (e.g. when re-validating a dumped model) was rejected.
    unpivot_input: Optional[transform_schema.UnpivotInput] = None
416
+
417
+
418
class NodeUnion(NodeMultiInput):
    """Settings of a union node; ``union_input`` defaults to a fresh ``UnionInput``."""

    union_input: transform_schema.UnionInput = Field(
        default_factory=transform_schema.UnionInput,
    )
420
+
421
+
422
class NodeOutput(NodeSingleInput):
    """Settings of an output node; ``output_settings`` is required."""

    output_settings: OutputSettings
424
+
425
+
426
class NodeOutputConnection(BaseModel):
    """Sending end of a connection: a node id plus its output handle."""

    node_id: int
    connection_class: OutputConnectionClass
429
+
430
+
431
+ class NodeConnection(BaseModel):
432
+ input_connection: NodeInputConnection
433
+ output_connection: NodeOutputConnection
434
+
435
+ @classmethod
436
+ def create_from_simple_input(cls, from_id: int, to_id: int, input_type: InputType = "input-0"):
437
+
438
+ match input_type:
439
+ case "main":
440
+ connection_class: InputConnectionClass = "input-0"
441
+ case "right":
442
+ connection_class: InputConnectionClass = "input-1"
443
+ case "left":
444
+ connection_class: InputConnectionClass = "input-2"
445
+ case _:
446
+ connection_class: InputConnectionClass = "input-0"
447
+ node_input = NodeInputConnection(node_id=to_id, connection_class=connection_class)
448
+ node_output = NodeOutputConnection(node_id=from_id, connection_class='output-0')
449
+ return cls(input_connection=node_input, output_connection=node_output)
450
+
451
+
452
class NodeDescription(BaseModel):
    """Holds a node description string (empty by default)."""

    description: str = ''
454
+
455
+
456
class NodeExploreData(NodeBase):
    """Settings of an explore-data node.

    Every instance hashes to the same constant value.
    """

    graphic_walker_input: Optional[gs_schemas.GraphicWalkerInput] = None
    _hash_overrule: int = 0

    def __hash__(self):
        # Constant hash: the node's contents do not influence it.
        return 0
462
+
463
+
464
class NodeGraphSolver(NodeSingleInput):
    """Settings of a graph-solver node; ``graph_solver_input`` is required."""

    graph_solver_input: transform_schema.GraphSolverInput
466
+
467
+
468
class NodeUnique(NodeSingleInput):
    """Settings of a unique node; ``unique_input`` is required."""

    unique_input: transform_schema.UniqueInput
470
+
471
+
472
class NodeRecordCount(NodeSingleInput):
    """Settings of a record-count node; adds nothing to ``NodeSingleInput``."""
474
+
475
+
476
class NodePolarsCode(NodeMultiInput):
    """Settings of a Polars-code node; ``polars_code_input`` is required."""

    polars_code_input: transform_schema.PolarsCodeInput