Flowfile 0.2.2 (flowfile-0.2.2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of Flowfile might be problematic.

Files changed (171)
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
flowfile_core/utils/arrow_reader.py
@@ -0,0 +1,247 @@
+ import pyarrow as pa
+ from typing import List, Iterator, Tuple, Callable
+ from flowfile_core.configs import logger
+
+
+ def open_validated_file(file_path: str, n: int) -> pa.OSFile:
+     """
+     Validate and open an Arrow file with input parameter checking.
+
+     This function performs validation on the input parameters and opens the Arrow file
+     in binary read mode. It includes checks for file existence and parameter types.
+
+     Args:
+         file_path (str): Path to the Arrow file to be opened.
+         n (int): Number of rows to be read. Used for validation purposes.
+             Must be non-negative.
+
+     Returns:
+         pa.OSFile: An open PyArrow file object ready for reading.
+
+     Raises:
+         ValueError: If n is negative.
+         TypeError: If file_path is not a string.
+         FileNotFoundError: If the specified file does not exist.
+
+     Example:
+         >>> file = open_validated_file("data.arrow", 1000)
+         >>> # Use the file object
+         >>> file.close()
+     """
+     logger.debug(f"Attempting to open file: {file_path} with n={n}")
+     if n < 0:
+         logger.error(f"Invalid negative row count requested: {n}")
+         raise ValueError("Number of rows must be non-negative")
+     if not isinstance(file_path, str):
+         logger.error(f"Invalid file_path type: {type(file_path)}")
+         raise TypeError("file_path must be a string")
+     try:
+         file = pa.OSFile(file_path, 'rb')
+         logger.info(f"Successfully opened file: {file_path}")
+         return file
+     except FileNotFoundError:
+         logger.error(f"File not found: {file_path}")
+         raise FileNotFoundError(f"Could not find file: {file_path}")
+
+
+ def create_reader(source: pa.OSFile) -> pa.ipc.RecordBatchFileReader:
+     """
+     Create a RecordBatchFileReader from an open Arrow file.
+
+     This function initializes a reader that can process the Arrow IPC file format.
+     It handles the creation of the reader and validates the file format.
+
+     Args:
+         source (pa.OSFile): An open PyArrow file object.
+
+     Returns:
+         pa.ipc.RecordBatchFileReader: A reader object for processing Arrow record batches.
+
+     Raises:
+         ValueError: If the file is not a valid Arrow format.
+
+     Example:
+         >>> with open_validated_file("data.arrow", 1000) as source:
+         ...     reader = create_reader(source)
+         ...     # Use the reader
+     """
+     try:
+         reader = pa.ipc.open_file(source)
+         logger.debug(f"Created reader with {reader.num_record_batches} batches")
+         return reader
+     except pa.ArrowInvalid:
+         logger.error("Failed to create reader: Invalid Arrow file format")
+         raise ValueError("Invalid Arrow file format")
+
+
+ def iter_batches(reader: pa.ipc.RecordBatchFileReader, n: int, rows_collected: int) -> Iterator[pa.RecordBatch]:
+     """
+     Iterate over record batches with row-limit handling.
+
+     Yields record batches from the reader, slicing the last batch when needed to
+     meet the requested row count. This function provides efficient batch-wise
+     processing of Arrow data.
+
+     Args:
+         reader (pa.ipc.RecordBatchFileReader): The Arrow file reader.
+         n (int): Maximum number of rows to read in total.
+         rows_collected (int): Number of rows already collected before this iteration.
+
+     Yields:
+         pa.RecordBatch: Record batches containing the data, with the last batch
+             potentially sliced to meet the row count requirement.
+
+     Example:
+         >>> reader = create_reader(source)
+         >>> for batch in iter_batches(reader, 1000, 0):
+         ...     # Process each batch
+         ...     process_batch(batch)
+     """
+     logger.debug(f"Starting batch iteration: target={n}, collected={rows_collected}")
+     for i in range(reader.num_record_batches):
+         batch = reader.get_record_batch(i)
+         batch_rows = batch.num_rows
+         logger.debug(f"Processing batch {i}: {batch_rows} rows")
+         if rows_collected + batch_rows <= n:
+             yield batch
+             rows_collected += batch_rows  # track yielded rows so any later batch is sliced correctly
+         else:
+             remaining_rows = n - rows_collected
+             logger.debug(f"Slicing final batch to {remaining_rows} rows")
+             yield batch.slice(0, remaining_rows)
+             break
+
+
+ def collect_batches(reader: pa.ipc.RecordBatchFileReader, n: int) -> Tuple[List[pa.RecordBatch], int]:
+     """
+     Collect record batches from a reader up to a specified number of rows.
+
+     This function aggregates record batches while respecting the total row count
+     limit. It's useful for building a complete dataset from multiple batches.
+
+     Args:
+         reader (pa.ipc.RecordBatchFileReader): The Arrow file reader.
+         n (int): Maximum number of rows to collect.
+
+     Returns:
+         Tuple[List[pa.RecordBatch], int]: A tuple containing:
+             - List of collected record batches
+             - Total number of rows collected
+
+     Example:
+         >>> reader = create_reader(source)
+         >>> batches, row_count = collect_batches(reader, 1000)
+         >>> print(f"Collected {row_count} rows in {len(batches)} batches")
+     """
+     logger.debug(f"Collecting batches up to {n} rows")
+     batches: List[pa.RecordBatch] = []
+     rows_collected = 0
+
+     for batch in iter_batches(reader, n, rows_collected):
+         batches.append(batch)
+         rows_collected += batch.num_rows
+         logger.debug(f"Collected batch: total rows now {rows_collected}")
+         if rows_collected >= n:
+             break
+
+     logger.info(f"Finished collecting {len(batches)} batches with {rows_collected} total rows")
+     return batches, rows_collected
+
+
+ def read(file_path: str) -> pa.Table:
+     """
+     Read an entire Arrow file into a Table.
+
+     This function provides a simple interface to read all data from an Arrow file.
+     It handles file opening, reading, and proper resource cleanup.
+
+     Args:
+         file_path (str): Path to the Arrow file to read.
+
+     Returns:
+         pa.Table: PyArrow Table containing all data from the file.
+
+     Raises:
+         FileNotFoundError: If the file doesn't exist.
+         ValueError: If the file is not a valid Arrow format.
+
+     Example:
+         >>> table = read("data.arrow")
+         >>> print(f"Read {len(table)} rows")
+         >>> print(f"Columns: {table.column_names}")
+     """
+     logger.info(f"Reading entire file: {file_path}")
+     with open_validated_file(file_path, 0) as source:
+         reader = create_reader(source)
+         batches, total_rows = collect_batches(reader, float('inf'))
+         table = pa.Table.from_batches(batches)  # type: ignore
+         logger.info(f"Successfully read {total_rows} rows from {file_path}")
+         return table
+
+
+ def read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> pa.Table:
+     """
+     Read the first N rows from an Arrow file.
+
+     This function provides efficient reading of a limited number of rows from an
+     Arrow file. It's useful for data sampling and preview operations.
+
+     Args:
+         file_path (str): Path to the Arrow file to read.
+         n (int, optional): Number of rows to read. Defaults to 1000.
+         strict (bool, optional): If True, raises an error when fewer than n rows
+             are available. Defaults to False.
+
+     Returns:
+         pa.Table: PyArrow Table containing up to n rows of data.
+
+     Raises:
+         ValueError: If strict=True and fewer than n rows are available,
+             or if n is negative.
+         FileNotFoundError: If the file doesn't exist.
+
+     Example:
+         >>> # Read first 1000 rows
+         >>> table = read_top_n("data.arrow")
+         >>> # Read exactly 500 rows with strict checking
+         >>> table = read_top_n("data.arrow", n=500, strict=True)
+     """
+     logger.info(f"Reading top {n} rows from {file_path} (strict={strict})")
+     with open_validated_file(file_path, n) as source:
+         reader = create_reader(source)
+         batches, rows_collected = collect_batches(reader, n)
+
+         if strict and rows_collected < n:
+             logger.error(f"Strict mode: requested {n} rows but only {rows_collected} available")
+             raise ValueError(f"Requested {n} rows but only {rows_collected} available")
+
+         table = pa.Table.from_batches(batches)  # type: ignore
+         logger.info(f"Successfully read {rows_collected} rows from {file_path}")
+         return table
+
+
+ def get_read_top_n(file_path: str, n: int = 1000, strict: bool = False) -> Callable[[], pa.Table]:
+     """
+     Create a callable that reads the first N rows from an Arrow file.
+
+     This function returns a closure that can be called later to read data.
+     It's useful for creating reusable data-reading functions with fixed parameters.
+
+     Args:
+         file_path (str): Path to the Arrow file to read.
+         n (int, optional): Number of rows to read. Defaults to 1000.
+         strict (bool, optional): If True, raises an error when fewer than n rows
+             are available. Defaults to False.
+
+     Returns:
+         Callable[[], pa.Table]: A function that, when called, reads and returns
+             the data as a PyArrow Table.
+
+     Example:
+         >>> # Create a reader function for the first 500 rows
+         >>> reader_func = get_read_top_n("data.arrow", n=500)
+         >>> # Later, use the function to read the data
+         >>> table = reader_func()
+     """
+     logger.info(f"Creating reader function for {file_path} with n={n}, strict={strict}")
+     return lambda: read_top_n(file_path, n, strict)
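
Taken together, the hunk above (flowfile_core/utils/arrow_reader.py, file 122 in the list) adds a small row-sampling API. A minimal usage sketch follows; the path and row counts are illustrative, not taken from the package:

    from flowfile_core.utils.arrow_reader import read_top_n, get_read_top_n

    preview = read_top_n("data.arrow", n=100)        # eager: reads up to 100 rows now
    deferred = get_read_top_n("data.arrow", n=100)   # lazy: nothing is read yet
    table = deferred()                               # the closure reads on call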
flowfile_core/utils/excel_file_manager.py
@@ -0,0 +1,18 @@
+ import os
+ import fastexcel
+ from typing import List
+ from functools import lru_cache
+
+ from flowfile_core.configs import logger
+
+
+ @lru_cache(maxsize=32)
+ def get_sheet_names(file_path: str) -> List[str] | None:
+     if not os.path.exists(file_path):
+         logger.error(f"File does not exist: {file_path}")
+         return None
+     try:
+         return fastexcel.read_excel(file_path).sheet_names
+     except Exception as e:
+         logger.error(e)
+         return None
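
Because get_sheet_names is wrapped in lru_cache(maxsize=32), repeated lookups for the same workbook are served from the cache. A short sketch, with an illustrative workbook name:

    from flowfile_core.utils.excel_file_manager import get_sheet_names

    names = get_sheet_names("report.xlsx")   # first call reads via fastexcel
    names = get_sheet_names("report.xlsx")   # second call is a cache hit
    get_sheet_names.cache_clear()            # drop cached results if the file changed on disk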
flowfile_core/utils/fileManager.py
@@ -0,0 +1,45 @@
+ import os
+ from flowfile_core.schemas.input_schema import NewDirectory, RemoveItem, RemoveItemsInput
+ from typing import Tuple, Optional
+ from flowfile_core.configs import logger
+
+ local_database_connection = None
+
+
+ def create_dir(new_directory: NewDirectory) -> Tuple[bool, Optional[Exception]]:
+     full_path: str = os.path.join(new_directory.source_path, new_directory.dir_name)
+     try:
+         os.mkdir(full_path)
+         logger.info("Successfully created a new folder")
+         return True, None
+     except Exception as e:
+         return False, e
+
+
+ def remove_path(path: str) -> Tuple[bool, Optional[Exception]]:
+     try:
+         os.remove(path)
+         logger.info(f"Successfully removed {path}")
+         return True, None
+     except Exception as e:
+         return False, e
+
+
+ def remove_item(item_to_remove: RemoveItem):
+     if item_to_remove.id >= 0:
+         os.remove(item_to_remove.path)
+     elif os.path.isfile(item_to_remove.path):
+         os.remove(item_to_remove.path)
+     elif os.path.isdir(item_to_remove.path):
+         os.rmdir(item_to_remove.path)
+
+
+ def remove_paths(remove_items: RemoveItemsInput) -> Tuple[bool, Optional[Exception]]:
+     try:
+         for path in remove_items.paths:
+             remove_item(path)
+         logger.info(f'Successfully removed {remove_items.paths}')
+         return True, None
+     except Exception as e:
+         return False, e
@@ -0,0 +1,38 @@
1
+ from flowfile_core.configs import logger
2
+ from inspect import isfunction
3
+ from loky import get_reusable_executor
4
+ from functools import wraps
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ import atexit
7
+
8
+
9
+ # process_executor: Uses loky for process-based parallelism
10
+ def process_executor(wait_on_completion: bool = False, max_workers: int = 12):
11
+ max_workers = max_workers if not wait_on_completion else 1
12
+
13
+ def executor(f):
14
+ @wraps(f)
15
+ def inner(*args, **kwargs):
16
+ logger.debug(f'Added task {f.__name__} to a process executor')
17
+ logger.debug(f'max_workers: {max_workers}')
18
+
19
+ # Create a new executor with the required number of workers
20
+ func_executor = get_reusable_executor(max_workers=max_workers, timeout=2, kill_workers=False, reuse=True)
21
+ r = func_executor.submit(f, *args, **kwargs)
22
+ if wait_on_completion:
23
+ result = r.result()
24
+ logger.info(f'done executing {f.__name__}')
25
+ return result
26
+
27
+ logger.info(f'done submitting {f.__name__} to a process executor')
28
+ return r
29
+
30
+ return inner
31
+
32
+ if isfunction(wait_on_completion):
33
+ f = wait_on_completion
34
+ wait_on_completion = False
35
+ return executor(f)
36
+ return executor
37
+
38
+
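
The decorator supports both the parameterized and the bare form. A hedged usage sketch; the function names and values are illustrative:

    from flowfile_core.utils.fl_executor import process_executor

    @process_executor(wait_on_completion=True)   # blocks and returns the result
    def heavy_transform(x):
        return x * 2

    @process_executor                            # bare form: returns a loky future
    def background_task(x):
        return x + 1

    value = heavy_transform(21)    # 42, computed in a worker process
    future = background_task(1)    # call future.result() to get 2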
flowfile_core/utils/utils.py
@@ -0,0 +1,8 @@
+ import re
+
+
+ def camel_case_to_snake_case(text: str) -> str:
+     # Insert an underscore before each capital letter (except at the start), then lowercase everything
+     transformed_text = re.sub(r'(?<!^)(?=[A-Z])', '_', text).lower()
+     return transformed_text
+
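
For example (illustrative input), the lookahead inserts the underscores and .lower() folds the case:

    >>> camel_case_to_snake_case("FlowDataEngine")
    'flow_data_engine'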
@@ -0,0 +1,56 @@
1
+ # flowframe/__init__.py
2
+ """A Polars-like API for building ETL graphs."""
3
+
4
+ # Core classes
5
+ from flowfile_frame.flow_frame import FlowFrame # noqa: F401
6
+
7
+ from flowfile_frame.utils import create_etl_graph # noqa: F401
8
+
9
+ # Commonly used functions
10
+ from flowfile_frame.expr import ( # noqa: F401
11
+ col, lit, column,
12
+ cum_count, len,
13
+ sum, min, max, mean, count, when
14
+ )
15
+
16
+ # Selector utilities
17
+ from flowfile_frame.selectors import ( # noqa: F401
18
+ numeric, float_, integer, string, temporal,
19
+ datetime, date, time, duration, boolean,
20
+ categorical, object_, list_, struct, all_,
21
+ by_dtype, contains, starts_with, ends_with, matches
22
+ )
23
+
24
+ # File I/O
25
+ from flowfile_frame.flow_frame import ( # noqa: F401
26
+ read_csv, read_parquet, from_dict, concat
27
+ )
28
+
29
+ # Import Polars data types for user convenience
30
+ from polars.datatypes import ( # noqa: F401
31
+ # Integer types
32
+ Int8, Int16, Int32, Int64, Int128,
33
+ UInt8, UInt16, UInt32, UInt64,
34
+ IntegerType,
35
+
36
+ # Float types
37
+ Float32, Float64,
38
+
39
+ # Other primitive types
40
+ Boolean, String, Utf8, Binary, Null,
41
+
42
+ # Complex types
43
+ List, Array, Struct, Object,
44
+
45
+ # Date/time types
46
+ Date, Time, Datetime, Duration,
47
+ TemporalType,
48
+
49
+ # Special types
50
+ Categorical, Decimal, Enum, Unknown,
51
+
52
+ # Type classes
53
+ DataType, DataTypeClass, Field
54
+ )
55
+
56
+ __version__ = "0.1.0"
flowfile_frame/__main__.py
@@ -0,0 +1,12 @@
+ """Main entry point for the FlowFrame CLI."""
+
+ def main():
+     """Print a short usage message for the FlowFrame API."""
+     print("FlowFrame - A Polars-like API for building ETL graphs")
+     print("Usage: import flowframe as ff")
+     print("       df = ff.from_dict({'a': [1, 2, 3]})")
+     print("       result = df.filter(ff.col('a') > 1)")
+     print("       print(result.collect())")
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,17 @@
1
+ # flowframe/adapters.py
2
+ """Adapters to connect FlowFrame with the flowfile-core library."""
3
+
4
+ # Import from your existing project
5
+ from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
6
+ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
7
+ from flowfile_core.schemas import input_schema, schemas, transform_schema
8
+
9
+ # Export these for use in FlowFrame
10
+ __all__ = [
11
+ 'FlowGraph',
12
+ 'add_connection',
13
+ 'FlowDataEngine',
14
+ 'input_schema',
15
+ 'schemas',
16
+ 'transform_schema'
17
+ ]