Flowfile 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (171) hide show
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
File without changes
build_backends/main.py ADDED
@@ -0,0 +1,313 @@
1
+ import os
2
+ import subprocess
3
+ import platform
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import List
7
+
8
+
9
def merge_directories(directories: List[str], target_dir: str, cleanup_after_merge: bool = True):
    """
    Merge the contents of several directories into one target directory.

    Every entry of ``directories`` that is an existing directory is copied
    recursively into ``target_dir``. Sources are copied in list order, so when
    two sources contain the same relative path the later source overwrites
    the earlier one. Entries that are missing, or that are not directories,
    are skipped.

    Args:
        directories: Paths of the source directories to merge.
        target_dir: Destination directory; it and its parents are created
            when they do not exist yet.
        cleanup_after_merge: When True (the default) the source directories
            are deleted after all of them have been copied.
    """
    # Create target directory
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    for directory in directories:
        # isdir() rather than exists(): a plain file in the list would make
        # copytree() raise instead of simply being skipped.
        if os.path.isdir(directory):
            shutil.copytree(directory, target_dir, dirs_exist_ok=True)
    print('Merged directories:', directories, 'into', target_dir)
    if cleanup_after_merge:
        print('Cleaning up directories:', directories)
        for directory in directories:
            # Re-checked here so a path listed twice is only removed once.
            if os.path.isdir(directory):
                shutil.rmtree(directory)
25
+
26
+
27
def create_spec_file(directory, script_name, output_name, hidden_imports):
    """
    Write a PyInstaller spec file for one backend and return its path.

    The spec is written to ``<output_name>.spec`` in the current working
    directory. When PyInstaller executes it, the spec collects numpy/pyarrow
    data files and connectorx metadata, writes a ``connectorx_hook.py``
    runtime hook into the working directory, and defines the
    Analysis/PYZ/EXE/COLLECT pipeline for ``directory/script_name``.

    Args:
        directory: Directory that contains the entry script.
        script_name: File name of the entry script inside ``directory``.
        output_name: Name used for the executable, the collected folder and
            the spec file itself.
        hidden_imports: Iterable of module names to add to ``hiddenimports``.
            ``None`` is treated as "no extra imports".

    Returns:
        The path of the spec file that was written.
    """
    # The value is rendered into the spec as the left operand of
    # ``... + snowflake_imports + [...]``, so it must render as a list
    # literal: ``None`` or a tuple would raise TypeError when the spec runs.
    hidden_imports = list(hidden_imports or [])
    spec_content = f'''
import sys
import os
from PyInstaller.utils.hooks import collect_submodules, collect_data_files

# Add hook to fix connectorx metadata
def get_connectorx_metadata():
    print("Collecting connectorx metadata...")
    try:
        import site
        import connectorx

        # Find the site-packages directory
        site_packages = site.getsitepackages()[0]
        print(f"Site-packages directory: {{site_packages}}")

        # Try both common metadata formats with glob to catch any version
        import glob
        metadata_locations = []

        # Look for dist-info directories
        dist_info_pattern = os.path.join(site_packages, 'connectorx*.dist-info')
        for dist_info in glob.glob(dist_info_pattern):
            metadata_locations.append(dist_info)

        # Look for egg-info directories
        egg_info_pattern = os.path.join(site_packages, 'connectorx*.egg-info')
        for egg_info in glob.glob(egg_info_pattern):
            metadata_locations.append(egg_info)

        # Also try looking in the parent directory of the connectorx package
        connectorx_dir = os.path.dirname(connectorx.__file__)
        parent_dir = os.path.dirname(connectorx_dir)

        dist_info_pattern = os.path.join(parent_dir, 'connectorx*.dist-info')
        for dist_info in glob.glob(dist_info_pattern):
            metadata_locations.append(dist_info)

        egg_info_pattern = os.path.join(parent_dir, 'connectorx*.egg-info')
        for egg_info in glob.glob(egg_info_pattern):
            metadata_locations.append(egg_info)

        found_metadata = []
        for loc in metadata_locations:
            if os.path.exists(loc):
                dest_name = os.path.basename(loc)
                found_metadata.append((loc, dest_name))
                print(f"Found metadata at {{loc}}")

        if found_metadata:
            return found_metadata

        # If we can't find the metadata, create a fake one
        print("No connectorx metadata found, creating manual metadata...")
        import tempfile
        temp_dir = tempfile.mkdtemp()
        fake_meta_dir = os.path.join(temp_dir, 'connectorx-0.4.3.dist-info')
        os.makedirs(fake_meta_dir, exist_ok=True)

        # Create minimal METADATA file
        with open(os.path.join(fake_meta_dir, 'METADATA'), 'w') as f:
            f.write("""Metadata-Version: 2.1
Name: connectorx
Version: 0.4.3
Summary: ConnectorX: Fast and Reliable Data Loading
""")

        # Return the fake metadata directory
        print(f"Created fake metadata at {{fake_meta_dir}}")
        return [(fake_meta_dir, 'connectorx-0.4.3.dist-info')]
    except Exception as e:
        print(f"Error collecting connectorx metadata: {{e}}")
        return []

# Add runtime hook to handle connectorx metadata issues
def create_runtime_hook():
    return """
# Runtime hook to handle connectorx metadata issues
import sys
import importlib.metadata

# Store original version function
original_version = importlib.metadata.version

# Create patched version function
def patched_version(distribution_name):
    try:
        return original_version(distribution_name)
    except (importlib.metadata.PackageNotFoundError, StopIteration):
        # Handle specific packages
        if distribution_name == 'connectorx':
            return '0.4.3'  # Hardcode the version
        # Let other package errors propagate normally
        raise

# Apply the patch
importlib.metadata.version = patched_version
print("Applied connectorx metadata patch")
"""

# Collect minimal snowflake dependencies
snowflake_imports = collect_submodules('snowflake.connector',
    filter=lambda name: any(x in name for x in [
        'connection',
        'errors',
        'snow_logging',
        'auth',
        'network'
    ])
)

# Collect numpy and pyarrow data files
numpy_datas = collect_data_files('numpy')
pyarrow_datas = collect_data_files('pyarrow')
connectorx_datas = get_connectorx_metadata()

# Create runtime hook file
with open('connectorx_hook.py', 'w') as f:
    f.write(create_runtime_hook())

a = Analysis(
    [r'{os.path.join(directory, script_name)}'],
    binaries=[],
    datas=numpy_datas + pyarrow_datas + connectorx_datas,
    hiddenimports={hidden_imports} + snowflake_imports + [
        'numpy',
        'numpy.core._dtype_ctypes',
        'numpy.core._methods',
        'numpy._pyarray_api',
        'pyarrow',
        'pyarrow.lib',
        'fastexcel',
        'importlib.metadata',
    ],
    excludes=[
        'tkinter',
        'PIL',
        'pytest',
        'unittest'
    ],
    runtime_hooks=['connectorx_hook.py'],
    noarchive=False,
)

pyz = PYZ(a.pure, compress_level=9)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='{output_name}',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=False,
    console=True,
    optimize=1
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=False,
    upx_exclude=[],
    name='{output_name}'
)
'''
    spec_path = f'{output_name}.spec'
    with open(spec_path, 'w', encoding='utf-8') as f:
        f.write(spec_content)
    return spec_path
204
+
205
def build_backend(directory, script_name, output_name, hidden_imports=None):
    """
    Build one backend executable with PyInstaller.

    A temporary spec file is generated with ``create_spec_file``, PyInstaller
    is run on it with ``PYTHONOPTIMIZE=1`` and output in ``./services_dist``,
    and the spec file is removed again whether or not the build succeeded.

    Args:
        directory: Directory that contains the entry script.
        script_name: File name of the entry script.
        output_name: Name of the produced executable / output folder.
        hidden_imports: Optional iterable of extra hidden imports; ``None``
            means no extra imports.

    Returns:
        True when PyInstaller finished successfully, False otherwise. Errors
        are printed rather than raised.
    """
    import tempfile  # local import: only needed for the Windows work path

    spec_path = None
    try:
        # Always hand a real list to the spec generator; the spec template
        # concatenates this value with other lists.
        spec_path = create_spec_file(directory, script_name, output_name, list(hidden_imports or []))

        env = os.environ.copy()
        env['PYTHONOPTIMIZE'] = "1"

        if platform.system() != "Windows":
            work_path = "/tmp"
        else:
            # gettempdir() always returns a path, whereas os.getenv('TEMP')
            # may be None and would make os.path.join raise.
            work_path = os.path.join(tempfile.gettempdir(), 'pyinstaller')

        command = [
            "pyinstaller",
            "--clean",
            "-y",
            "--dist", "./services_dist",
            "--workpath", work_path,
            spec_path
        ]

        print(f"Building {output_name}...")
        subprocess.run(command, check=True, env=env)

        return True

    except subprocess.CalledProcessError as e:
        print(f"Error while building {script_name}: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
    finally:
        # Remove the generated spec on failure as well as on success so a
        # broken build does not leave stale spec files behind.
        if spec_path is not None and os.path.exists(spec_path):
            try:
                os.remove(spec_path)
            except OSError as e:
                print(f"Could not remove spec file {spec_path}: {e}")
233
+
234
+
235
def combine_packages():
    """
    Flatten ``services_dist`` so both services share one ``_internal`` folder.

    The ``_internal`` folders of flowfile_core and flowfile_worker are merged
    into ``services_dist/_internal`` (sources are kept by the merge itself),
    then each service executable is moved up into ``services_dist``.
    """
    dist_dir = "services_dist"
    on_windows = platform.system() == "Windows"

    internal_sources = [
        os.path.join(dist_dir, "flowfile_core", "_internal"),
        os.path.join(dist_dir, "flowfile_worker", "_internal"),
    ]
    merge_directories(internal_sources, os.path.join(dist_dir, "_internal"), False)

    for project in ["flowfile_worker", "flowfile_core"]:
        project_dir = os.path.join(dist_dir, project)
        if not os.path.isdir(project_dir):
            continue

        exe_name = project + ".exe" if on_windows else project
        built_exe = os.path.join(project_dir, exe_name)
        staging_exe = os.path.join(dist_dir, "_" + exe_name)
        final_exe = os.path.join(dist_dir, exe_name)

        if os.path.isfile(built_exe):
            # Stage the executable under a temporary name first: without the
            # ".exe" suffix the final path is the project folder itself,
            # which has to be removed before the executable can take its name.
            shutil.move(built_exe, staging_exe)
            if os.path.isdir(final_exe):
                shutil.rmtree(final_exe)
            shutil.move(staging_exe, final_exe)

        # On Windows the executable carries ".exe", so the project folder was
        # not removed above and is deleted explicitly.
        if on_windows and os.path.exists(project_dir):
            shutil.rmtree(project_dir)
259
+
260
def main():
    """
    Build flowfile_worker and flowfile_core, then merge their outputs.

    Any previous ``services_dist`` folder is deleted first. Both builds are
    always attempted; the output is only reorganized when both succeeded.
    """
    # Clean previous builds
    previous_output = 'services_dist'
    if os.path.exists(previous_output):
        shutil.rmtree(previous_output)

    # Common imports for both projects
    common_imports = [
        "fastexcel",
        "polars",
        "numpy",
        "numpy.core._methods",
        "pyarrow",
        "snowflake.connector",
        "snowflake.connector.snow_logging",
        "snowflake.connector.errors",
        "multiprocessing",
        "uvicorn.protocols.http",
        "uvicorn.protocols.websockets",
        "passlib.handlers.bcrypt",
        "connectorx",
    ]

    # Worker first, then core; a failed build does not stop the next one.
    outcomes = []
    for project in ("flowfile_worker", "flowfile_core"):
        outcomes.append(
            build_backend(
                directory=os.path.join(project, project),
                script_name="main.py",
                output_name=project,
                hidden_imports=common_imports,
            )
        )

    if all(outcomes):
        print("Reorganizing services_dist directory...")
        combine_packages()
        print("Build complete! Final structure created in services_dist/")
310
+
311
+
312
+ # if __name__ == "__main__":
313
+ # main()
build_backends/main_prd.py ADDED
@@ -0,0 +1,202 @@
1
+ import os
2
+ import subprocess
3
+ import platform
4
+ from concurrent.futures import ProcessPoolExecutor, wait
5
+
6
+ import subprocess
7
+ import time
8
+ import requests
9
+ from statistics import mean, stdev
10
+ import sys
11
+ from datetime import datetime
12
+
13
+
14
def wait_for_endpoint(url, timeout=60, poll_interval=0.1):
    """
    Poll ``url`` until it answers with HTTP 200 or ``timeout`` elapses.

    Args:
        url: Endpoint to request with GET.
        timeout: Maximum total time to wait, in seconds.
        poll_interval: Pause between attempts, in seconds.

    Returns:
        True as soon as a response with status code 200 is received, False
        when the timeout expires first.
    """
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        try:
            # Bound each request by the time left so a connection that
            # hangs cannot outlive the overall timeout.
            response = requests.get(url, timeout=remaining)
            if response.status_code == 200:
                return True
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # Service not reachable yet; try again after the pause below.
            pass
        # Pause after every unsuccessful attempt, including non-200
        # responses, so the loop never spins without sleeping.
        time.sleep(poll_interval)
25
+
26
+
27
def shutdown_service(url='http://0.0.0.0:63578/shutdown', settle_seconds=1, request_timeout=10):
    """
    Ask the running service to stop by POSTing to its shutdown endpoint.

    Args:
        url: Shutdown endpoint to call.
        settle_seconds: Time to sleep after a successful request so the
            service can finish stopping.
        request_timeout: Timeout in seconds for the HTTP request itself.

    Returns:
        True when the request was sent and answered without an HTTP error
        status, False when the request failed.
    """
    try:
        response = requests.post(
            url,
            headers={'accept': 'application/json'},
            data='',
            timeout=request_timeout,
        )
        # An error status means the shutdown was not accepted; report it as
        # a failure so the caller can fall back to killing the process.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error shutting down service: {e}")
        return False
    print("Shutdown request sent, waiting for service to stop...")
    time.sleep(settle_seconds)  # give the service a moment to stop
    return True
41
+
42
+
43
def measure_startup_time(executable_path):
    """
    Start the executable and time how long its HTTP endpoint takes to answer.

    The process is launched, ``http://0.0.0.0:63578/docs`` is polled until it
    responds, and the service is then shut down (or killed when the graceful
    shutdown fails). The process is always waited on before returning so it
    cannot linger and keep the port busy for the next measurement.

    Args:
        executable_path: Path of the executable to launch.

    Returns:
        Seconds between launch and the first successful response, or None
        when the endpoint never became available.
    """
    start_time = time.monotonic()

    # Start the process
    process = subprocess.Popen([executable_path])

    # Wait for the endpoint to become available
    endpoint_url = "http://0.0.0.0:63578/docs"
    if not wait_for_endpoint(endpoint_url):
        print(f"Error: Endpoint did not become available for {executable_path}")
        process.kill()
        process.wait()  # reap the killed child
        return None

    elapsed_time = time.monotonic() - start_time

    # Gracefully shutdown the service
    if not shutdown_service():
        print("Failed to shutdown service gracefully, killing process...")
        process.kill()

    # Make sure the process is really gone before returning.
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()

    return elapsed_time
66
+
67
+
68
def _print_timing_stats(heading, times):
    """Print ``heading`` followed by mean, std dev, min and max of ``times``."""
    print(heading)
    print(f" Average: {mean(times):.3f} seconds")
    # stdev() needs at least two samples.
    print(f" Std Dev: {stdev(times):.3f} seconds" if len(times) > 1 else " Std Dev: N/A")
    print(f" Min: {min(times):.3f} seconds")
    print(f" Max: {max(times):.3f} seconds")


def run_comparison_test(old_exe, new_exe, num_runs=3):
    """
    Measure the startup time of two executables repeatedly and print statistics.

    In every run the old executable is measured first, then the new one.
    Measurements that fail (``measure_startup_time`` returned None) are
    left out of the statistics.

    Args:
        old_exe: Path of the baseline executable.
        new_exe: Path of the executable to compare against the baseline.
        num_runs: Number of measurement rounds.
    """
    print(f"\nStarting comparison test at {datetime.now()}")
    print(f"Number of runs: {num_runs}")
    print("\nExecutables being tested:")
    print(f"Old: {old_exe}")
    print(f"New: {new_exe}")

    old_times = []
    new_times = []

    for i in range(num_runs):
        print(f"\nRun {i + 1}/{num_runs}")

        # Test old executable
        print("Testing old executable...")
        old_time = measure_startup_time(old_exe)
        if old_time is not None:
            old_times.append(old_time)
            print(f"Old startup time: {old_time:.3f} seconds")

        # Test new executable
        print("Testing new executable...")
        new_time = measure_startup_time(new_exe)
        if new_time is not None:
            new_times.append(new_time)
            print(f"New startup time: {new_time:.3f} seconds")

    # Print results
    print("\nResults:")
    print("-" * 50)
    if old_times:
        _print_timing_stats("Old executable:", old_times)

    if new_times:
        _print_timing_stats("\nNew executable:", new_times)

    if old_times and new_times:
        old_mean = mean(old_times)
        improvement = (old_mean - mean(new_times)) / old_mean * 100
        print("\nPerformance difference:")
        print(f" {improvement:.1f}% {'faster' if improvement > 0 else 'slower'} than old version")
117
+
118
+
119
+ if __name__ == "__main__":
120
+ old_exe = '/Users/edwardvanechoud/personal_dev/Flowfile/dist/flowfile_core/flowfile_core'
121
+ new_exe = '/Users/edwardvanechoud/personal_dev/Flowfile/dist_flowfile_core/flowfile_core'
122
+
123
+ run_comparison_test(old_exe, old_exe)
124
+
125
+ # def build_backend(directory, script_name, output_name, hidden_imports=None):
126
+ # try:
127
+ # script_path = os.path.join(directory, script_name)
128
+ # command = [
129
+ # "python", "-m", "nuitka",
130
+ # "--onefile",
131
+ # "--standalone",
132
+ # "--assume-yes-for-downloads",
133
+ # "--include-package=tempfile",
134
+ # "--include-package=polars",
135
+ # "--include-package=fastexcel",
136
+ # "--include-package=snowflake.connector"
137
+ # ]
138
+ #
139
+ # if hidden_imports:
140
+ # for imp in hidden_imports:
141
+ # if '.' not in imp:
142
+ # command.extend(["--include-package=" + imp])
143
+ # else:
144
+ # command.extend(["--include-module=" + imp])
145
+ #
146
+ # dist_folder = f"dist_{output_name}"
147
+ # os.makedirs(dist_folder, exist_ok=True)
148
+ # ext = ".exe" if platform.system() == "Windows" else ""
149
+ # command.extend([
150
+ # f"--output-dir={dist_folder}",
151
+ # f"--output-filename={output_name}{ext}",
152
+ # script_path
153
+ # ])
154
+ #
155
+ # print(f"Starting build for {output_name}")
156
+ # result = subprocess.run(command, check=True)
157
+ # print(f"Build completed for {output_name} with exit code {result.returncode}")
158
+ # return result.returncode
159
+ #
160
+ # except subprocess.CalledProcessError as e:
161
+ # print(f"Error while building {script_name}: {e}")
162
+ # return 1
163
+ #
164
+ #
165
+ # def main():
166
+ # common_imports = [
167
+ # "fastexcel",
168
+ # "polars",
169
+ # "snowflake.connector",
170
+ # "snowflake.connector.snow_logging",
171
+ # "snowflake.connector.errors"
172
+ # ]
173
+ #
174
+ # builds = [
175
+ # {
176
+ # "directory": os.path.join("flowfile_worker", "flowfile_worker"),
177
+ # "script_name": "main.py",
178
+ # "output_name": "flowfile_worker",
179
+ # "hidden_imports": ["multiprocessing", "multiprocessing.resource_tracker",
180
+ # "multiprocessing.sharedctypes", "uvicorn",
181
+ # "uvicorn.logging", "uvicorn.protocols.http",
182
+ # "uvicorn.protocols.websockets"] + common_imports
183
+ # },
184
+ # {
185
+ # "directory": os.path.join("flowfile_core", "flowfile_core"),
186
+ # "script_name": "main.py",
187
+ # "output_name": "flowfile_core",
188
+ # "hidden_imports": ["passlib.handlers.bcrypt"] + common_imports
189
+ # }
190
+ # ]
191
+ #
192
+ # with ProcessPoolExecutor(max_workers=2) as executor:
193
+ # futures = [executor.submit(build_backend, **build) for build in builds]
194
+ # wait(futures)
195
+ #
196
+ # for future in futures:
197
+ # if future.result() != 0:
198
+ # raise Exception("One or more builds failed")
199
+ #
200
+ #
201
+ # if __name__ == "__main__":
202
+ # main()
flowfile/__init__.py ADDED
@@ -0,0 +1,71 @@
1
"""
FlowFile: A framework combining visual ETL with a Polars-like API.

This package ties together the FlowFile ecosystem components:
- flowfile_core: Core ETL functionality
- flowfile_frame: Polars-like DataFrame API
- flowfile_worker: Computation engine
"""

# Must match the version of the distribution this module ships in (0.2.2).
__version__ = "0.2.2"

# Import the key components from flowfile_frame
from flowfile_frame.flow_frame import (
    FlowFrame, read_csv, read_parquet, from_dict, concat
)
from flowfile_frame.expr import (
    col, lit, column, cum_count, len,
    sum, min, max, mean, count, when
)
from flowfile_frame.group_frame import GroupByFrame
from flowfile_frame.utils import create_etl_graph, open_graph_in_editor
from flowfile_frame.selectors import (
    numeric, float_, integer, string, temporal,
    datetime, date, time, duration, boolean,
    categorical, object_, list_, struct, all_,
    by_dtype, contains, starts_with, ends_with, matches
)

# Import Polars data types for convenience
from polars.datatypes import (
    Int8, Int16, Int32, Int64, Int128,
    UInt8, UInt16, UInt32, UInt64,
    Float32, Float64,
    Boolean, String, Utf8, Binary, Null,
    List, Array, Struct, Object,
    Date, Time, Datetime, Duration,
    Categorical, Decimal, Enum, Unknown,
    DataType, DataTypeClass, Field
)

# Define what's publicly available from the package
__all__ = [
    # Core FlowFrame classes
    'FlowFrame', 'GroupByFrame',

    # Main creation functions
    'read_csv', 'read_parquet', 'from_dict', 'concat',

    # Expression API
    'col', 'lit', 'column', 'cum_count', 'len',
    'sum', 'min', 'max', 'mean', 'count', 'when',

    # Selector utilities
    'numeric', 'float_', 'integer', 'string', 'temporal',
    'datetime', 'date', 'time', 'duration', 'boolean',
    'categorical', 'object_', 'list_', 'struct', 'all_',
    'by_dtype', 'contains', 'starts_with', 'ends_with', 'matches',

    # Utilities
    'create_etl_graph', 'open_graph_in_editor',

    # Data types from Polars
    'Int8', 'Int16', 'Int32', 'Int64', 'Int128',
    'UInt8', 'UInt16', 'UInt32', 'UInt64',
    'Float32', 'Float64',
    'Boolean', 'String', 'Utf8', 'Binary', 'Null',
    'List', 'Array', 'Struct', 'Object',
    'Date', 'Time', 'Datetime', 'Duration',
    'Categorical', 'Decimal', 'Enum', 'Unknown',
    'DataType', 'DataTypeClass', 'Field',
]
flowfile/__main__.py ADDED
@@ -0,0 +1,24 @@
1
+ """
2
+ Main entry point for the FlowFile package.
3
+ """
4
+
5
+
6
def main():
    """
    Print the FlowFile version and a short usage guide.

    Used when the package is executed directly as a module.
    """
    import flowfile

    banner_lines = (
        f"FlowFile v{flowfile.__version__}",
        "A framework combining visual ETL with a Polars-like API",
        "\nUsage examples:",
        " import flowfile as ff",
        " df = ff.read_csv('data.csv')",
        " result = df.filter(ff.col('value') > 10)",
        " result.write_csv('output.csv')",
        "\nFor visual ETL:",
        " ff.open_graph_in_editor(result.to_graph())",
    )
    # One print call per entry, in order.
    for text in banner_lines:
        print(text)


if __name__ == "__main__":
    main()
flowfile-0.2.2.dist-info/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Edward van Eechoud
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.