Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile has been flagged as possibly problematic. See the registry's details page for more information.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
File without changes
|
build_backends/main.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import platform
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def merge_directories(directories: List[str], target_dir: str, cleanup_after_merge: bool = True):
    """
    Merge all files from the given source folders into a target directory.

    Sources are copied in list order, so files in later directories overwrite
    identically named files from earlier ones. Missing source directories are
    silently skipped.

    :param directories: Source directories to merge.
    :param target_dir: Destination directory (created if it does not exist).
    :param cleanup_after_merge: When True, delete each surviving source
        directory after the copy completes.
    """
    # Create target directory (including parents) if needed.
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    for directory in directories:
        if os.path.exists(directory):
            # dirs_exist_ok lets successive sources layer into the same target.
            shutil.copytree(directory, target_dir, dirs_exist_ok=True)
    print('Merged directories:', directories, 'into', target_dir)
    if cleanup_after_merge:
        print('Cleaning up directories:', directories)
        for directory in directories:
            if os.path.exists(directory):
                shutil.rmtree(directory)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def create_spec_file(directory, script_name, output_name, hidden_imports):
    """Create an optimized PyInstaller spec file for faster startup.

    :param directory: Folder containing the entry-point script.
    :param script_name: Entry-point filename inside *directory*.
    :param output_name: Name of the executable / COLLECT folder to build.
    :param hidden_imports: Extra modules PyInstaller cannot detect on its own.
        ``None`` is treated as an empty list.
    :return: Path of the spec file written to the current working directory.
    """
    # Bug fix: the caller (build_backend) defaults hidden_imports to None,
    # which previously was interpolated literally into the spec as
    # `None + snowflake_imports + [...]` — a broken spec file.
    if hidden_imports is None:
        hidden_imports = []
    spec_content = f'''
import sys
import os
from PyInstaller.utils.hooks import collect_submodules, collect_data_files

# Add hook to fix connectorx metadata
def get_connectorx_metadata():
    print("Collecting connectorx metadata...")
    try:
        import site
        import connectorx

        # Find the site-packages directory
        site_packages = site.getsitepackages()[0]
        print(f"Site-packages directory: {{site_packages}}")

        # Try both common metadata formats with glob to catch any version
        import glob
        metadata_locations = []

        # Look for dist-info directories
        dist_info_pattern = os.path.join(site_packages, 'connectorx*.dist-info')
        for dist_info in glob.glob(dist_info_pattern):
            metadata_locations.append(dist_info)

        # Look for egg-info directories
        egg_info_pattern = os.path.join(site_packages, 'connectorx*.egg-info')
        for egg_info in glob.glob(egg_info_pattern):
            metadata_locations.append(egg_info)

        # Also try looking in the parent directory of the connectorx package
        connectorx_dir = os.path.dirname(connectorx.__file__)
        parent_dir = os.path.dirname(connectorx_dir)

        dist_info_pattern = os.path.join(parent_dir, 'connectorx*.dist-info')
        for dist_info in glob.glob(dist_info_pattern):
            metadata_locations.append(dist_info)

        egg_info_pattern = os.path.join(parent_dir, 'connectorx*.egg-info')
        for egg_info in glob.glob(egg_info_pattern):
            metadata_locations.append(egg_info)

        found_metadata = []
        for loc in metadata_locations:
            if os.path.exists(loc):
                dest_name = os.path.basename(loc)
                found_metadata.append((loc, dest_name))
                print(f"Found metadata at {{loc}}")

        if found_metadata:
            return found_metadata

        # If we can't find the metadata, create a fake one
        print("No connectorx metadata found, creating manual metadata...")
        import tempfile
        temp_dir = tempfile.mkdtemp()
        fake_meta_dir = os.path.join(temp_dir, 'connectorx-0.4.3.dist-info')
        os.makedirs(fake_meta_dir, exist_ok=True)

        # Create minimal METADATA file
        with open(os.path.join(fake_meta_dir, 'METADATA'), 'w') as f:
            f.write("""Metadata-Version: 2.1
Name: connectorx
Version: 0.4.3
Summary: ConnectorX: Fast and Reliable Data Loading
""")

        # Return the fake metadata directory
        print(f"Created fake metadata at {{fake_meta_dir}}")
        return [(fake_meta_dir, 'connectorx-0.4.3.dist-info')]
    except Exception as e:
        print(f"Error collecting connectorx metadata: {{e}}")
        return []

# Add runtime hook to handle connectorx metadata issues
def create_runtime_hook():
    return """
# Runtime hook to handle connectorx metadata issues
import sys
import importlib.metadata

# Store original version function
original_version = importlib.metadata.version

# Create patched version function
def patched_version(distribution_name):
    try:
        return original_version(distribution_name)
    except (importlib.metadata.PackageNotFoundError, StopIteration):
        # Handle specific packages
        if distribution_name == 'connectorx':
            return '0.4.3'  # Hardcode the version
        # Let other package errors propagate normally
        raise

# Apply the patch
importlib.metadata.version = patched_version
print("Applied connectorx metadata patch")
"""

# Collect minimal snowflake dependencies
snowflake_imports = collect_submodules('snowflake.connector',
    filter=lambda name: any(x in name for x in [
        'connection',
        'errors',
        'snow_logging',
        'auth',
        'network'
    ])
)

# Collect numpy and pyarrow data files
numpy_datas = collect_data_files('numpy')
pyarrow_datas = collect_data_files('pyarrow')
connectorx_datas = get_connectorx_metadata()

# Create runtime hook file
with open('connectorx_hook.py', 'w') as f:
    f.write(create_runtime_hook())

a = Analysis(
    [r'{os.path.join(directory, script_name)}'],
    binaries=[],
    datas=numpy_datas + pyarrow_datas + connectorx_datas,
    hiddenimports={hidden_imports} + snowflake_imports + [
        'numpy',
        'numpy.core._dtype_ctypes',
        'numpy.core._methods',
        'numpy._pyarray_api',
        'pyarrow',
        'pyarrow.lib',
        'fastexcel',
        'importlib.metadata',
    ],
    excludes=[
        'tkinter',
        'PIL',
        'pytest',
        'unittest'
    ],
    runtime_hooks=['connectorx_hook.py'],
    noarchive=False,
)

pyz = PYZ(a.pure, compress_level=9)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='{output_name}',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=False,
    console=True,
    optimize=1
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=False,
    upx_exclude=[],
    name='{output_name}'
)
'''
    spec_path = f'{output_name}.spec'
    with open(spec_path, 'w') as f:
        f.write(spec_content)
    return spec_path
|
|
204
|
+
|
|
205
|
+
def build_backend(directory, script_name, output_name, hidden_imports=None):
    """Build one backend service into ./services_dist with PyInstaller.

    :param directory: Folder containing the entry-point script.
    :param script_name: Entry-point filename inside *directory*.
    :param output_name: Name of the resulting executable / folder.
    :param hidden_imports: Extra hidden imports to bake into the spec.
    :return: True when the build succeeded, False otherwise.
    """
    import tempfile  # local: only needed for the Windows work-path fallback

    spec_path = None
    try:
        spec_path = create_spec_file(directory, script_name, output_name, hidden_imports)

        # PYTHONOPTIMIZE=1 makes PyInstaller compile bundled modules with -O.
        env = os.environ.copy()
        env['PYTHONOPTIMIZE'] = "1"

        # Bug fix: os.getenv('TEMP') can be None on Windows, which made
        # os.path.join raise a TypeError; fall back to the stdlib temp dir.
        if platform.system() == "Windows":
            work_path = os.path.join(os.getenv('TEMP') or tempfile.gettempdir(), 'pyinstaller')
        else:
            work_path = "/tmp"

        command = [
            "pyinstaller",
            "--clean",
            "-y",
            "--dist", "./services_dist",
            "--workpath", work_path,
            spec_path
        ]

        print(f"Building {output_name}...")
        subprocess.run(command, check=True, env=env)
        return True

    except subprocess.CalledProcessError as e:
        print(f"Error while building {script_name}: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
    finally:
        # Bug fix: the spec file was previously removed only on success,
        # leaking a stray *.spec into the repo whenever a build failed.
        if spec_path and os.path.exists(spec_path):
            os.remove(spec_path)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def combine_packages():
    """Reorganize the services_dist directory to have shared dependencies"""
    dist_dir = "services_dist"
    shared_internal = os.path.join(dist_dir, "_internal")
    core_internal = os.path.join(dist_dir, "flowfile_core", "_internal")
    worker_internal = os.path.join(dist_dir, "flowfile_worker", "_internal")
    # Merge both per-app _internal folders into one shared _internal.
    # cleanup_after_merge=False keeps the per-project folders intact so the
    # executables can still be pulled out of them below.
    merge_directories([core_internal, worker_internal], shared_internal, False)

    for project in ["flowfile_worker", "flowfile_core"]:
        src_dir = os.path.join(dist_dir, project)
        if os.path.exists(src_dir) and os.path.isdir(src_dir):
            # Move executable
            # Conditional binds looser than '+': (project + ".exe") on Windows,
            # bare project name elsewhere.
            exe_name = project + ".exe" if platform.system() == "Windows" else project
            src_exe = os.path.join(src_dir, exe_name)
            temp_target_exe = os.path.join(dist_dir, "_" + exe_name)
            target_exe = os.path.join(dist_dir, exe_name)
            if os.path.exists(src_exe) and os.path.isfile(src_exe):
                # Instead of removing, overwrite the target
                # NOTE: on non-Windows, exe_name == project, so target_exe is the
                # SAME path as the per-project folder src_dir. The exe is first
                # moved to a "_"-prefixed temp name, the folder is deleted, and
                # the exe then takes the folder's place at the dist root.
                shutil.move(src_exe, temp_target_exe)
                if os.path.exists(target_exe) and os.path.isdir(target_exe):
                    shutil.rmtree(target_exe)
                shutil.move(temp_target_exe, target_exe)
            # On Windows the exe name differs from the folder name, so the
            # now-emptied per-project folder must be removed explicitly.
            if platform.system() == "Windows" and os.path.exists(os.path.join(dist_dir, project)):
                shutil.rmtree(os.path.join(dist_dir, project))
|
|
259
|
+
|
|
260
|
+
def main():
    """Build both backend services and merge them into services_dist/."""
    # Wipe any previous build output so the merge starts clean.
    if os.path.exists('services_dist'):
        shutil.rmtree('services_dist')

    # Hidden imports shared by both services.
    shared_hidden_imports = [
        "fastexcel",
        "polars",
        "numpy",
        "numpy.core._methods",
        "pyarrow",
        "snowflake.connector",
        "snowflake.connector.snow_logging",
        "snowflake.connector.errors",
        "multiprocessing",
        "uvicorn.protocols.http",
        "uvicorn.protocols.websockets",
        "passlib.handlers.bcrypt",
        "connectorx",
    ]

    # Build the worker first, then core; both are attempted even if one fails.
    build_targets = [
        ("flowfile_worker", os.path.join("flowfile_worker", "flowfile_worker")),
        ("flowfile_core", os.path.join("flowfile_core", "flowfile_core")),
    ]
    all_succeeded = True
    for target_name, target_dir in build_targets:
        succeeded = build_backend(
            directory=target_dir,
            script_name="main.py",
            output_name=target_name,
            hidden_imports=shared_hidden_imports
        )
        all_succeeded = succeeded and all_succeeded

    # Only reorganize when every build succeeded.
    if all_succeeded:
        print("Reorganizing services_dist directory...")
        combine_packages()
        print("Build complete! Final structure created in services_dist/")
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# if __name__ == "__main__":
|
|
313
|
+
# main()
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import platform
|
|
4
|
+
from concurrent.futures import ProcessPoolExecutor, wait
|
|
5
|
+
|
|
6
|
+
import subprocess
|
|
7
|
+
import time
|
|
8
|
+
import requests
|
|
9
|
+
from statistics import mean, stdev
|
|
10
|
+
import sys
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def wait_for_endpoint(url, timeout=60):
    """Poll *url* until it answers HTTP 200 or *timeout* seconds elapse.

    :param url: Endpoint to probe with GET requests.
    :param timeout: Overall deadline in seconds.
    :return: True when a 200 response arrived, False on timeout.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # Bug fix: the GET previously had no per-request timeout, so one
            # hung connection could block far past the overall deadline.
            if requests.get(url, timeout=5).status_code == 200:
                return True
        except requests.exceptions.RequestException:
            pass
        # Bug fix: a non-200 response previously retried in a tight loop
        # without sleeping; back off briefly on every failed probe.
        time.sleep(0.1)
    return False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def shutdown_service():
    """Shutdown the service gracefully using the shutdown endpoint.

    :return: True when the shutdown request was delivered, False otherwise.
    """
    try:
        requests.post(
            'http://0.0.0.0:63578/shutdown',
            headers={'accept': 'application/json'},
            data='',
            timeout=10,  # don't hang forever if the service is wedged
        )
        print("Shutdown request sent, waiting for service to stop...")
        # Bug fix: the old comment claimed a 10-second wait but the code
        # sleeps 1 second; the comment now matches the behavior.
        time.sleep(1)  # Give the service a moment to stop fully
        return True
    except requests.exceptions.RequestException as e:
        print(f"Error shutting down service: {e}")
        return False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def measure_startup_time(executable_path):
    """Launch *executable_path* and time how long until /docs answers.

    Returns the elapsed seconds, or None when the endpoint never became
    available (the process is killed in that case). The service is shut down
    gracefully when possible before returning.
    """
    launched_at = time.time()

    # Spawn the service under test.
    service = subprocess.Popen([executable_path])

    # Block until the FastAPI docs page responds.
    docs_url = "http://0.0.0.0:63578/docs"
    if not wait_for_endpoint(docs_url):
        print(f"Error: Endpoint did not become available for {executable_path}")
        service.kill()
        return None

    startup_seconds = time.time() - launched_at

    # Prefer the graceful shutdown endpoint; fall back to a hard kill.
    if not shutdown_service():
        print("Failed to shutdown service gracefully, killing process...")
        service.kill()
    time.sleep(1)

    return startup_seconds
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _print_timing_stats(header, times):
    """Print average/std-dev/min/max for one executable's startup times."""
    print(f"{header}")
    print(f"  Average: {mean(times):.3f} seconds")
    print(f"  Std Dev: {stdev(times):.3f} seconds" if len(times) > 1 else "  Std Dev: N/A")
    print(f"  Min: {min(times):.3f} seconds")
    print(f"  Max: {max(times):.3f} seconds")


def run_comparison_test(old_exe, new_exe, num_runs=3):
    """Run multiple comparison tests and print statistics.

    Each run measures the startup time of *old_exe* and *new_exe* in turn;
    failed measurements (None) are skipped. Finishes with per-executable
    stats and the relative performance difference.
    """
    print(f"\nStarting comparison test at {datetime.now()}")
    print(f"Number of runs: {num_runs}")
    print("\nExecutables being tested:")
    print(f"Old: {old_exe}")
    print(f"New: {new_exe}")

    old_times = []
    new_times = []

    for i in range(num_runs):
        print(f"\nRun {i + 1}/{num_runs}")

        # Test old executable
        print("Testing old executable...")
        old_time = measure_startup_time(old_exe)
        if old_time is not None:
            old_times.append(old_time)
            print(f"Old startup time: {old_time:.3f} seconds")

        # Test new executable
        print("Testing new executable...")
        new_time = measure_startup_time(new_exe)
        if new_time is not None:
            new_times.append(new_time)
            print(f"New startup time: {new_time:.3f} seconds")

    # Print results (stats printing was previously duplicated inline for
    # both executables; extracted into _print_timing_stats).
    print("\nResults:")
    print("-" * 50)
    if old_times:
        _print_timing_stats("Old executable:", old_times)
    if new_times:
        _print_timing_stats("\nNew executable:", new_times)

    if old_times and new_times:
        improvement = (mean(old_times) - mean(new_times)) / mean(old_times) * 100
        print(f"\nPerformance difference:")
        print(f"  {improvement:.1f}% {'faster' if improvement > 0 else 'slower'} than old version")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
    old_exe = '/Users/edwardvanechoud/personal_dev/Flowfile/dist/flowfile_core/flowfile_core'
    new_exe = '/Users/edwardvanechoud/personal_dev/Flowfile/dist_flowfile_core/flowfile_core'

    # Bug fix: the comparison previously ran run_comparison_test(old_exe, old_exe),
    # measuring the old executable twice and never using new_exe at all.
    run_comparison_test(old_exe, new_exe)
|
|
124
|
+
|
|
125
|
+
# def build_backend(directory, script_name, output_name, hidden_imports=None):
|
|
126
|
+
# try:
|
|
127
|
+
# script_path = os.path.join(directory, script_name)
|
|
128
|
+
# command = [
|
|
129
|
+
# "python", "-m", "nuitka",
|
|
130
|
+
# "--onefile",
|
|
131
|
+
# "--standalone",
|
|
132
|
+
# "--assume-yes-for-downloads",
|
|
133
|
+
# "--include-package=tempfile",
|
|
134
|
+
# "--include-package=polars",
|
|
135
|
+
# "--include-package=fastexcel",
|
|
136
|
+
# "--include-package=snowflake.connector"
|
|
137
|
+
# ]
|
|
138
|
+
#
|
|
139
|
+
# if hidden_imports:
|
|
140
|
+
# for imp in hidden_imports:
|
|
141
|
+
# if '.' not in imp:
|
|
142
|
+
# command.extend(["--include-package=" + imp])
|
|
143
|
+
# else:
|
|
144
|
+
# command.extend(["--include-module=" + imp])
|
|
145
|
+
#
|
|
146
|
+
# dist_folder = f"dist_{output_name}"
|
|
147
|
+
# os.makedirs(dist_folder, exist_ok=True)
|
|
148
|
+
# ext = ".exe" if platform.system() == "Windows" else ""
|
|
149
|
+
# command.extend([
|
|
150
|
+
# f"--output-dir={dist_folder}",
|
|
151
|
+
# f"--output-filename={output_name}{ext}",
|
|
152
|
+
# script_path
|
|
153
|
+
# ])
|
|
154
|
+
#
|
|
155
|
+
# print(f"Starting build for {output_name}")
|
|
156
|
+
# result = subprocess.run(command, check=True)
|
|
157
|
+
# print(f"Build completed for {output_name} with exit code {result.returncode}")
|
|
158
|
+
# return result.returncode
|
|
159
|
+
#
|
|
160
|
+
# except subprocess.CalledProcessError as e:
|
|
161
|
+
# print(f"Error while building {script_name}: {e}")
|
|
162
|
+
# return 1
|
|
163
|
+
#
|
|
164
|
+
#
|
|
165
|
+
# def main():
|
|
166
|
+
# common_imports = [
|
|
167
|
+
# "fastexcel",
|
|
168
|
+
# "polars",
|
|
169
|
+
# "snowflake.connector",
|
|
170
|
+
# "snowflake.connector.snow_logging",
|
|
171
|
+
# "snowflake.connector.errors"
|
|
172
|
+
# ]
|
|
173
|
+
#
|
|
174
|
+
# builds = [
|
|
175
|
+
# {
|
|
176
|
+
# "directory": os.path.join("flowfile_worker", "flowfile_worker"),
|
|
177
|
+
# "script_name": "main.py",
|
|
178
|
+
# "output_name": "flowfile_worker",
|
|
179
|
+
# "hidden_imports": ["multiprocessing", "multiprocessing.resource_tracker",
|
|
180
|
+
# "multiprocessing.sharedctypes", "uvicorn",
|
|
181
|
+
# "uvicorn.logging", "uvicorn.protocols.http",
|
|
182
|
+
# "uvicorn.protocols.websockets"] + common_imports
|
|
183
|
+
# },
|
|
184
|
+
# {
|
|
185
|
+
# "directory": os.path.join("flowfile_core", "flowfile_core"),
|
|
186
|
+
# "script_name": "main.py",
|
|
187
|
+
# "output_name": "flowfile_core",
|
|
188
|
+
# "hidden_imports": ["passlib.handlers.bcrypt"] + common_imports
|
|
189
|
+
# }
|
|
190
|
+
# ]
|
|
191
|
+
#
|
|
192
|
+
# with ProcessPoolExecutor(max_workers=2) as executor:
|
|
193
|
+
# futures = [executor.submit(build_backend, **build) for build in builds]
|
|
194
|
+
# wait(futures)
|
|
195
|
+
#
|
|
196
|
+
# for future in futures:
|
|
197
|
+
# if future.result() != 0:
|
|
198
|
+
# raise Exception("One or more builds failed")
|
|
199
|
+
#
|
|
200
|
+
#
|
|
201
|
+
# if __name__ == "__main__":
|
|
202
|
+
# main()
|
flowfile/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FlowFile: A framework combining visual ETL with a Polars-like API.
|
|
3
|
+
|
|
4
|
+
This package ties together the FlowFile ecosystem components:
|
|
5
|
+
- flowfile_core: Core ETL functionality
|
|
6
|
+
- flowfile_frame: Polars-like DataFrame API
|
|
7
|
+
- flowfile_worker: Computation engine
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__version__ = "0.2.1"
|
|
11
|
+
|
|
12
|
+
# Import the key components from flowfile_frame
|
|
13
|
+
from flowfile_frame.flow_frame import (
|
|
14
|
+
FlowFrame, read_csv, read_parquet, from_dict, concat
|
|
15
|
+
)
|
|
16
|
+
from flowfile_frame.expr import (
|
|
17
|
+
col, lit, column, cum_count, len,
|
|
18
|
+
sum, min, max, mean, count, when
|
|
19
|
+
)
|
|
20
|
+
from flowfile_frame.group_frame import GroupByFrame
|
|
21
|
+
from flowfile_frame.utils import create_etl_graph, open_graph_in_editor
|
|
22
|
+
from flowfile_frame.selectors import (
|
|
23
|
+
numeric, float_, integer, string, temporal,
|
|
24
|
+
datetime, date, time, duration, boolean,
|
|
25
|
+
categorical, object_, list_, struct, all_,
|
|
26
|
+
by_dtype, contains, starts_with, ends_with, matches
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Import Polars data types for convenience
|
|
30
|
+
from polars.datatypes import (
|
|
31
|
+
Int8, Int16, Int32, Int64, Int128,
|
|
32
|
+
UInt8, UInt16, UInt32, UInt64,
|
|
33
|
+
Float32, Float64,
|
|
34
|
+
Boolean, String, Utf8, Binary, Null,
|
|
35
|
+
List, Array, Struct, Object,
|
|
36
|
+
Date, Time, Datetime, Duration,
|
|
37
|
+
Categorical, Decimal, Enum, Unknown,
|
|
38
|
+
DataType, DataTypeClass, Field
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Define what's publicly available from the package
|
|
42
|
+
__all__ = [
|
|
43
|
+
# Core FlowFrame classes
|
|
44
|
+
'FlowFrame', 'GroupByFrame',
|
|
45
|
+
|
|
46
|
+
# Main creation functions
|
|
47
|
+
'read_csv', 'read_parquet', 'from_dict', 'concat',
|
|
48
|
+
|
|
49
|
+
# Expression API
|
|
50
|
+
'col', 'lit', 'column', 'cum_count', 'len',
|
|
51
|
+
'sum', 'min', 'max', 'mean', 'count', 'when',
|
|
52
|
+
|
|
53
|
+
# Selector utilities
|
|
54
|
+
'numeric', 'float_', 'integer', 'string', 'temporal',
|
|
55
|
+
'datetime', 'date', 'time', 'duration', 'boolean',
|
|
56
|
+
'categorical', 'object_', 'list_', 'struct', 'all_',
|
|
57
|
+
'by_dtype', 'contains', 'starts_with', 'ends_with', 'matches',
|
|
58
|
+
|
|
59
|
+
# Utilities
|
|
60
|
+
'create_etl_graph', 'open_graph_in_editor',
|
|
61
|
+
|
|
62
|
+
# Data types from Polars
|
|
63
|
+
'Int8', 'Int16', 'Int32', 'Int64', 'Int128',
|
|
64
|
+
'UInt8', 'UInt16', 'UInt32', 'UInt64',
|
|
65
|
+
'Float32', 'Float64',
|
|
66
|
+
'Boolean', 'String', 'Utf8', 'Binary', 'Null',
|
|
67
|
+
'List', 'Array', 'Struct', 'Object',
|
|
68
|
+
'Date', 'Time', 'Datetime', 'Duration',
|
|
69
|
+
'Categorical', 'Decimal', 'Enum', 'Unknown',
|
|
70
|
+
'DataType', 'DataTypeClass', 'Field',
|
|
71
|
+
]
|
flowfile/__main__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main entry point for the FlowFile package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def main():
    """
    Display information about FlowFile when run directly as a module.
    """
    import flowfile

    # Assemble the banner once and emit it in a single write; the joined
    # output is byte-identical to the original sequence of print calls.
    banner = "\n".join([
        f"FlowFile v{flowfile.__version__}",
        "A framework combining visual ETL with a Polars-like API",
        "\nUsage examples:",
        "  import flowfile as ff",
        "  df = ff.read_csv('data.csv')",
        "  result = df.filter(ff.col('value') > 10)",
        "  result.write_csv('output.csv')",
        "\nFor visual ETL:",
        "  ff.open_graph_in_editor(result.to_graph())",
    ])
    print(banner)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
if __name__ == "__main__":
|
|
24
|
+
main()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Edward van Eechoud
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|