Flowfile 0.3.0__py3-none-any.whl → 0.3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +13 -6
- flowfile/__main__.py +50 -15
- flowfile/api.py +383 -0
- flowfile/readme.md +130 -0
- flowfile/web/__init__.py +155 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +314 -0
- flowfile/web/static/assets/AirbyteReader-cb0c1d4a.js +921 -0
- flowfile/web/static/assets/CrossJoin-41efa4cb.css +100 -0
- flowfile/web/static/assets/CrossJoin-a514fa59.js +153 -0
- flowfile/web/static/assets/DatabaseConnectionSettings-0c04b2e5.css +77 -0
- flowfile/web/static/assets/DatabaseConnectionSettings-f2cecf33.js +151 -0
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +64 -0
- flowfile/web/static/assets/DatabaseManager-83ee3c98.js +484 -0
- flowfile/web/static/assets/DatabaseReader-dc0c6881.js +426 -0
- flowfile/web/static/assets/DatabaseReader-f50c6558.css +158 -0
- flowfile/web/static/assets/DatabaseWriter-2f570e53.css +96 -0
- flowfile/web/static/assets/DatabaseWriter-5afe9f8d.js +312 -0
- flowfile/web/static/assets/ExploreData-5bdae813.css +45 -0
- flowfile/web/static/assets/ExploreData-c7ee19cf.js +118306 -0
- flowfile/web/static/assets/ExternalSource-17b23a01.js +225 -0
- flowfile/web/static/assets/ExternalSource-e37b6275.css +94 -0
- flowfile/web/static/assets/Filter-90856b4f.js +238 -0
- flowfile/web/static/assets/Filter-a9d08ba1.css +20 -0
- flowfile/web/static/assets/Formula-38b71e9e.js +197 -0
- flowfile/web/static/assets/Formula-d60a74f4.css +17 -0
- flowfile/web/static/assets/FuzzyMatch-6857de82.css +254 -0
- flowfile/web/static/assets/FuzzyMatch-d0f1fe81.js +422 -0
- flowfile/web/static/assets/GoogleSheet-854294a4.js +2616 -0
- flowfile/web/static/assets/GoogleSheet-92084da7.css +233 -0
- flowfile/web/static/assets/GraphSolver-0c86bbc6.js +382 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +68 -0
- flowfile/web/static/assets/GroupBy-ab1ea74b.css +51 -0
- flowfile/web/static/assets/GroupBy-f2772e9f.js +413 -0
- flowfile/web/static/assets/Join-41c0f331.css +109 -0
- flowfile/web/static/assets/Join-bc3e1cf7.js +247 -0
- flowfile/web/static/assets/ManualInput-03aa0245.js +391 -0
- flowfile/web/static/assets/ManualInput-ac7b9972.css +84 -0
- flowfile/web/static/assets/Output-48f81019.css +2642 -0
- flowfile/web/static/assets/Output-5b35eee8.js +536 -0
- flowfile/web/static/assets/Pivot-7164087c.js +408 -0
- flowfile/web/static/assets/Pivot-f415e85f.css +35 -0
- flowfile/web/static/assets/PolarsCode-3abf6507.js +2863 -0
- flowfile/web/static/assets/PolarsCode-650322d1.css +35 -0
- flowfile/web/static/assets/PopOver-b37ff9be.js +577 -0
- flowfile/web/static/assets/PopOver-bccfde04.css +32 -0
- flowfile/web/static/assets/Read-65966a3e.js +701 -0
- flowfile/web/static/assets/Read-80dc1675.css +197 -0
- flowfile/web/static/assets/RecordCount-c66c6d6d.js +121 -0
- flowfile/web/static/assets/RecordId-826dc095.js +339 -0
- flowfile/web/static/assets/Sample-4ed555c8.js +184 -0
- flowfile/web/static/assets/SecretManager-eac1e97d.js +382 -0
- flowfile/web/static/assets/Select-085f05cc.js +231 -0
- flowfile/web/static/assets/SettingsSection-1f5e79c1.js +87 -0
- flowfile/web/static/assets/SettingsSection-9c836ecc.css +47 -0
- flowfile/web/static/assets/Sort-3e6cb414.js +309 -0
- flowfile/web/static/assets/Sort-7ccfa0fe.css +51 -0
- flowfile/web/static/assets/TextToRows-606349bc.js +307 -0
- flowfile/web/static/assets/TextToRows-c92d1ec2.css +48 -0
- flowfile/web/static/assets/UnavailableFields-5edd5322.css +49 -0
- flowfile/web/static/assets/UnavailableFields-b41976ed.js +36 -0
- flowfile/web/static/assets/Union-8d9ac7f9.css +30 -0
- flowfile/web/static/assets/Union-fca91665.js +145 -0
- flowfile/web/static/assets/Unique-a59f830e.js +273 -0
- flowfile/web/static/assets/Unique-b5615727.css +51 -0
- flowfile/web/static/assets/Unpivot-246e9bbd.css +77 -0
- flowfile/web/static/assets/Unpivot-c3815565.js +441 -0
- flowfile/web/static/assets/airbyte-292aa232.png +0 -0
- flowfile/web/static/assets/api-22b338bd.js +60 -0
- flowfile/web/static/assets/cross_join-d30c0290.png +0 -0
- flowfile/web/static/assets/database_reader-ce1e55f3.svg +24 -0
- flowfile/web/static/assets/database_writer-b4ad0753.svg +23 -0
- flowfile/web/static/assets/designer-2394122a.css +10697 -0
- flowfile/web/static/assets/designer-e5bbe26f.js +69712 -0
- flowfile/web/static/assets/documentation-08045cf2.js +33 -0
- flowfile/web/static/assets/documentation-12216a74.css +50 -0
- flowfile/web/static/assets/dropDown-35135ba8.css +143 -0
- flowfile/web/static/assets/dropDown-5e7e9a5a.js +319 -0
- flowfile/web/static/assets/dropDownGeneric-50a91b99.js +72 -0
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +10 -0
- flowfile/web/static/assets/element-icons-9c88a535.woff +0 -0
- flowfile/web/static/assets/element-icons-de5eb258.ttf +0 -0
- flowfile/web/static/assets/explore_data-8a0a2861.png +0 -0
- flowfile/web/static/assets/fa-brands-400-808443ae.ttf +0 -0
- flowfile/web/static/assets/fa-brands-400-d7236a19.woff2 +0 -0
- flowfile/web/static/assets/fa-regular-400-54cf6086.ttf +0 -0
- flowfile/web/static/assets/fa-regular-400-e3456d12.woff2 +0 -0
- flowfile/web/static/assets/fa-solid-900-aa759986.woff2 +0 -0
- flowfile/web/static/assets/fa-solid-900-d2f05935.ttf +0 -0
- flowfile/web/static/assets/fa-v4compatibility-0ce9033c.woff2 +0 -0
- flowfile/web/static/assets/fa-v4compatibility-30f6abf6.ttf +0 -0
- flowfile/web/static/assets/filter-d7708bda.png +0 -0
- flowfile/web/static/assets/formula-eeeb1611.png +0 -0
- flowfile/web/static/assets/fullEditor-178376bb.css +256 -0
- flowfile/web/static/assets/fullEditor-705c6ccb.js +630 -0
- flowfile/web/static/assets/fuzzy_match-40c161b2.png +0 -0
- flowfile/web/static/assets/genericNodeSettings-65587f20.js +137 -0
- flowfile/web/static/assets/genericNodeSettings-924759c7.css +46 -0
- flowfile/web/static/assets/graph_solver-8b7888b8.png +0 -0
- flowfile/web/static/assets/group_by-80561fc3.png +0 -0
- flowfile/web/static/assets/index-552863fd.js +58652 -0
- flowfile/web/static/assets/index-681a3ed0.css +8843 -0
- flowfile/web/static/assets/input_data-ab2eb678.png +0 -0
- flowfile/web/static/assets/join-349043ae.png +0 -0
- flowfile/web/static/assets/manual_input-ae98f31d.png +0 -0
- flowfile/web/static/assets/nodeTitle-cf9bae3c.js +227 -0
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +134 -0
- flowfile/web/static/assets/old_join-5d0eb604.png +0 -0
- flowfile/web/static/assets/output-06ec0371.png +0 -0
- flowfile/web/static/assets/pivot-9660df51.png +0 -0
- flowfile/web/static/assets/polars_code-05ce5dc6.png +0 -0
- flowfile/web/static/assets/record_count-dab44eb5.png +0 -0
- flowfile/web/static/assets/record_id-0b15856b.png +0 -0
- flowfile/web/static/assets/sample-693a88b5.png +0 -0
- flowfile/web/static/assets/secretApi-3ad510e1.js +46 -0
- flowfile/web/static/assets/select-b0d0437a.png +0 -0
- flowfile/web/static/assets/selectDynamic-b062bc9b.css +107 -0
- flowfile/web/static/assets/selectDynamic-bd644891.js +302 -0
- flowfile/web/static/assets/sort-2aa579f0.png +0 -0
- flowfile/web/static/assets/summarize-2a099231.png +0 -0
- flowfile/web/static/assets/text_to_rows-859b29ea.png +0 -0
- flowfile/web/static/assets/union-2d8609f4.png +0 -0
- flowfile/web/static/assets/unique-1958b98a.png +0 -0
- flowfile/web/static/assets/unpivot-d3cb4b5b.png +0 -0
- flowfile/web/static/assets/view-7a0f0be1.png +0 -0
- flowfile/web/static/assets/vue-codemirror.esm-dd17b478.js +22281 -0
- flowfile/web/static/assets/vue-content-loader.es-6b36f05e.js +210 -0
- flowfile/web/static/flowfile.svg +47 -0
- flowfile/web/static/icons/flowfile.png +0 -0
- flowfile/web/static/images/airbyte.png +0 -0
- flowfile/web/static/images/flowfile.svg +47 -0
- flowfile/web/static/images/google.svg +1 -0
- flowfile/web/static/images/sheets.png +0 -0
- flowfile/web/static/index.html +22 -0
- flowfile/web/static/vite.svg +1 -0
- flowfile/web/static/vue.svg +1 -0
- flowfile-0.3.0.2.dist-info/METADATA +235 -0
- {flowfile-0.3.0.dist-info → flowfile-0.3.0.2.dist-info}/RECORD +147 -15
- {flowfile-0.3.0.dist-info → flowfile-0.3.0.2.dist-info}/entry_points.txt +1 -1
- flowfile_core/configs/settings.py +7 -32
- flowfile_core/flowfile/FlowfileFlow.py +4 -2
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -1
- flowfile_core/main.py +4 -1
- flowfile_core/schemas/input_schema.py +1 -8
- flowfile_frame/__init__.py +0 -1
- flowfile_frame/utils.py +0 -139
- flowfile-0.3.0.dist-info/METADATA +0 -219
- flowfile_frame/__main__.py +0 -12
- {flowfile-0.3.0.dist-info → flowfile-0.3.0.2.dist-info}/LICENSE +0 -0
- {flowfile-0.3.0.dist-info → flowfile-0.3.0.2.dist-info}/WHEEL +0 -0
flowfile_core/main.py
CHANGED
|
@@ -54,6 +54,8 @@ origins = [
|
|
|
54
54
|
"http://localhost:8081",
|
|
55
55
|
"http://localhost:4173",
|
|
56
56
|
"http://localhost:4174",
|
|
57
|
+
"http://localhost:63578",
|
|
58
|
+
"http://127.0.0.1:63578"
|
|
57
59
|
]
|
|
58
60
|
|
|
59
61
|
app.add_middleware(
|
|
@@ -117,7 +119,8 @@ def run(host: str = None, port: int = None):
|
|
|
117
119
|
app,
|
|
118
120
|
host=host,
|
|
119
121
|
port=port,
|
|
120
|
-
loop="asyncio"
|
|
122
|
+
loop="asyncio",
|
|
123
|
+
log_level="warning",
|
|
121
124
|
)
|
|
122
125
|
server = uvicorn.Server(config)
|
|
123
126
|
server_instance = server # Store server instance globally
|
|
flowfile_core/schemas/input_schema.py
CHANGED
|
@@ -164,8 +164,7 @@ class OutputSettings(BaseModel):
|
|
|
164
164
|
|
|
165
165
|
@model_validator(mode='after')
|
|
166
166
|
def populate_abs_file_path(self):
|
|
167
|
-
|
|
168
|
-
self.set_absolute_filepath()
|
|
167
|
+
self.set_absolute_filepath()
|
|
169
168
|
return self
|
|
170
169
|
|
|
171
170
|
|
|
@@ -297,12 +296,6 @@ class DatabaseSettings(BaseModel):
|
|
|
297
296
|
query: Optional[str] = None
|
|
298
297
|
query_mode: Literal['query', 'table', 'reference'] = 'table'
|
|
299
298
|
|
|
300
|
-
@model_validator(mode='after')
|
|
301
|
-
def validate_table_or_query(self):
|
|
302
|
-
if (not self.table_name and not self.query) and self.query_mode == 'inline':
|
|
303
|
-
raise ValueError("Either 'table' or 'query' must be provided")
|
|
304
|
-
return self
|
|
305
|
-
|
|
306
299
|
@model_validator(mode='after')
|
|
307
300
|
def validate_table_or_query(self):
|
|
308
301
|
# Validate that either table_name or query is provided
|
flowfile_frame/__init__.py
CHANGED
flowfile_frame/utils.py
CHANGED
|
@@ -43,142 +43,3 @@ def create_flow_graph() -> FlowGraph:
|
|
|
43
43
|
flow_graph = FlowGraph(flow_id=flow_id, flow_settings=flow_settings)
|
|
44
44
|
flow_graph.flow_settings.execution_location = 'local' # always create a local frame so that the run time does not attempt to use the flowfile_worker process
|
|
45
45
|
return flow_graph
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def is_flowfile_running() -> bool:
|
|
49
|
-
"""Check if the Flowfile application is running by testing its API endpoint."""
|
|
50
|
-
try:
|
|
51
|
-
response = requests.get("http://0.0.0.0:63578/docs", timeout=2)
|
|
52
|
-
return response.status_code == 200
|
|
53
|
-
except (requests.ConnectionError, requests.Timeout):
|
|
54
|
-
return False
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def start_flowfile_application() -> bool:
|
|
58
|
-
"""Start the Flowfile application on macOS."""
|
|
59
|
-
try:
|
|
60
|
-
# Attempt to start the Flowfile application
|
|
61
|
-
subprocess.Popen(['open', '-a', 'Flowfile'],
|
|
62
|
-
stdout=subprocess.PIPE,
|
|
63
|
-
stderr=subprocess.PIPE)
|
|
64
|
-
|
|
65
|
-
# Wait for the application to start up (max 10 seconds)
|
|
66
|
-
start_time = time.time()
|
|
67
|
-
while time.time() - start_time < 10:
|
|
68
|
-
if is_flowfile_running():
|
|
69
|
-
return True
|
|
70
|
-
time.sleep(0.5) # Check every half second
|
|
71
|
-
|
|
72
|
-
# If we get here, the app didn't start in time
|
|
73
|
-
return False
|
|
74
|
-
except Exception as e:
|
|
75
|
-
print(f"Error starting Flowfile application: {e}")
|
|
76
|
-
return False
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def get_auth_token() -> Optional[str]:
|
|
80
|
-
"""Get an authentication token from the Flowfile API."""
|
|
81
|
-
try:
|
|
82
|
-
response = requests.post(
|
|
83
|
-
"http://0.0.0.0:63578/auth/token",
|
|
84
|
-
json={}, # Empty body as specified
|
|
85
|
-
timeout=5
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
if response.status_code == 200:
|
|
89
|
-
token_data = response.json()
|
|
90
|
-
return token_data.get("access_token")
|
|
91
|
-
else:
|
|
92
|
-
print(f"Failed to get auth token: {response.status_code} - {response.text}")
|
|
93
|
-
return None
|
|
94
|
-
except Exception as e:
|
|
95
|
-
print(f"Error getting auth token: {e}")
|
|
96
|
-
return None
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def import_flow_to_editor(flow_path: str, auth_token: str) -> Optional[int]:
|
|
100
|
-
"""Import the flow into the Flowfile editor using the API endpoint."""
|
|
101
|
-
try:
|
|
102
|
-
flow_path = Path(flow_path).resolve() # Get absolute path
|
|
103
|
-
if not flow_path.exists():
|
|
104
|
-
print(f"Flow file not found: {flow_path}")
|
|
105
|
-
return None
|
|
106
|
-
|
|
107
|
-
# Set authorization header with the token
|
|
108
|
-
headers = {"Authorization": f"Bearer {auth_token}"}
|
|
109
|
-
|
|
110
|
-
# Make a GET request to the import endpoint
|
|
111
|
-
response = requests.get(
|
|
112
|
-
"http://0.0.0.0:63578/import_flow/",
|
|
113
|
-
params={"flow_path": str(flow_path)},
|
|
114
|
-
headers=headers,
|
|
115
|
-
timeout=10
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
if response.status_code == 200:
|
|
119
|
-
flow_id = response.json()
|
|
120
|
-
print(f"Flow imported successfully with ID: {flow_id}")
|
|
121
|
-
return flow_id
|
|
122
|
-
else:
|
|
123
|
-
print(f"Failed to import flow: {response.status_code} - {response.text}")
|
|
124
|
-
return None
|
|
125
|
-
except Exception as e:
|
|
126
|
-
print(f"Error importing flow: {e}")
|
|
127
|
-
return None
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def open_graph_in_editor(flow_graph: FlowGraph, storage_location: str = None) -> bool:
|
|
131
|
-
"""
|
|
132
|
-
Save the ETL graph and open it in the Flowfile editor.
|
|
133
|
-
|
|
134
|
-
Parameters:
|
|
135
|
-
-----------
|
|
136
|
-
flow_graph : FlowGraph
|
|
137
|
-
The graph to save and open
|
|
138
|
-
storage_location : str, optional
|
|
139
|
-
Where to save the flowfile. If None, a default name is used.
|
|
140
|
-
|
|
141
|
-
Returns:
|
|
142
|
-
--------
|
|
143
|
-
bool
|
|
144
|
-
True if the graph was successfully opened in the editor, False otherwise
|
|
145
|
-
"""
|
|
146
|
-
# Create a temporary directory if needed
|
|
147
|
-
temp_dir = None
|
|
148
|
-
if storage_location is None:
|
|
149
|
-
temp_dir = TemporaryDirectory()
|
|
150
|
-
storage_location = os.path.join(temp_dir.name, 'temp_flow.flowfile')
|
|
151
|
-
else:
|
|
152
|
-
# Ensure path is absolute
|
|
153
|
-
storage_location = os.path.abspath(storage_location)
|
|
154
|
-
|
|
155
|
-
flow_graph.apply_layout()
|
|
156
|
-
flow_graph.save_flow(storage_location)
|
|
157
|
-
print(f"Flow saved to: {storage_location}")
|
|
158
|
-
|
|
159
|
-
# Check if Flowfile is running, and start it if not
|
|
160
|
-
if not is_flowfile_running():
|
|
161
|
-
print("Flowfile application is not running. Starting it...")
|
|
162
|
-
if not start_flowfile_application():
|
|
163
|
-
print("Failed to start Flowfile application")
|
|
164
|
-
if temp_dir:
|
|
165
|
-
temp_dir.cleanup()
|
|
166
|
-
return False
|
|
167
|
-
print("Flowfile application started successfully")
|
|
168
|
-
|
|
169
|
-
# Get authentication token
|
|
170
|
-
auth_token = get_auth_token()
|
|
171
|
-
if not auth_token:
|
|
172
|
-
print("Failed to authenticate with Flowfile API")
|
|
173
|
-
if temp_dir:
|
|
174
|
-
temp_dir.cleanup()
|
|
175
|
-
return False
|
|
176
|
-
|
|
177
|
-
# Import the flow into the editor
|
|
178
|
-
flow_id = import_flow_to_editor(storage_location, auth_token)
|
|
179
|
-
|
|
180
|
-
# Clean up temporary directory if we created one
|
|
181
|
-
if temp_dir:
|
|
182
|
-
temp_dir.cleanup()
|
|
183
|
-
|
|
184
|
-
return flow_id is not None
|
|
flowfile-0.3.0.dist-info/METADATA
DELETED
|
@@ -1,219 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: Flowfile
|
|
3
|
-
Version: 0.3.0
|
|
4
|
-
Summary: Project combining flowfile core (backend) and flowfile_worker (compute offloader) and flowfile_frame (api)
|
|
5
|
-
Author: Edward van Eechoud
|
|
6
|
-
Author-email: evaneechoud@gmail.com
|
|
7
|
-
Requires-Python: >=3.10,<3.13
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
-
Requires-Dist: XlsxWriter (>=3.2.0,<3.3.0)
|
|
13
|
-
Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
|
|
14
|
-
Requires-Dist: airbyte-cdk (==6.47.2)
|
|
15
|
-
Requires-Dist: bcrypt (>=4.3.0,<5.0.0)
|
|
16
|
-
Requires-Dist: connectorx (>=0.4.2,<0.5.0)
|
|
17
|
-
Requires-Dist: databases (>=0.9.0,<0.10.0)
|
|
18
|
-
Requires-Dist: faker (>=23.1.0,<23.2.0)
|
|
19
|
-
Requires-Dist: fastapi (>=0.115.2,<0.116.0)
|
|
20
|
-
Requires-Dist: fastexcel (>=0.12.0,<0.13.0)
|
|
21
|
-
Requires-Dist: google-api-python-client (>=2.149.0,<2.150.0)
|
|
22
|
-
Requires-Dist: gspread (>=6.1.3,<6.2.0)
|
|
23
|
-
Requires-Dist: loky (>=3.4.1,<3.5.0)
|
|
24
|
-
Requires-Dist: methodtools (>=0.4.7,<0.5.0)
|
|
25
|
-
Requires-Dist: openpyxl (>=3.1.2,<3.2.0)
|
|
26
|
-
Requires-Dist: passlib (>=1.7.4,<1.8.0)
|
|
27
|
-
Requires-Dist: pendulum (==2.1.2) ; python_version < "3.12"
|
|
28
|
-
Requires-Dist: polars (>1.8.2,<=1.25.2)
|
|
29
|
-
Requires-Dist: polars-distance (>=0.4.3,<0.5.0)
|
|
30
|
-
Requires-Dist: polars-ds (>=0.6.0)
|
|
31
|
-
Requires-Dist: polars-expr-transformer (>0.4.7.0)
|
|
32
|
-
Requires-Dist: polars-grouper (>=0.3.0,<0.4.0)
|
|
33
|
-
Requires-Dist: polars_simed (>=0.3.4,<0.4.0)
|
|
34
|
-
Requires-Dist: pyairbyte-flowfile (==0.20.2)
|
|
35
|
-
Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
|
|
36
|
-
Requires-Dist: pydantic (>=2.9.2,<2.10.0)
|
|
37
|
-
Requires-Dist: pyinstaller (>=6.11.0,<7.0.0)
|
|
38
|
-
Requires-Dist: pytest (>=8.3.4,<9.0.0)
|
|
39
|
-
Requires-Dist: python-jose (>=3.4.0,<4.0.0)
|
|
40
|
-
Requires-Dist: python-multipart (>=0.0.12,<0.1.0)
|
|
41
|
-
Requires-Dist: uvicorn (>=0.32.0,<0.33.0)
|
|
42
|
-
Description-Content-Type: text/markdown
|
|
43
|
-
|
|
44
|
-
<h1 align="center">
|
|
45
|
-
<img src="https://raw.githubusercontent.com/Edwardvaneechoud/Flowfile/main/.github/images/logo.png" alt="Flowfile Logo" width="100">
|
|
46
|
-
<br>
|
|
47
|
-
Flowfile
|
|
48
|
-
</h1>
|
|
49
|
-
|
|
50
|
-
<p align="center">
|
|
51
|
-
<b>Main Repository</b>: <a href="https://github.com/Edwardvaneechoud/Flowfile">Edwardvaneechoud/Flowfile</a><br>
|
|
52
|
-
<b>Documentation</b>:
|
|
53
|
-
<a href="https://edwardvaneechoud.github.io/Flowfile/">Website</a> -
|
|
54
|
-
<a href="https://github.com/Edwardvaneechoud/Flowfile/blob/main/flowfile_core/README.md">Core</a> -
|
|
55
|
-
<a href="https://github.com/Edwardvaneechoud/Flowfile/blob/main/flowfile_worker/README.md">Worker</a> -
|
|
56
|
-
<a href="https://github.com/Edwardvaneechoud/Flowfile/blob/main/flowfile_frontend/README.md">Frontend</a> -
|
|
57
|
-
<a href="https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c">Technical Architecture</a>
|
|
58
|
-
</p>
|
|
59
|
-
|
|
60
|
-
<p>
|
|
61
|
-
Flowfile is a visual ETL tool and Python library suite that combines drag-and-drop workflow building with the speed of Polars dataframes. Build data pipelines visually, transform data using powerful nodes, or define data flows programmatically with Python and analyze results - all with high-performance data processing.
|
|
62
|
-
</p>
|
|
63
|
-
|
|
64
|
-
<div align="center">
|
|
65
|
-
<img src="https://raw.githubusercontent.com/Edwardvaneechoud/Flowfile/main/.github/images/group_by_screenshot.png" alt="Flowfile Interface" width="800"/>
|
|
66
|
-
</div>
|
|
67
|
-
|
|
68
|
-
## ⚡ Technical Design
|
|
69
|
-
|
|
70
|
-
The `Flowfile` PyPI package provides the backend services and the `flowfile_frame` Python library:
|
|
71
|
-
|
|
72
|
-
- **Core (`flowfile_core`)** (FastAPI): The main ETL engine using Polars for high-performance data transformations. Typically runs on port `:63578`.
|
|
73
|
-
- **Worker (`flowfile_worker`)** (FastAPI): Handles computation-intensive tasks and caching of data operations, supporting the Core service. Typically runs on port `:63579`.
|
|
74
|
-
- **FlowFrame API (`flowfile_frame`)**: A Python library with a Polars-like API for defining data manipulation pipelines programmatically, which also generates an underlying ETL graph compatible with the Flowfile ecosystem.
|
|
75
|
-
|
|
76
|
-
Each flow is represented as a directed acyclic graph (DAG), where nodes represent data operations and edges represent data flow between operations.
|
|
77
|
-
|
|
78
|
-
For a deeper dive into the technical architecture, check out [this article](https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c) on how Flowfile leverages Polars for efficient data processing.
|
|
79
|
-
|
|
80
|
-
## ✨ Introducing FlowFile Frame - A Polars-Like API for ETL
|
|
81
|
-
|
|
82
|
-
FlowFile Frame is a Python library that provides a familiar Polars-like API for data manipulation, while simultaneously building an ETL (Extract, Transform, Load) graph under the hood. This allows you to:
|
|
83
|
-
|
|
84
|
-
1. Write data transformation code using a simple, Pandas/Polars-like API
|
|
85
|
-
2. Automatically generate executable ETL workflows compatible with the Flowfile ecosystem
|
|
86
|
-
3. Visualize, save, and share your data pipelines
|
|
87
|
-
4. Get the performance benefits of Polars with the traceability of ETL graphs
|
|
88
|
-
|
|
89
|
-
### FlowFrame Quick Start
|
|
90
|
-
|
|
91
|
-
```python
|
|
92
|
-
import flowfile_frame as ff
|
|
93
|
-
from flowfile_frame.utils import open_graph_in_editor
|
|
94
|
-
|
|
95
|
-
# Create a complex data pipeline
|
|
96
|
-
df = ff.from_dict({
|
|
97
|
-
"id": [1, 2, 3, 4, 5],
|
|
98
|
-
"category": ["A", "B", "A", "C", "B"],
|
|
99
|
-
"value": [100, 200, 150, 300, 250]
|
|
100
|
-
})
|
|
101
|
-
|
|
102
|
-
open_graph_in_editor(df.flow_graph)
|
|
103
|
-
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
### Key FlowFrame Features
|
|
107
|
-
|
|
108
|
-
- **Familiar API**: Based on Polars, making it easy to learn if you know Pandas or Polars
|
|
109
|
-
- **ETL Graph Generation**: Automatically builds a directed acyclic graph of your data operations
|
|
110
|
-
- **Lazy Evaluation**: Operations are not executed until `collect()` or a write operation
|
|
111
|
-
- **Interoperability**: Saved `.flowfile` graphs can be opened in the visual Flowfile Designer
|
|
112
|
-
- **High Performance**: Leverages Polars for fast data processing
|
|
113
|
-
- **Reproducible**: Save and share your data transformation workflows
|
|
114
|
-
|
|
115
|
-
### Common FlowFrame Operations
|
|
116
|
-
|
|
117
|
-
```python
|
|
118
|
-
import flowfile_frame as ff
|
|
119
|
-
from flowfile_frame import col, when
|
|
120
|
-
|
|
121
|
-
# Create from dictionary
|
|
122
|
-
df = ff.from_dict({
|
|
123
|
-
"id": [1, 2, 3],
|
|
124
|
-
"name": ["Alice", "Bob", "Charlie"],
|
|
125
|
-
"age": [25, 35, 28]
|
|
126
|
-
})
|
|
127
|
-
|
|
128
|
-
flow_graph = df.flow_graph
|
|
129
|
-
# Reading data
|
|
130
|
-
# df_csv = ff.read_csv("data.csv")
|
|
131
|
-
# df_parquet = ff.read_parquet("data.parquet")
|
|
132
|
-
|
|
133
|
-
# Filtering
|
|
134
|
-
adults = df.filter(col("age") >= 30)
|
|
135
|
-
|
|
136
|
-
# Select and transform
|
|
137
|
-
result = df.select(
|
|
138
|
-
col("name"),
|
|
139
|
-
(col("age") * 2).alias("double_age")
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
# Add new columns
|
|
143
|
-
df_with_cols = df.with_columns([
|
|
144
|
-
(col("age") + 10).alias("future_age"),
|
|
145
|
-
when(col("age") >= 30).then(ff.lit("Senior")).otherwise(ff.lit("Junior")).alias("status")]
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
# Group by and aggregate
|
|
149
|
-
df_sales = ff.from_dict({
|
|
150
|
-
"region": ["North", "South", "North", "South"],
|
|
151
|
-
"sales": [100, 200, 150, 300]
|
|
152
|
-
})
|
|
153
|
-
sales_by_region = df_sales.group_by("region").agg([
|
|
154
|
-
col("sales").sum().alias("total_sales"),
|
|
155
|
-
col("sales").mean().alias("avg_sales")
|
|
156
|
-
])
|
|
157
|
-
|
|
158
|
-
# Joins
|
|
159
|
-
customers = ff.from_dict({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]}, flow_graph=flow_graph)
|
|
160
|
-
orders = ff.from_dict({"id": [101, 102], "customer_id": [1, 2], "amount": [100, 200]}, flow_graph=flow_graph)
|
|
161
|
-
joined = customers.join(orders, left_on="id", right_on="customer_id")
|
|
162
|
-
|
|
163
|
-
# Save and visualize ETL graph
|
|
164
|
-
|
|
165
|
-
result.save_graph("my_pipeline.flowfile")
|
|
166
|
-
# open_graph_in_editor(result.flow_graph, "my_pipeline.flowfile") # Opens in Designer UI if installed
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
For more detailed information on all available operations, including pivoting, window functions, complex workflows, and more, please refer to the [FlowFrame documentation](https://github.com/Edwardvaneechoud/Flowfile/blob/main/flowfile_frame/README.md).
|
|
170
|
-
|
|
171
|
-
## 🔥 Example Use Cases
|
|
172
|
-
|
|
173
|
-
Flowfile is great for:
|
|
174
|
-
|
|
175
|
-
- **Data Cleaning & Transformation**
|
|
176
|
-
- Complex joins (fuzzy matching)
|
|
177
|
-
- Text-to-rows transformations
|
|
178
|
-
- Advanced filtering and grouping
|
|
179
|
-
- Custom formulas and expressions
|
|
180
|
-
- Filter data based on conditions
|
|
181
|
-
|
|
182
|
-
- **Performance**
|
|
183
|
-
- Built to scale out of core
|
|
184
|
-
- Using Polars for data processing
|
|
185
|
-
|
|
186
|
-
- **Data Integration**
|
|
187
|
-
- Standardize data formats
|
|
188
|
-
- Handle messy Excel files
|
|
189
|
-
|
|
190
|
-
- **ETL Operations**
|
|
191
|
-
- Data quality checks
|
|
192
|
-
|
|
193
|
-
(For more visual examples of these use cases, please see our [main GitHub repository](https://github.com/Edwardvaneechoud/Flowfile#-example-use-cases)).
|
|
194
|
-
|
|
195
|
-
## 🚀 Getting Started
|
|
196
|
-
|
|
197
|
-
### Installing the Flowfile Python Package
|
|
198
|
-
|
|
199
|
-
This package provides the `flowfile_core` and `flowfile_worker` backend services, and the `flowfile_frame` library.
|
|
200
|
-
|
|
201
|
-
```bash
|
|
202
|
-
pip install Flowfile
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
Once installed, you can use `flowfile_frame` as a library in your Python scripts (see Quick Start above).
|
|
206
|
-
|
|
207
|
-
### Full Application with Visual Designer
|
|
208
|
-
|
|
209
|
-
For the complete visual ETL experience with the Designer UI, please see the [installation instructions in the main repository](https://github.com/Edwardvaneechoud/Flowfile#-getting-started).
|
|
210
|
-
|
|
211
|
-
Available options include:
|
|
212
|
-
- Desktop application (recommended for most users)
|
|
213
|
-
- Docker setup (backend services + web frontend)
|
|
214
|
-
- Manual setup for development
|
|
215
|
-
|
|
216
|
-
## 📋 Development Roadmap
|
|
217
|
-
|
|
218
|
-
For the latest development roadmap and TODO list, please refer to the [main repository](https://github.com/Edwardvaneechoud/Flowfile#-todo).
|
|
219
|
-
|
flowfile_frame/__main__.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
"""Main entry point for the FlowFrame CLI."""
|
|
2
|
-
|
|
3
|
-
def main():
|
|
4
|
-
"""Main entry point for the FlowFrame CLI."""
|
|
5
|
-
print("FlowFrame - A Polars-like API for building ETL graphs")
|
|
6
|
-
print("Usage: import flowframe as ff")
|
|
7
|
-
print(" df = ff.from_dict({'a': [1, 2, 3]})")
|
|
8
|
-
print(" result = df.filter(ff.col('a') > 1)")
|
|
9
|
-
print(" print(result.collect())")
|
|
10
|
-
|
|
11
|
-
if __name__ == "__main__":
|
|
12
|
-
main()
|
|
File without changes
|
|
File without changes
|