Flowfile 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (170) hide show
  1. flowfile-0.2.2/LICENSE +21 -0
  2. flowfile-0.2.2/PKG-INFO +225 -0
  3. flowfile-0.2.2/README.md +181 -0
  4. flowfile-0.2.2/build_backends/build_backends/__init__.py +0 -0
  5. flowfile-0.2.2/build_backends/build_backends/main.py +313 -0
  6. flowfile-0.2.2/build_backends/build_backends/main_prd.py +202 -0
  7. flowfile-0.2.2/flowfile/__init__.py +71 -0
  8. flowfile-0.2.2/flowfile/__main__.py +24 -0
  9. flowfile-0.2.2/flowfile_core/flowfile_core/__init__.py +13 -0
  10. flowfile-0.2.2/flowfile_core/flowfile_core/auth/__init__.py +0 -0
  11. flowfile-0.2.2/flowfile_core/flowfile_core/auth/jwt.py +140 -0
  12. flowfile-0.2.2/flowfile_core/flowfile_core/auth/models.py +40 -0
  13. flowfile-0.2.2/flowfile_core/flowfile_core/auth/secrets.py +178 -0
  14. flowfile-0.2.2/flowfile_core/flowfile_core/configs/__init__.py +35 -0
  15. flowfile-0.2.2/flowfile_core/flowfile_core/configs/flow_logger.py +433 -0
  16. flowfile-0.2.2/flowfile_core/flowfile_core/configs/node_store/__init__.py +0 -0
  17. flowfile-0.2.2/flowfile_core/flowfile_core/configs/node_store/nodes.py +98 -0
  18. flowfile-0.2.2/flowfile_core/flowfile_core/configs/settings.py +120 -0
  19. flowfile-0.2.2/flowfile_core/flowfile_core/database/__init__.py +0 -0
  20. flowfile-0.2.2/flowfile_core/flowfile_core/database/connection.py +51 -0
  21. flowfile-0.2.2/flowfile_core/flowfile_core/database/init_db.py +45 -0
  22. flowfile-0.2.2/flowfile_core/flowfile_core/database/models.py +41 -0
  23. flowfile-0.2.2/flowfile_core/flowfile_core/fileExplorer/__init__.py +0 -0
  24. flowfile-0.2.2/flowfile_core/flowfile_core/fileExplorer/funcs.py +259 -0
  25. flowfile-0.2.2/flowfile_core/flowfile_core/fileExplorer/utils.py +53 -0
  26. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  27. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/__init__.py +0 -0
  28. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/_extensions/__init__.py +0 -0
  29. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  30. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/analytics/__init__.py +0 -0
  31. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  32. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  33. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  34. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/analytics/utils.py +9 -0
  35. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  36. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  37. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/connection_manager/models.py +10 -0
  38. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  39. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  40. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  41. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/extensions.py +36 -0
  42. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  43. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  44. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  45. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  46. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  47. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  48. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  49. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  50. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  51. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  52. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  53. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  54. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  55. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  56. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  57. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  58. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  59. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  60. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  61. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  62. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  63. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  64. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  65. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_node/__init__.py +0 -0
  66. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  67. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_node/models.py +111 -0
  68. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  69. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/handler.py +123 -0
  70. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/manage/__init__.py +0 -0
  71. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  72. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  73. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  74. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  75. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  76. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/setting_generator/settings.py +176 -0
  77. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/__init__.py +0 -0
  78. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  79. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  80. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  81. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  82. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  83. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  84. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  85. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  86. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  87. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  88. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  89. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  90. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  91. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  92. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  93. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  94. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/util/__init__.py +0 -0
  95. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/util/calculate_layout.py +137 -0
  96. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/util/execution_orderer.py +141 -0
  97. flowfile-0.2.2/flowfile_core/flowfile_core/flowfile/utils.py +106 -0
  98. flowfile-0.2.2/flowfile_core/flowfile_core/main.py +138 -0
  99. flowfile-0.2.2/flowfile_core/flowfile_core/routes/__init__.py +0 -0
  100. flowfile-0.2.2/flowfile_core/flowfile_core/routes/auth.py +34 -0
  101. flowfile-0.2.2/flowfile_core/flowfile_core/routes/logs.py +163 -0
  102. flowfile-0.2.2/flowfile_core/flowfile_core/routes/public.py +10 -0
  103. flowfile-0.2.2/flowfile_core/flowfile_core/routes/routes.py +601 -0
  104. flowfile-0.2.2/flowfile_core/flowfile_core/routes/secrets.py +85 -0
  105. flowfile-0.2.2/flowfile_core/flowfile_core/run_lock.py +11 -0
  106. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/__init__.py +0 -0
  107. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  108. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  109. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/defaults.py +9 -0
  110. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/external_sources/__init__.py +0 -0
  111. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  112. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/input_schema.py +477 -0
  113. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/models.py +193 -0
  114. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/output_model.py +115 -0
  115. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/schemas.py +106 -0
  116. flowfile-0.2.2/flowfile_core/flowfile_core/schemas/transform_schema.py +569 -0
  117. flowfile-0.2.2/flowfile_core/flowfile_core/secrets/__init__.py +0 -0
  118. flowfile-0.2.2/flowfile_core/flowfile_core/secrets/secrets.py +64 -0
  119. flowfile-0.2.2/flowfile_core/flowfile_core/utils/__init__.py +0 -0
  120. flowfile-0.2.2/flowfile_core/flowfile_core/utils/arrow_reader.py +247 -0
  121. flowfile-0.2.2/flowfile_core/flowfile_core/utils/excel_file_manager.py +18 -0
  122. flowfile-0.2.2/flowfile_core/flowfile_core/utils/fileManager.py +45 -0
  123. flowfile-0.2.2/flowfile_core/flowfile_core/utils/fl_executor.py +38 -0
  124. flowfile-0.2.2/flowfile_core/flowfile_core/utils/utils.py +8 -0
  125. flowfile-0.2.2/flowfile_frame/flowfile_frame/__init__.py +56 -0
  126. flowfile-0.2.2/flowfile_frame/flowfile_frame/__main__.py +12 -0
  127. flowfile-0.2.2/flowfile_frame/flowfile_frame/adapters.py +17 -0
  128. flowfile-0.2.2/flowfile_frame/flowfile_frame/expr.py +1163 -0
  129. flowfile-0.2.2/flowfile_frame/flowfile_frame/flow_frame.py +2093 -0
  130. flowfile-0.2.2/flowfile_frame/flowfile_frame/group_frame.py +199 -0
  131. flowfile-0.2.2/flowfile_frame/flowfile_frame/join.py +75 -0
  132. flowfile-0.2.2/flowfile_frame/flowfile_frame/selectors.py +242 -0
  133. flowfile-0.2.2/flowfile_frame/flowfile_frame/utils.py +184 -0
  134. flowfile-0.2.2/flowfile_worker/flowfile_worker/__init__.py +55 -0
  135. flowfile-0.2.2/flowfile_worker/flowfile_worker/configs.py +95 -0
  136. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/__init__.py +37 -0
  137. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/funcs.py +146 -0
  138. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/models.py +86 -0
  139. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/pl_types.py +35 -0
  140. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/read_excel_tables.py +110 -0
  141. flowfile-0.2.2/flowfile_worker/flowfile_worker/create/utils.py +84 -0
  142. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/__init__.py +0 -0
  143. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  144. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  145. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  146. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  147. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  148. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  149. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/sql_source/main.py +56 -0
  150. flowfile-0.2.2/flowfile_worker/flowfile_worker/external_sources/sql_source/models.py +72 -0
  151. flowfile-0.2.2/flowfile_worker/flowfile_worker/flow_logger.py +58 -0
  152. flowfile-0.2.2/flowfile_worker/flowfile_worker/funcs.py +327 -0
  153. flowfile-0.2.2/flowfile_worker/flowfile_worker/main.py +108 -0
  154. flowfile-0.2.2/flowfile_worker/flowfile_worker/models.py +95 -0
  155. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  156. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  157. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/models.py +36 -0
  158. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  159. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/process.py +86 -0
  160. flowfile-0.2.2/flowfile_worker/flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  161. flowfile-0.2.2/flowfile_worker/flowfile_worker/process_manager.py +36 -0
  162. flowfile-0.2.2/flowfile_worker/flowfile_worker/routes.py +440 -0
  163. flowfile-0.2.2/flowfile_worker/flowfile_worker/secrets.py +148 -0
  164. flowfile-0.2.2/flowfile_worker/flowfile_worker/spawner.py +187 -0
  165. flowfile-0.2.2/flowfile_worker/flowfile_worker/utils.py +25 -0
  166. flowfile-0.2.2/pyproject.toml +75 -0
  167. flowfile-0.2.2/test_utils/__init__.py +3 -0
  168. flowfile-0.2.2/test_utils/postgres/__init__.py +1 -0
  169. flowfile-0.2.2/test_utils/postgres/commands.py +109 -0
  170. flowfile-0.2.2/test_utils/postgres/fixtures.py +417 -0
flowfile-0.2.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Edward van Eechoud
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,225 @@
1
+ Metadata-Version: 2.3
2
+ Name: Flowfile
3
+ Version: 0.2.2
4
+ Summary: Project combining flowfile core (backend) and flowfile_worker (compute offloader) and flowfile_frame (api)
5
+ Author: Edward van Eechoud
6
+ Author-email: evaneechoud@gmail.com
7
+ Requires-Python: >=3.10,<3.13
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: XlsxWriter (>=3.2.0,<3.3.0)
13
+ Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
14
+ Requires-Dist: airbyte-cdk (==6.47.2)
15
+ Requires-Dist: bcrypt (>=4.3.0,<5.0.0)
16
+ Requires-Dist: connectorx (>=0.4.2,<0.5.0)
17
+ Requires-Dist: databases (>=0.9.0,<0.10.0)
18
+ Requires-Dist: faker (>=23.1.0,<23.2.0)
19
+ Requires-Dist: fastapi (>=0.115.2,<0.116.0)
20
+ Requires-Dist: fastexcel (>=0.12.0,<0.13.0)
21
+ Requires-Dist: google-api-python-client (>=2.149.0,<2.150.0)
22
+ Requires-Dist: gspread (>=6.1.3,<6.2.0)
23
+ Requires-Dist: loky (>=3.4.1,<3.5.0)
24
+ Requires-Dist: methodtools (>=0.4.7,<0.5.0)
25
+ Requires-Dist: openpyxl (>=3.1.2,<3.2.0)
26
+ Requires-Dist: passlib (>=1.7.4,<1.8.0)
27
+ Requires-Dist: pendulum (==2.1.2) ; python_version < "3.12"
28
+ Requires-Dist: polars (>1.8.2,<=1.25.2)
29
+ Requires-Dist: polars-distance (>=0.4.3,<0.5.0)
30
+ Requires-Dist: polars-ds (>=0.6.0)
31
+ Requires-Dist: polars-expr-transformer (>0.4.7.0)
32
+ Requires-Dist: polars-grouper (>=0.3.0,<0.4.0)
33
+ Requires-Dist: polars_simed (>=0.3.4,<0.4.0)
34
+ Requires-Dist: pyairbyte-flowfile (==0.20.2)
35
+ Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
36
+ Requires-Dist: pydantic (>=2.9.2,<2.10.0)
37
+ Requires-Dist: pyinstaller (>=6.11.0,<7.0.0)
38
+ Requires-Dist: pytest (>=8.3.4,<9.0.0)
39
+ Requires-Dist: python-jose (>=3.4.0,<4.0.0)
40
+ Requires-Dist: python-multipart (>=0.0.12,<0.1.0)
41
+ Requires-Dist: uvicorn (>=0.32.0,<0.33.0)
42
+ Description-Content-Type: text/markdown
43
+
44
+ <h1 align="center">
45
+ <img src=".github/images/logo.png" alt="Flowfile Logo" width="100">
46
+ <br>
47
+ Flowfile
48
+ </h1>
49
+ <p align="center">
50
+ <b>Documentation</b>:
51
+ <a href="https://edwardvaneechoud.github.io/Flowfile/">Website</a>
52
+ -
53
+ <a href="flowfile_core/README.md">Core</a>
54
+ -
55
+ <a href="flowfile_worker/README.md">Worker</a>
56
+ -
57
+ <a href="flowfile_frontend/README.md">Frontend</a>
58
+ -
59
+ <a href="https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c">Technical Architecture</a>
60
+ </p>
61
+ <p>
62
+ Flowfile is a visual ETL tool that combines drag-and-drop workflow building with the speed of Polars dataframes. Build data pipelines visually, transform data using powerful nodes, and analyze results - all without writing code.
63
+ </p>
64
+
65
+ <div align="center">
66
+ <img src=".github/images/group_by_screenshot.png" alt="Flowfile Interface" width="800"/>
67
+ </div>
68
+
69
+ ## ⚡ Technical Design
70
+
71
+ Flowfile operates as three interconnected services:
72
+
73
+ - **Designer** (Electron + Vue): Visual interface for building data flows
74
+ - **Core** (FastAPI): ETL engine using Polars for data transformations (`:63578`)
75
+ - **Worker** (FastAPI): Handles computation and caching of data operations (`:63579`)
76
+
77
+ Each flow is represented as a directed acyclic graph (DAG), where nodes represent data operations and edges represent data flow between operations.
78
+
79
+ For a deeper dive into the technical architecture, check out [this article](https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c) on how Flowfile leverages Polars for efficient data processing.
80
+
81
+ ## 🔥 Example Use Cases
82
+
83
+ - **Data Cleaning & Transformation**
84
+ - Complex joins (fuzzy matching)
85
+ - Text to rows transformations
86
+ - Advanced filtering and grouping
87
+ - Custom formulas and expressions
88
+ - Filter data based on conditions
89
+
90
+ <div align="center">
91
+ <img src=".github/images/flowfile_demo_1.gif" alt="Flowfile Layout" width="800"/>
92
+ </div>
93
+
94
+ ---
95
+
96
+ - **Performance**
97
+ - Built to scale out of core
98
+ - Using Polars for data processing
99
+
100
+ <div align="center">
101
+ <img src=".github/images/demo_flowfile_write.gif" alt="Flowfile Layout" width="800"/>
102
+ </div>
103
+
104
+ ---
105
+
106
+ ### **Data Integration**
107
+ - Standardize data formats
108
+ - Handle messy Excel files
109
+
110
+
111
+ <div align="center">
112
+ <img src=".github/images/read_excel_flowfile.gif" alt="Flowfile Layout" width="800"/>
113
+ </div>
114
+
115
+
116
+ ---
117
+
118
+ - **ETL Operations**
119
+ - Data quality checks
120
+
121
+
122
+ ## 🚀 Getting Started
123
+
124
+ ### Prerequisites
125
+ - Python 3.10+
126
+ - Node.js 16+
127
+ - Poetry (Python package manager)
128
+ - Docker & Docker Compose (optional, for Docker setup)
129
+ - Make (optional, for build automation)
130
+
131
+ ### Installation Options
132
+
133
+ #### 1. Desktop Application
134
+ The desktop version offers the best experience with a native interface and integrated services. You can either:
135
+
136
+ **Option A: Download Pre-built Application**
137
+ - Download the latest release from [GitHub Releases](https://github.com/Edwardvaneechoud/Flowfile/releases)
138
+ - Run the installer for your platform (Windows, macOS, or Linux)
139
+ - Note: You may see security warnings since the installer isn't signed. On Windows, click "More info" then "Run anyway". On macOS, right-click the app, select "Open", then confirm. These warnings appear because the app isn't signed with a developer certificate.
140
+
141
+ **Option B: Build from Source:**
142
+ ```bash
143
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
144
+ cd Flowfile
145
+
146
+ # Build packaged executable
147
+ make # Creates platform-specific executable
148
+
149
+ # Or manually:
150
+ poetry install
151
+ poetry run build_backends
152
+ cd flowfile_frontend
153
+ npm install
154
+ npm run build # All platforms
155
+ ```
156
+
157
+ #### 2. Docker Setup
158
+ Perfect for quick testing, development or deployment scenarios. Runs all services in containers with proper networking and volume management:
159
+ ```bash
160
+ # Clone and start all services
161
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
162
+ cd Flowfile
163
+ docker compose up -d
164
+
165
+ # Access services:
166
+ Frontend: http://localhost:8080 # main service
167
+ Core API: http://localhost:63578/docs
168
+ Worker API: http://localhost:63579/docs
169
+ ```
170
+ Just place the files you want to transform in the shared_data directory and you're all set!
171
+
172
+ Docker Compose is also excellent for development, as it automatically sets up all required services and ensures proper communication between them. Code changes in the mounted volumes will be reflected in the running containers.
173
+
174
+ #### 3. Manual Setup (Development)
175
+ Ideal for development work when you need direct access to all services and hot-reloading:
176
+
177
+ ```bash
178
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
179
+ cd Flowfile
180
+
181
+ # Install Python dependencies
182
+ poetry install
183
+
184
+ # Start backend services
185
+ poetry run flowfile_worker # Starts worker on :63579
186
+ poetry run flowfile_core # Starts core on :63578
187
+
188
+ # Start web frontend
189
+ cd flowfile_frontend
190
+ npm install
191
+ npm run dev:web # Starts web interface on :8080
192
+ ```
193
+
194
+ ## 📋 TODO
195
+
196
+ ### Core Features
197
+ - [ ] Add cloud storage support
198
+ - S3 integration
199
+ - Azure Data Lake Storage (ADLS)
200
+ - [x] Multi-flow execution support
201
+ - [ ] Polars code reverse engineering
202
+ - Generate Polars code from visual flows
203
+ - Import existing Polars scripts
204
+
205
+ ### Documentation
206
+ - [ ] Add comprehensive docstrings
207
+ - [x] Create detailed node documentation
208
+ - [x] Add architectural documentation
209
+ - [ ] Improve inline code comments
210
+ - [ ] Create user guides and tutorials
211
+
212
+ ### Infrastructure
213
+ - [ ] Implement proper testing
214
+ - [x] Add CI/CD pipeline
215
+ - [x] Improve error handling
216
+ - [x] Add monitoring and logging
217
+
218
+ ## 📝 License
219
+
220
+ [MIT License](LICENSE)
221
+
222
+ ## Acknowledgments
223
+
224
+ Built with Polars, Vue.js, FastAPI, Vueflow and Electron.
225
+
@@ -0,0 +1,181 @@
1
+ <h1 align="center">
2
+ <img src=".github/images/logo.png" alt="Flowfile Logo" width="100">
3
+ <br>
4
+ Flowfile
5
+ </h1>
6
+ <p align="center">
7
+ <b>Documentation</b>:
8
+ <a href="https://edwardvaneechoud.github.io/Flowfile/">Website</a>
9
+ -
10
+ <a href="flowfile_core/README.md">Core</a>
11
+ -
12
+ <a href="flowfile_worker/README.md">Worker</a>
13
+ -
14
+ <a href="flowfile_frontend/README.md">Frontend</a>
15
+ -
16
+ <a href="https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c">Technical Architecture</a>
17
+ </p>
18
+ <p>
19
+ Flowfile is a visual ETL tool that combines drag-and-drop workflow building with the speed of Polars dataframes. Build data pipelines visually, transform data using powerful nodes, and analyze results - all without writing code.
20
+ </p>
21
+
22
+ <div align="center">
23
+ <img src=".github/images/group_by_screenshot.png" alt="Flowfile Interface" width="800"/>
24
+ </div>
25
+
26
+ ## ⚡ Technical Design
27
+
28
+ Flowfile operates as three interconnected services:
29
+
30
+ - **Designer** (Electron + Vue): Visual interface for building data flows
31
+ - **Core** (FastAPI): ETL engine using Polars for data transformations (`:63578`)
32
+ - **Worker** (FastAPI): Handles computation and caching of data operations (`:63579`)
33
+
34
+ Each flow is represented as a directed acyclic graph (DAG), where nodes represent data operations and edges represent data flow between operations.
35
+
36
+ For a deeper dive into the technical architecture, check out [this article](https://dev.to/edwardvaneechoud/building-flowfile-architecting-a-visual-etl-tool-with-polars-576c) on how Flowfile leverages Polars for efficient data processing.
37
+
38
+ ## 🔥 Example Use Cases
39
+
40
+ - **Data Cleaning & Transformation**
41
+ - Complex joins (fuzzy matching)
42
+ - Text to rows transformations
43
+ - Advanced filtering and grouping
44
+ - Custom formulas and expressions
45
+ - Filter data based on conditions
46
+
47
+ <div align="center">
48
+ <img src=".github/images/flowfile_demo_1.gif" alt="Flowfile Layout" width="800"/>
49
+ </div>
50
+
51
+ ---
52
+
53
+ - **Performance**
54
+ - Built to scale out of core
55
+ - Using Polars for data processing
56
+
57
+ <div align="center">
58
+ <img src=".github/images/demo_flowfile_write.gif" alt="Flowfile Layout" width="800"/>
59
+ </div>
60
+
61
+ ---
62
+
63
+ ### **Data Integration**
64
+ - Standardize data formats
65
+ - Handle messy Excel files
66
+
67
+
68
+ <div align="center">
69
+ <img src=".github/images/read_excel_flowfile.gif" alt="Flowfile Layout" width="800"/>
70
+ </div>
71
+
72
+
73
+ ---
74
+
75
+ - **ETL Operations**
76
+ - Data quality checks
77
+
78
+
79
+ ## 🚀 Getting Started
80
+
81
+ ### Prerequisites
82
+ - Python 3.10+
83
+ - Node.js 16+
84
+ - Poetry (Python package manager)
85
+ - Docker & Docker Compose (optional, for Docker setup)
86
+ - Make (optional, for build automation)
87
+
88
+ ### Installation Options
89
+
90
+ #### 1. Desktop Application
91
+ The desktop version offers the best experience with a native interface and integrated services. You can either:
92
+
93
+ **Option A: Download Pre-built Application**
94
+ - Download the latest release from [GitHub Releases](https://github.com/Edwardvaneechoud/Flowfile/releases)
95
+ - Run the installer for your platform (Windows, macOS, or Linux)
96
+ - Note: You may see security warnings since the installer isn't signed. On Windows, click "More info" then "Run anyway". On macOS, right-click the app, select "Open", then confirm. These warnings appear because the app isn't signed with a developer certificate.
97
+
98
+ **Option B: Build from Source:**
99
+ ```bash
100
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
101
+ cd Flowfile
102
+
103
+ # Build packaged executable
104
+ make # Creates platform-specific executable
105
+
106
+ # Or manually:
107
+ poetry install
108
+ poetry run build_backends
109
+ cd flowfile_frontend
110
+ npm install
111
+ npm run build # All platforms
112
+ ```
113
+
114
+ #### 2. Docker Setup
115
+ Perfect for quick testing, development or deployment scenarios. Runs all services in containers with proper networking and volume management:
116
+ ```bash
117
+ # Clone and start all services
118
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
119
+ cd Flowfile
120
+ docker compose up -d
121
+
122
+ # Access services:
123
+ Frontend: http://localhost:8080 # main service
124
+ Core API: http://localhost:63578/docs
125
+ Worker API: http://localhost:63579/docs
126
+ ```
127
+ Just place the files you want to transform in the shared_data directory and you're all set!
128
+
129
+ Docker Compose is also excellent for development, as it automatically sets up all required services and ensures proper communication between them. Code changes in the mounted volumes will be reflected in the running containers.
130
+
131
+ #### 3. Manual Setup (Development)
132
+ Ideal for development work when you need direct access to all services and hot-reloading:
133
+
134
+ ```bash
135
+ git clone https://github.com/edwardvaneechoud/Flowfile.git
136
+ cd Flowfile
137
+
138
+ # Install Python dependencies
139
+ poetry install
140
+
141
+ # Start backend services
142
+ poetry run flowfile_worker # Starts worker on :63579
143
+ poetry run flowfile_core # Starts core on :63578
144
+
145
+ # Start web frontend
146
+ cd flowfile_frontend
147
+ npm install
148
+ npm run dev:web # Starts web interface on :8080
149
+ ```
150
+
151
+ ## 📋 TODO
152
+
153
+ ### Core Features
154
+ - [ ] Add cloud storage support
155
+ - S3 integration
156
+ - Azure Data Lake Storage (ADLS)
157
+ - [x] Multi-flow execution support
158
+ - [ ] Polars code reverse engineering
159
+ - Generate Polars code from visual flows
160
+ - Import existing Polars scripts
161
+
162
+ ### Documentation
163
+ - [ ] Add comprehensive docstrings
164
+ - [x] Create detailed node documentation
165
+ - [x] Add architectural documentation
166
+ - [ ] Improve inline code comments
167
+ - [ ] Create user guides and tutorials
168
+
169
+ ### Infrastructure
170
+ - [ ] Implement proper testing
171
+ - [x] Add CI/CD pipeline
172
+ - [x] Improve error handling
173
+ - [x] Add monitoring and logging
174
+
175
+ ## 📝 License
176
+
177
+ [MIT License](LICENSE)
178
+
179
+ ## Acknowledgments
180
+
181
+ Built with Polars, Vue.js, FastAPI, Vueflow and Electron.