Flowfile 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowfile-0.3.2 → flowfile-0.3.3}/PKG-INFO +1 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/__init__.py +2 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/__init__.py +3 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/configs/__init__.py +15 -4
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/configs/settings.py +5 -3
- flowfile-0.3.3/flowfile_core/flowfile_core/configs/utils.py +18 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/FlowfileFlow.py +13 -18
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile-0.3.3/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +75 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/utils.py +34 -3
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/main.py +2 -3
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/secrets.py +1 -1
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/input_schema.py +10 -4
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/transform_schema.py +25 -47
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/__init__.py +11 -4
- flowfile-0.3.3/flowfile_frame/flowfile_frame/adding_expr.py +280 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/config.py +9 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/expr.py +301 -83
- flowfile-0.3.3/flowfile_frame/flowfile_frame/expr.pyi +2174 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/expr_name.py +258 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/flow_frame.py +587 -1002
- flowfile-0.3.3/flowfile_frame/flowfile_frame/flow_frame.pyi +336 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/flow_frame_methods.py +617 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/group_frame.py +89 -42
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/join.py +1 -2
- flowfile-0.3.3/flowfile_frame/flowfile_frame/lazy.py +704 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/lazy_methods.py +201 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/list_name_space.py +324 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/selectors.py +3 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/series.py +70 -0
- flowfile-0.3.3/flowfile_frame/flowfile_frame/utils.py +121 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/pyproject.toml +1 -1
- flowfile-0.3.2/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +0 -36
- flowfile-0.3.2/flowfile_frame/flowfile_frame/utils.py +0 -45
- {flowfile-0.3.2 → flowfile-0.3.3}/LICENSE +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/build_backends/build_backends/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/build_backends/build_backends/main.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/build_backends/build_backends/main_prd.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/__main__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/api.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/readme.md +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/AirbyteReader-2b1cf2d8.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/CrossJoin-41efa4cb.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/CrossJoin-cc3ab73c.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseConnectionSettings-0c04b2e5.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseConnectionSettings-307c4652.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseManager-69faa6e1.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseReader-e4134cd0.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseReader-f50c6558.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseWriter-2f570e53.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/DatabaseWriter-d32d75b1.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ExploreData-5bdae813.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ExploreData-5eb48389.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ExternalSource-29489051.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ExternalSource-e37b6275.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Filter-031332bb.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Filter-a9d08ba1.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Formula-3b900540.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Formula-b8cefc31.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/FuzzyMatch-6857de82.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/FuzzyMatch-dee31153.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/GraphSolver-ca74eb47.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/GroupBy-081b6591.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/GroupBy-ab1ea74b.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Join-41c0f331.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Join-b467376f.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ManualInput-ac7b9972.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/ManualInput-ffffb80a.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Output-48f81019.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Output-9a87d4ba.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Pivot-ee3e6093.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Pivot-f415e85f.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/PolarsCode-03921254.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/PolarsCode-650322d1.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/PopOver-3bdf8951.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/PopOver-bccfde04.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Read-67fee3a0.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Read-80dc1675.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/RecordCount-a2acd02d.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/RecordId-0c8bcd77.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Sample-60594a3a.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/SecretManager-bbcec2ac.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Select-9540e6ca.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/SettingsSection-48f28104.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/SettingsSection-9c836ecc.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Sort-6dbe3633.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Sort-7ccfa0fe.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/TextToRows-27aab4a8.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/TextToRows-c92d1ec2.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/UnavailableFields-5edd5322.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/UnavailableFields-8143044b.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Union-52460248.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Union-8d9ac7f9.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Unique-b5615727.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Unique-f6962644.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Unpivot-1ff1e938.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/airbyte-292aa232.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/api-3b345d92.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/cross_join-d30c0290.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/database_reader-ce1e55f3.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/database_writer-b4ad0753.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/designer-2394122a.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/designer-4736134f.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/documentation-12216a74.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/documentation-b9545eba.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/dropDown-35135ba8.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/dropDown-d5a4014c.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/dropDownGeneric-1f4e32ec.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/element-icons-9c88a535.woff +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/element-icons-de5eb258.ttf +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/explore_data-8a0a2861.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-brands-400-808443ae.ttf +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-brands-400-d7236a19.woff2 +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-regular-400-54cf6086.ttf +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-regular-400-e3456d12.woff2 +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-solid-900-aa759986.woff2 +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-solid-900-d2f05935.ttf +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-v4compatibility-0ce9033c.woff2 +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fa-v4compatibility-30f6abf6.ttf +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/filter-d7708bda.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/formula-eeeb1611.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fullEditor-178376bb.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fullEditor-f4791c23.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/fuzzy_match-40c161b2.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/genericNodeSettings-1d456350.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/genericNodeSettings-924759c7.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/graph_solver-8b7888b8.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/group_by-80561fc3.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/index-681a3ed0.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/index-f25c9283.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/input_data-ab2eb678.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/join-349043ae.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/manual_input-ae98f31d.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/nodeTitle-cad6fd9d.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/old_join-5d0eb604.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/output-06ec0371.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/pivot-9660df51.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/polars_code-05ce5dc6.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/record_count-dab44eb5.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/record_id-0b15856b.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/sample-693a88b5.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/secretApi-01f07e2c.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/select-b0d0437a.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/selectDynamic-b062bc9b.css +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/selectDynamic-f46a4e3f.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/sort-2aa579f0.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/summarize-2a099231.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/text_to_rows-859b29ea.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/union-2d8609f4.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/unique-1958b98a.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/unpivot-d3cb4b5b.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/view-7a0f0be1.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/vue-codemirror.esm-eb98fc8b.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/assets/vue-content-loader.es-860c0380.js +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/favicon.ico +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/flowfile.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/icons/flowfile.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/images/airbyte.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/images/flowfile.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/images/google.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/images/sheets.png +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/index.html +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/vite.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile/web/static/vue.svg +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/auth/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/auth/jwt.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/auth/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/auth/secrets.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/configs/flow_logger.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/configs/node_store/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/configs/node_store/nodes.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/database/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/database/connection.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/database/init_db.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/database/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/fileExplorer/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/fileExplorer/funcs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/fileExplorer/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/_extensions/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/_extensions/real_time_interface.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/analytics/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/analytics/analytics_processor.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/analytics/graphic_walker.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/analytics/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/connection_manager/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/connection_manager/_connection_manager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/connection_manager/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/database_connection_manager/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/extensions.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/create/funcs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/join/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/pivot_table.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/threaded_processes.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_graph_utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_node/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_node/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/flow_node/schema_callback.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/handler.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/manage/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/manage/compatibility_enhancements.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/manage/open_flowfile.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/setting_generator/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/setting_generator/setting_generator.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/setting_generator/settings.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/base_class.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/factory.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/util/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/util/calculate_layout.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/flowfile/util/execution_orderer.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/auth.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/logs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/public.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/routes/routes.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/run_lock.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/defaults.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/external_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/output_model.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/schemas/schemas.py +0 -0
- {flowfile-0.3.2/flowfile_core/flowfile_core/secrets → flowfile-0.3.3/flowfile_core/flowfile_core/secret_manager}/__init__.py +0 -0
- /flowfile-0.3.2/flowfile_core/flowfile_core/secrets/secrets.py → /flowfile-0.3.3/flowfile_core/flowfile_core/secret_manager/secret_manager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/arrow_reader.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/excel_file_manager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/fileManager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/fl_executor.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_core/flowfile_core/utils/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_frame/flowfile_frame/adapters.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/configs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/funcs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/pl_types.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/read_excel_tables.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/create/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/main.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/sql_source/main.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/external_sources/sql_source/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/flow_logger.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/funcs.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/main.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/matcher.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/models.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/pre_process.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/process.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/polars_fuzzy_match/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/process_manager.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/routes.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/secrets.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/spawner.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/flowfile_worker/flowfile_worker/utils.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/readme-pypi.md +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/test_utils/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/test_utils/postgres/__init__.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/test_utils/postgres/commands.py +0 -0
- {flowfile-0.3.2 → flowfile-0.3.3}/test_utils/postgres/fixtures.py +0 -0
|
@@ -18,8 +18,9 @@ os.environ['SINGLE_FILE_MODE'] = "1"
|
|
|
18
18
|
from flowfile.web import start_server as start_web_ui
|
|
19
19
|
from flowfile.api import open_graph_in_editor
|
|
20
20
|
from flowfile_frame.flow_frame import (
|
|
21
|
-
FlowFrame
|
|
21
|
+
FlowFrame
|
|
22
22
|
)
|
|
23
|
+
from flowfile_frame import read_csv, read_parquet, from_dict, concat
|
|
23
24
|
from flowfile_frame.expr import (
|
|
24
25
|
col, lit, column, cum_count, len,
|
|
25
26
|
sum, min, max, mean, count, when
|
|
@@ -135,6 +135,9 @@ def start_server(host="127.0.0.1", port=63578, open_browser=True):
|
|
|
135
135
|
|
|
136
136
|
# Import core app
|
|
137
137
|
from flowfile_core.main import run, app as core_app
|
|
138
|
+
from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
|
|
139
|
+
|
|
140
|
+
OFFLOAD_TO_WORKER.value = True
|
|
138
141
|
|
|
139
142
|
# Extend the core app with web UI routes and worker functionality
|
|
140
143
|
extend_app(core_app)
|
|
@@ -11,16 +11,27 @@ logger = logging.getLogger('PipelineHandler')
|
|
|
11
11
|
logger.setLevel(logging.INFO)
|
|
12
12
|
logger.propagate = False
|
|
13
13
|
|
|
14
|
-
#
|
|
15
|
-
|
|
14
|
+
# Clear any existing handlers
|
|
15
|
+
if logger.hasHandlers():
|
|
16
|
+
logger.handlers.clear()
|
|
17
|
+
|
|
18
|
+
# Try to determine the best output stream
|
|
19
|
+
output_stream = None
|
|
20
|
+
if hasattr(sys.stdout, 'isatty') and sys.stdout.isatty():
|
|
21
|
+
output_stream = sys.stdout
|
|
22
|
+
elif hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
|
|
23
|
+
output_stream = sys.stderr
|
|
24
|
+
else:
|
|
25
|
+
# Use __stdout__ for debugger environments (PyDev, PyCharm, etc.)
|
|
26
|
+
output_stream = sys.__stdout__
|
|
27
|
+
|
|
28
|
+
console_handler = logging.StreamHandler(output_stream)
|
|
16
29
|
console_handler.setLevel(logging.INFO)
|
|
17
30
|
|
|
18
31
|
# Create formatter
|
|
19
32
|
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
20
33
|
console_handler.setFormatter(formatter)
|
|
21
34
|
|
|
22
|
-
if logger.hasHandlers():
|
|
23
|
-
logger.handlers.clear()
|
|
24
35
|
logger.addHandler(console_handler)
|
|
25
36
|
|
|
26
37
|
# Create logs directory in temp at startup
|
|
@@ -5,10 +5,10 @@ import os
|
|
|
5
5
|
import tempfile
|
|
6
6
|
import argparse
|
|
7
7
|
|
|
8
|
-
from databases import DatabaseURL
|
|
9
8
|
from passlib.context import CryptContext
|
|
10
9
|
from starlette.config import Config
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
from flowfile_core.configs.utils import MutableBool
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
# Constants for server and worker configuration
|
|
@@ -18,6 +18,9 @@ DEFAULT_WORKER_PORT = 63579
|
|
|
18
18
|
SINGLE_FILE_MODE: bool = os.environ.get("SINGLE_FILE_MODE", "0") == "1"
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
OFFLOAD_TO_WORKER = MutableBool(True)
|
|
22
|
+
|
|
23
|
+
|
|
21
24
|
def parse_args():
|
|
22
25
|
"""Parse command line arguments"""
|
|
23
26
|
parser = argparse.ArgumentParser(description="Flowfile Backend Server")
|
|
@@ -79,7 +82,6 @@ args = parse_args()
|
|
|
79
82
|
SERVER_HOST = args.host if args.host is not None else DEFAULT_SERVER_HOST
|
|
80
83
|
SERVER_PORT = args.port if args.port is not None else DEFAULT_SERVER_PORT
|
|
81
84
|
WORKER_PORT = args.worker_port if args.worker_port is not None else int(os.getenv("WORKER_PORT", DEFAULT_WORKER_PORT))
|
|
82
|
-
# Worker configuration
|
|
83
85
|
WORKER_HOST = os.getenv("WORKER_HOST", "0.0.0.0" if platform.system() != "Windows" else "127.0.0.1")
|
|
84
86
|
|
|
85
87
|
config = Config(".env")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
class MutableBool:
    """Boolean flag wrapped in an object so it can be changed in place.

    Modules that import the same instance all observe a later assignment to
    ``value``, which a plain module-level ``bool`` constant cannot offer.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable.
    """

    value: bool  # current state of the flag; assign to it to flip the flag

    def __bool__(self) -> bool:
        """Allow direct boolean evaluation"""
        # ``__bool__`` must return a genuine ``bool``. The dataclass does not
        # validate ``value``, so coerce it instead of raising TypeError when a
        # truthy/falsy non-bool (e.g. ``1``) was stored.
        return bool(self.value)

    def __eq__(self, other) -> bool:
        """Allow equality comparison with booleans"""
        if isinstance(other, MutableBool):
            return self.value == other.value
        if isinstance(other, bool):
            return self.value == other
        # Let Python try the reflected comparison / fall back to identity.
        return NotImplemented
|
|
@@ -15,7 +15,7 @@ from flowfile_core.configs import logger
|
|
|
15
15
|
from flowfile_core.configs.flow_logger import FlowLogger
|
|
16
16
|
from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
|
|
17
17
|
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.settings import airbyte_settings_from_config
|
|
18
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import
|
|
18
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import cast_str_to_polars_type, FlowfileColumn
|
|
19
19
|
from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.settings_validator import (calculate_fuzzy_match_schema,
|
|
20
20
|
pre_calculate_pivot_schema)
|
|
21
21
|
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
@@ -25,7 +25,7 @@ from flowfile_core.flowfile.flow_data_engine.read_excel_tables import get_open_x
|
|
|
25
25
|
from flowfile_core.flowfile.sources import external_sources
|
|
26
26
|
from flowfile_core.schemas import input_schema, schemas, transform_schema
|
|
27
27
|
from flowfile_core.schemas.output_model import TableExample, NodeData, NodeResult, RunInformation
|
|
28
|
-
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
28
|
+
from flowfile_core.flowfile.utils import snake_case_to_camel_case, _handle_raw_data
|
|
29
29
|
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
30
30
|
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
31
31
|
from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
|
|
@@ -34,7 +34,7 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_op
|
|
|
34
34
|
ExternalDatabaseFetcher,
|
|
35
35
|
ExternalDatabaseWriter,
|
|
36
36
|
ExternalDfFetcher)
|
|
37
|
-
from flowfile_core.
|
|
37
|
+
from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
|
|
38
38
|
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
|
|
39
39
|
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
|
|
40
40
|
from flowfile_core.flowfile.database_connection_manager.db_connections import get_local_database_connection
|
|
@@ -205,19 +205,12 @@ class FlowGraph:
|
|
|
205
205
|
sample_size: int = 10000
|
|
206
206
|
|
|
207
207
|
def analysis_preparation(flowfile_table: FlowDataEngine):
|
|
208
|
-
if flowfile_table.number_of_records
|
|
209
|
-
|
|
210
|
-
number_of_records = ExternalDfFetcher(
|
|
211
|
-
lf=flowfile_table.data_frame,
|
|
212
|
-
operation_type="calculate_number_of_records",
|
|
213
|
-
flow_id=self.flow_id,
|
|
214
|
-
node_id=node.node_id,
|
|
215
|
-
).result
|
|
208
|
+
if flowfile_table.number_of_records <= 0:
|
|
209
|
+
number_of_records = flowfile_table.get_number_of_records(calculate_in_worker_process=True)
|
|
216
210
|
else:
|
|
217
211
|
number_of_records = flowfile_table.number_of_records
|
|
218
212
|
if number_of_records > sample_size:
|
|
219
213
|
flowfile_table = flowfile_table.get_sample(sample_size, random=True)
|
|
220
|
-
|
|
221
214
|
external_sampler = ExternalDfFetcher(
|
|
222
215
|
lf=flowfile_table.data_frame,
|
|
223
216
|
file_ref="__gf_walker"+node.hash,
|
|
@@ -225,7 +218,7 @@ class FlowGraph:
|
|
|
225
218
|
node_id=node.node_id,
|
|
226
219
|
flow_id=self.flow_id,
|
|
227
220
|
)
|
|
228
|
-
node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref
|
|
221
|
+
node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref)
|
|
229
222
|
return flowfile_table
|
|
230
223
|
|
|
231
224
|
def schema_callback():
|
|
@@ -441,7 +434,7 @@ class FlowGraph:
|
|
|
441
434
|
def add_formula(self, function_settings: input_schema.NodeFormula):
|
|
442
435
|
error = ""
|
|
443
436
|
if function_settings.function.field.data_type not in (None, "Auto"):
|
|
444
|
-
output_type =
|
|
437
|
+
output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
|
|
445
438
|
else:
|
|
446
439
|
output_type = None
|
|
447
440
|
if output_type not in (None, "Auto"):
|
|
@@ -486,7 +479,8 @@ class FlowGraph:
|
|
|
486
479
|
function=_func,
|
|
487
480
|
input_columns=[],
|
|
488
481
|
node_type='cross_join',
|
|
489
|
-
setting_input=cross_join_settings
|
|
482
|
+
setting_input=cross_join_settings,
|
|
483
|
+
input_node_ids=cross_join_settings.depending_on_ids)
|
|
490
484
|
return self
|
|
491
485
|
|
|
492
486
|
def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
|
|
@@ -1044,11 +1038,10 @@ class FlowGraph:
|
|
|
1044
1038
|
return self
|
|
1045
1039
|
|
|
1046
1040
|
def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput):
|
|
1047
|
-
|
|
1048
1041
|
if isinstance(input_file, input_schema.NodeManualInput):
|
|
1049
|
-
|
|
1042
|
+
_handle_raw_data(input_file)
|
|
1043
|
+
input_data = FlowDataEngine(input_file.raw_data_format)
|
|
1050
1044
|
ref = 'manual_input'
|
|
1051
|
-
|
|
1052
1045
|
else:
|
|
1053
1046
|
input_data = FlowDataEngine(path_ref=input_file.file_ref)
|
|
1054
1047
|
ref = 'datasource'
|
|
@@ -1061,7 +1054,9 @@ class FlowGraph:
|
|
|
1061
1054
|
|
|
1062
1055
|
if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
|
|
1063
1056
|
self._flow_starts.append(node)
|
|
1057
|
+
|
|
1064
1058
|
else:
|
|
1059
|
+
input_data.collect()
|
|
1065
1060
|
node = FlowNode(input_file.node_id, function=input_data,
|
|
1066
1061
|
setting_input=input_file,
|
|
1067
1062
|
name=ref, node_type=ref, parent_uuid=self.uuid)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from flowfile_core.schemas.input_schema import FullDatabaseConnection, FullDatabaseConnectionInterface
|
|
2
2
|
from sqlalchemy.orm import Session
|
|
3
3
|
from flowfile_core.database.models import DatabaseConnection as DBConnectionModel, Secret
|
|
4
|
-
from flowfile_core.
|
|
4
|
+
from flowfile_core.secret_manager.secret_manager import store_secret, SecretInput, decrypt_secret
|
|
5
5
|
from flowfile_core.database.connection import get_db_context
|
|
6
6
|
|
|
7
7
|
|
|
@@ -17,6 +17,7 @@ from pyarrow.parquet import ParquetFile
|
|
|
17
17
|
# Local imports - Core
|
|
18
18
|
from flowfile_core.configs import logger
|
|
19
19
|
from flowfile_core.configs.flow_logger import NodeLogger
|
|
20
|
+
from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
|
|
20
21
|
from flowfile_core.schemas import (
|
|
21
22
|
input_schema,
|
|
22
23
|
transform_schema as transform_schemas
|
|
@@ -29,7 +30,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import (
|
|
|
29
30
|
FlowfileColumn,
|
|
30
31
|
convert_stats_to_column_info
|
|
31
32
|
)
|
|
32
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
|
|
33
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
33
34
|
from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.prepare_for_fuzzy_match import prepare_for_fuzzy_match
|
|
34
35
|
from flowfile_core.flowfile.flow_data_engine.join import (
|
|
35
36
|
verify_join_select_integrity,
|
|
@@ -109,7 +110,7 @@ class FlowDataEngine:
|
|
|
109
110
|
# flow_id: int = None # TODO: Implement flow_id
|
|
110
111
|
|
|
111
112
|
def __init__(self,
|
|
112
|
-
raw_data: Union[List[Dict], List[Any], 'ParquetFile', pl.DataFrame, pl.LazyFrame] = None,
|
|
113
|
+
raw_data: Union[List[Dict], List[Any], 'ParquetFile', pl.DataFrame, pl.LazyFrame, input_schema.RawData] = None,
|
|
113
114
|
path_ref: str = None,
|
|
114
115
|
name: str = None,
|
|
115
116
|
optimize_memory: bool = True,
|
|
@@ -147,7 +148,10 @@ class FlowDataEngine:
|
|
|
147
148
|
|
|
148
149
|
def _handle_raw_data(self, raw_data, number_of_records, optimize_memory):
|
|
149
150
|
"""Process different types of input data."""
|
|
150
|
-
|
|
151
|
+
|
|
152
|
+
if isinstance(raw_data, input_schema.RawData):
|
|
153
|
+
self._handle_raw_data_format(raw_data)
|
|
154
|
+
elif isinstance(raw_data, pl.DataFrame):
|
|
151
155
|
self._handle_polars_dataframe(raw_data, number_of_records)
|
|
152
156
|
elif isinstance(raw_data, pl.LazyFrame):
|
|
153
157
|
self._handle_polars_lazy_frame(raw_data, number_of_records, optimize_memory)
|
|
@@ -190,6 +194,20 @@ class FlowDataEngine:
|
|
|
190
194
|
self.number_of_records = 1
|
|
191
195
|
self.data_frame = pl.DataFrame([data])
|
|
192
196
|
|
|
197
|
+
def _handle_raw_data_format(self, raw_data: input_schema.RawData):
    """Populate this engine from a RawData object.

    The column definitions in ``raw_data.columns`` are turned into a Polars
    schema that is used to build the frame from ``raw_data.data``. If Polars
    raises ``TypeError`` for that schema, a warning is logged and the frame
    is built again without a schema.

    Args:
        raw_data: Column definitions plus the data to load.
    """
    typed_columns = [FlowfileColumn.create_from_minimal_field_info(field) for field in raw_data.columns]
    schema_pairs = []
    for typed_column in typed_columns:
        schema_pairs.append((typed_column.column_name, typed_column.get_polars_type().pl_datatype))
    polars_schema = pl.Schema(schema_pairs)

    try:
        frame = pl.DataFrame(raw_data.data, polars_schema)
    except TypeError as e:
        # Best effort: fall back to building the frame without the schema.
        logger.warning(f"Could not parse the data with the schema:\n{e}")
        frame = pl.DataFrame(raw_data.data)

    self.number_of_records = len(frame)
    self.data_frame = frame.lazy()
    self.lazy = True
|
|
210
|
+
|
|
193
211
|
def _handle_list_input(self, data: List):
|
|
194
212
|
"""Handle list input."""
|
|
195
213
|
number_of_records = len(data)
|
|
@@ -462,6 +480,9 @@ class FlowDataEngine:
|
|
|
462
480
|
return self.data_frame.collect(engine="streaming" if self._streamable else "auto").to_dicts()
|
|
463
481
|
return self.data_frame.to_dicts()
|
|
464
482
|
|
|
483
|
+
def to_dict(self) -> Dict[str, List]:
    """Return the data as a mapping of column name to a list of values.

    A lazy frame is collected first (with the streaming engine when
    ``self._streamable`` is set); an eager frame is converted directly.

    Returns:
        Dict[str, List]: One list of Python values per column.
    """
    frame = self.data_frame
    # ``data_frame`` may hold an eager pl.DataFrame, which has no ``collect``.
    if isinstance(frame, pl.LazyFrame):
        frame = frame.collect(engine="streaming" if self._streamable else "auto")
    return frame.to_dict(as_series=False)
|
|
485
|
+
|
|
465
486
|
@classmethod
|
|
466
487
|
def create_from_external_source(cls, external_source: ExternalDataSource) -> "FlowDataEngine":
|
|
467
488
|
"""Create a FlowDataEngine from an external data source."""
|
|
@@ -484,7 +505,7 @@ class FlowDataEngine:
|
|
|
484
505
|
"""Create a FlowDataEngine from a schema definition."""
|
|
485
506
|
pl_schema = []
|
|
486
507
|
for i, flow_file_column in enumerate(schema):
|
|
487
|
-
pl_schema.append((flow_file_column.name,
|
|
508
|
+
pl_schema.append((flow_file_column.name, cast_str_to_polars_type(flow_file_column.data_type)))
|
|
488
509
|
schema[i].col_index = i
|
|
489
510
|
df = pl.LazyFrame(schema=pl_schema)
|
|
490
511
|
return cls(df, schema=schema, calculate_schema_stats=False, number_of_records=0)
|
|
@@ -824,7 +845,7 @@ class FlowDataEngine:
|
|
|
824
845
|
Returns:
|
|
825
846
|
FlowDataEngine: New instance with sampled data
|
|
826
847
|
"""
|
|
827
|
-
n_records = min(n_rows, self.
|
|
848
|
+
n_records = min(n_rows, self.get_number_of_records(calculate_in_worker_process=True))
|
|
828
849
|
logging.info(f'Getting sample of {n_rows} rows')
|
|
829
850
|
|
|
830
851
|
if random:
|
|
@@ -1158,14 +1179,25 @@ class FlowDataEngine:
|
|
|
1158
1179
|
self.number_of_records = 0
|
|
1159
1180
|
self._lazy = True
|
|
1160
1181
|
|
|
1161
|
-
def
|
|
1182
|
+
def _calculate_number_of_records_in_worker(self) -> int:
    """Ask the worker process to count the records of ``self.data_frame``.

    Returns:
        int: The ``result`` reported by the fetcher for the
        ``calculate_number_of_records`` operation.
    """
    # flow_id / node_id are passed as -1 here.
    # NOTE(review): presumably a placeholder because no flow or node is known
    # at this level - confirm.
    fetcher = ExternalDfFetcher(
        flow_id=-1,
        node_id=-1,
        lf=self.data_frame,
        operation_type="calculate_number_of_records",
        wait_on_completion=True,
    )
    return fetcher.result
|
|
1191
|
+
|
|
1192
|
+
def get_number_of_records(self, warn: bool = False, force_calculate: bool = False,
|
|
1193
|
+
calculate_in_worker_process: bool = False) -> int:
|
|
1162
1194
|
"""
|
|
1163
1195
|
Get the total number of records in the DataFrame.
|
|
1164
1196
|
|
|
1165
1197
|
Args:
|
|
1166
1198
|
warn: Whether to warn about expensive operations
|
|
1167
1199
|
force_calculate: Whether to force recalculation
|
|
1168
|
-
|
|
1200
|
+
calculate_in_worker_process: Whether to offload compute to the worker process
|
|
1169
1201
|
Returns:
|
|
1170
1202
|
int: Number of records
|
|
1171
1203
|
|
|
@@ -1174,22 +1206,24 @@ class FlowDataEngine:
|
|
|
1174
1206
|
"""
|
|
1175
1207
|
if self.is_future and not self.is_collected:
|
|
1176
1208
|
return -1
|
|
1177
|
-
|
|
1209
|
+
calculate_in_worker_process = False if not OFFLOAD_TO_WORKER.value else calculate_in_worker_process
|
|
1178
1210
|
if self.number_of_records is None or self.number_of_records < 0 or force_calculate:
|
|
1179
1211
|
if self._number_of_records_callback is not None:
|
|
1180
1212
|
self._number_of_records_callback(self)
|
|
1181
1213
|
|
|
1182
1214
|
if self.lazy:
|
|
1183
|
-
if
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1215
|
+
if calculate_in_worker_process:
|
|
1216
|
+
self.number_of_records = self._calculate_number_of_records_in_worker()
|
|
1217
|
+
else:
|
|
1218
|
+
if warn:
|
|
1219
|
+
logger.warning('Calculating the number of records this can be expensive on a lazy frame')
|
|
1220
|
+
try:
|
|
1221
|
+
self.number_of_records = self.data_frame.select(pl.len()).collect(
|
|
1222
|
+
engine="streaming" if self._streamable else "auto")[0, 0]
|
|
1223
|
+
except Exception:
|
|
1224
|
+
raise ValueError('Could not get number of records')
|
|
1190
1225
|
else:
|
|
1191
1226
|
self.number_of_records = self.data_frame.__len__()
|
|
1192
|
-
|
|
1193
1227
|
return self.number_of_records
|
|
1194
1228
|
|
|
1195
1229
|
# Properties
|
|
@@ -1518,4 +1552,7 @@ def execute_polars_code(*flowfile_tables: "FlowDataEngine", code: str) -> "FlowD
|
|
|
1518
1552
|
kwargs = {'input_df': flowfile_tables[0].data_frame}
|
|
1519
1553
|
else:
|
|
1520
1554
|
kwargs = {f'input_df_{i+1}': flowfile_table.data_frame for i, flowfile_table in enumerate(flowfile_tables)}
|
|
1521
|
-
|
|
1555
|
+
df = polars_executable(**kwargs)
|
|
1556
|
+
if isinstance(df, pl.DataFrame):
|
|
1557
|
+
logger.warning("Got a non lazy DataFrame, possibly harming performance, if possible, try to use a lazy method")
|
|
1558
|
+
return FlowDataEngine(df)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
from typing import Optional, Any, List, Dict, Literal
|
|
3
3
|
from flowfile_core.schemas import input_schema
|
|
4
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
|
|
4
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
5
5
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
|
|
6
6
|
from polars import datatypes
|
|
7
7
|
import polars as pl
|
|
@@ -9,6 +9,37 @@ import polars as pl
|
|
|
9
9
|
DataTypeGroup = Literal['numeric', 'str', 'date']
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
    """Render a Polars data type as a string.

    Nested and parametrised types (List, Array, Decimal, Struct) are written
    as ``pl.``-prefixed constructor expressions. Any other type is written as
    the plain name of its base type at the top level, or as ``pl.<type>``
    when it appears inside a nested type.

    Args:
        pl_type: The Polars data type to render.
        inner: True when rendering a type nested inside another type.

    Returns:
        str: The textual form of ``pl_type``.
    """
    if isinstance(pl_type, pl.List):
        return f"pl.List({convert_pl_type_to_string(pl_type.inner, inner=True)})"

    if isinstance(pl_type, pl.Array):
        # NOTE(review): only the inner type is rendered; the array's
        # size/shape is not part of the string - confirm this is intended.
        return f"pl.Array({convert_pl_type_to_string(pl_type.inner, inner=True)})"

    if isinstance(pl_type, pl.Decimal):
        precision = getattr(pl_type, 'precision', None)
        scale = getattr(pl_type, 'scale', None)
        if precision is not None and scale is not None:
            return f"pl.Decimal({precision}, {scale})"
        if precision is not None:
            return f"pl.Decimal({precision})"
        if scale:
            # No precision but a non-zero scale: keep the scale instead of
            # silently dropping it (0 is the documented default scale).
            return f"pl.Decimal(scale={scale})"
        return "pl.Decimal()"

    if isinstance(pl_type, pl.Struct):
        rendered_fields = []
        for field in getattr(pl_type, 'fields', ()):
            # Escape backslashes and double quotes so unusual field names
            # still yield a valid string literal in the output.
            escaped_name = field.name.replace('\\', '\\\\').replace('"', '\\"')
            field_type = convert_pl_type_to_string(field.dtype, inner=True)
            rendered_fields.append(f'pl.Field("{escaped_name}", {field_type})')
        return f"pl.Struct([{', '.join(rendered_fields)}])"

    # Base types: bare type name at top level, "pl."-prefixed when nested.
    if inner:
        return f"pl.{pl_type}"
    return str(pl_type.base_type())
|
|
41
|
+
|
|
42
|
+
|
|
12
43
|
@dataclass
|
|
13
44
|
class FlowfileColumn:
|
|
14
45
|
column_name: str
|
|
@@ -28,7 +59,7 @@ class FlowfileColumn:
|
|
|
28
59
|
__perc_unique: Optional[float]
|
|
29
60
|
|
|
30
61
|
def __init__(self, polars_type: PlType):
|
|
31
|
-
self.data_type =
|
|
62
|
+
self.data_type = convert_pl_type_to_string(polars_type.pl_datatype)
|
|
32
63
|
self.size = polars_type.count - polars_type.null_count
|
|
33
64
|
self.max_value = polars_type.max
|
|
34
65
|
self.min_value = polars_type.min
|
|
@@ -53,7 +84,7 @@ class FlowfileColumn:
|
|
|
53
84
|
|
|
54
85
|
@classmethod
|
|
55
86
|
def from_input(cls, column_name: str, data_type: str, **kwargs) -> "FlowfileColumn":
|
|
56
|
-
pl_type =
|
|
87
|
+
pl_type = cast_str_to_polars_type(data_type)
|
|
57
88
|
if pl_type is not None:
|
|
58
89
|
data_type = pl_type
|
|
59
90
|
return cls(PlType(column_name=column_name, pl_datatype=data_type, **kwargs))
|
|
@@ -129,12 +160,9 @@ class FlowfileColumn:
|
|
|
129
160
|
return 'date'
|
|
130
161
|
|
|
131
162
|
def get_polars_type(self) -> PlType:
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
pl_datatype = None
|
|
136
|
-
|
|
137
|
-
return PlType(pl_datatype=pl_datatype, **self.__dict__)
|
|
163
|
+
pl_datatype = cast_str_to_polars_type(self.data_type)
|
|
164
|
+
pl_type = PlType(pl_datatype=pl_datatype, **self.__dict__)
|
|
165
|
+
return pl_type
|
|
138
166
|
|
|
139
167
|
def update_type_from_polars_type(self, pl_type: PlType):
|
|
140
168
|
self.data_type = str(pl_type.pl_datatype.base_type())
|
|
@@ -142,3 +170,8 @@ class FlowfileColumn:
|
|
|
142
170
|
|
|
143
171
|
def convert_stats_to_column_info(stats: List[Dict]) -> List[FlowfileColumn]:
|
|
144
172
|
return [FlowfileColumn.create_from_polars_type(PlType(**c)) for c in stats]
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) -> List[input_schema.MinimalFieldInfo]:
    """Convert a Polars schema into a list of minimal field descriptions.

    Args:
        pl_schema: Schema mapping column names to Polars data types.

    Returns:
        One minimal field info per schema entry, in schema order.
    """
    minimal_fields = []
    for column_name, dtype in pl_schema.items():
        column = FlowfileColumn.create_from_polars_type(PlType(column_name=column_name, pl_datatype=dtype))
        minimal_fields.append(column.get_minimal_field_info())
    return minimal_fields
|
flowfile-0.3.3/flowfile_core/flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Lower-case type aliases mapped to the Polars data type class they stand for.
dtype_to_pl = {
    # whole numbers
    'int': pl.Int64,
    'integer': pl.Int64,
    'char': pl.String,
    # floating point
    # NOTE(review): 'fixed decimal' maps to a 32-bit float - confirm intended.
    'fixed decimal': pl.Float32,
    'double': pl.Float64,
    'float': pl.Float64,
    # boolean / binary
    'bool': pl.Boolean,
    'byte': pl.UInt8,
    'bit': pl.Binary,
    # temporal
    'date': pl.Date,
    'datetime': pl.Datetime,
    # text
    'string': pl.String,
    'str': pl.String,
    'time': pl.Time,
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def safe_eval_pl_type(type_string: str):
    """
    Evaluate a Polars type expression such as ``pl.List(pl.Int64)``.

    The expression is evaluated with a namespace that contains only the
    ``pl`` module and a handful of basic Python types; the built-in functions
    are removed.

    Args:
        type_string: Python expression that builds a Polars data type.

    Returns:
        The object the expression evaluates to.

    Raises:
        ValueError: If evaluation fails for any reason. The original error is
            attached as the cause.
    """
    # SECURITY NOTE(review): this is still ``eval``. Emptying ``__builtins__``
    # removes the built-in functions, but the complete ``pl`` module remains
    # reachable from the expression, including functions that read and write
    # files (e.g. ``pl.read_csv``). Do not treat this as a sandbox for
    # untrusted input; an ``ast``-based whitelist would be a safer design.
    allowed_names = {
        # Polars module and types
        'pl': pl,

        # Basic Python types usable inside the expression
        'int': int,
        'str': str,
        'float': float,
        'bool': bool,
        'list': list,
        'dict': dict,
        'tuple': tuple,

        # Disable built-in functions
        '__builtins__': {},
    }

    try:
        return eval(type_string, allowed_names, {})
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the ValueError.
        raise ValueError(f"Failed to safely evaluate type string '{type_string}': {e}") from e
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Same aliases as ``dtype_to_pl``, mapped to the Polars class *name* instead of the class.
dtype_to_pl_str = {alias: pl_class.__name__ for alias, pl_class in dtype_to_pl.items()}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_polars_type(dtype: str):
    """Resolve a type description to a Polars data type.

    Resolution order:
      1. Strings containing ``pl.`` are evaluated as a Polars type expression.
      2. Otherwise the lower-cased string is looked up in ``dtype_to_pl``.
      3. Otherwise the string is tried as the name of a Polars data type
         class (e.g. ``"Int64"``).
    Anything that cannot be resolved falls back to ``pl.String``.

    Args:
        dtype: Type description, e.g. ``"int"``, ``"Int64"`` or
            ``"pl.List(pl.Int64)"``.

    Returns:
        A Polars data type class or instance.
    """
    if 'pl.' in dtype:
        try:
            return safe_eval_pl_type(dtype)
        except ValueError:
            # safe_eval_pl_type reports every failure as ValueError;
            # deliberately fall back to a string column.
            return pl.String

    alias_match = dtype_to_pl.get(dtype.lower())
    if alias_match is not None:
        return alias_match

    # Only accept real data type classes. ``pl`` also exposes functions and
    # other objects (e.g. ``read_csv``) that must never be returned as a type.
    candidate = getattr(pl, dtype, None)
    if isinstance(candidate, type) and issubclass(candidate, pl.DataType):
        return candidate
    return pl.String
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cast_str_to_polars_type(dtype: str) -> pl.DataType:
    """Resolve a type description and return it in instantiated form.

    Args:
        dtype: Type description understood by ``get_polars_type``.

    Returns:
        The resolved type; when the resolved object is callable (a data type
        class), the result of calling it without arguments.
    """
    resolved = get_polars_type(dtype)
    # NOTE(review): classes that require constructor arguments would raise
    # here - confirm whether such names can reach this function.
    return resolved() if hasattr(resolved, '__call__') else resolved
|
|
75
|
+
|
|
@@ -3,6 +3,7 @@ from typing import Dict, Any, Callable
|
|
|
3
3
|
import textwrap
|
|
4
4
|
import ast
|
|
5
5
|
import time
|
|
6
|
+
from io import BytesIO
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
def remove_comments_and_docstrings(source: str) -> str:
|
|
@@ -174,6 +175,7 @@ class PolarsCodeParser:
|
|
|
174
175
|
'False': False,
|
|
175
176
|
'None': None,
|
|
176
177
|
'time': time,
|
|
178
|
+
'BytesIO': BytesIO
|
|
177
179
|
}
|
|
178
180
|
|
|
179
181
|
@staticmethod
|
|
@@ -256,7 +258,6 @@ class PolarsCodeParser:
|
|
|
256
258
|
|
|
257
259
|
# Wrap the code in a function
|
|
258
260
|
wrapped_code = self._wrap_in_function(code, num_inputs)
|
|
259
|
-
|
|
260
261
|
try:
|
|
261
262
|
# Create namespace for execution
|
|
262
263
|
local_namespace: Dict[str, Any] = {}
|
|
@@ -1,14 +1,27 @@
|
|
|
1
1
|
from faker import Faker
|
|
2
2
|
from functools import partial
|
|
3
|
+
from math import ceil
|
|
3
4
|
from random import randint
|
|
4
5
|
import polars as pl
|
|
5
6
|
from typing import List, Dict, Any, Generator
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
|
|
9
|
+
def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFrame:
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
n_records (): Number of records to return
|
|
14
|
+
optimized (): Indicator if creation should be optimized, will result in more identical rows when True
|
|
15
|
+
|
|
16
|
+
Returns:
|
|
17
|
+
pl.DataFrame
|
|
18
|
+
"""
|
|
9
19
|
fake = Faker()
|
|
10
|
-
selector = partial(randint,0)
|
|
11
|
-
|
|
20
|
+
selector = partial(randint, 0)
|
|
21
|
+
|
|
22
|
+
max_n_records = min(10_000, n_records) if optimized else n_records
|
|
23
|
+
|
|
24
|
+
min_range = partial(min, max_n_records)
|
|
12
25
|
# Pre-generation of static data
|
|
13
26
|
cities = [fake.city() for _ in range(min_range(7000))]
|
|
14
27
|
companies = [fake.company() for _ in range(min_range(100_000))]
|
|
@@ -19,7 +32,7 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
|
|
|
19
32
|
first_names = [fake.first_name() for _ in range(min_range(100_000))]
|
|
20
33
|
last_names = [fake.last_name() for _ in range(min_range(50_000))]
|
|
21
34
|
domain_names = [fake.domain_name() for _ in range(10)]
|
|
22
|
-
sales_data = [fake.random_int(0, 1000) for _ in range(
|
|
35
|
+
sales_data = [fake.random_int(0, 1000) for _ in range(max_n_records)]
|
|
23
36
|
|
|
24
37
|
def generate_name():
|
|
25
38
|
return f"{first_names[selector(min_range(100_000))-1]} {last_names[selector(min_range(50_000))-1]}"
|
|
@@ -32,9 +45,8 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
|
|
|
32
45
|
|
|
33
46
|
def generate_phone_number():
|
|
34
47
|
return fake.phone_number()
|
|
35
|
-
|
|
36
48
|
data = []
|
|
37
|
-
for i in range(
|
|
49
|
+
for i in range(max_n_records):
|
|
38
50
|
name = generate_name()
|
|
39
51
|
data.append(dict(
|
|
40
52
|
ID=randint(1, 1000000),
|
|
@@ -47,8 +59,14 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
|
|
|
47
59
|
Work=companies[selector(min_range(100_000))-1],
|
|
48
60
|
Zipcode=zipcodes[selector(min_range(200_000))-1],
|
|
49
61
|
Country=countries[selector(min_range(50))-1],
|
|
50
|
-
sales_data=sales_data[selector(
|
|
62
|
+
sales_data=sales_data[selector(max_n_records)-1]
|
|
51
63
|
))
|
|
64
|
+
if max_n_records < n_records:
|
|
65
|
+
n_duplicates: int = ceil(n_records / max_n_records)
|
|
66
|
+
output = []
|
|
67
|
+
for _ in range(n_duplicates):
|
|
68
|
+
output.extend(data)
|
|
69
|
+
data = output[:n_records]
|
|
52
70
|
|
|
53
71
|
return pl.DataFrame(data)
|
|
54
72
|
|
|
@@ -190,7 +190,7 @@ class BaseFetcher:
|
|
|
190
190
|
logger.info('Already running the fetching')
|
|
191
191
|
return
|
|
192
192
|
|
|
193
|
-
sleep_time =
|
|
193
|
+
sleep_time = .5
|
|
194
194
|
self.running = True
|
|
195
195
|
while not self.stop_event.is_set():
|
|
196
196
|
try:
|
|
@@ -205,7 +205,8 @@ class BaseFetcher:
|
|
|
205
205
|
break
|
|
206
206
|
elif status.status == 'Unknown Error':
|
|
207
207
|
self._handle_error(-1,
|
|
208
|
-
'There was an unknown error with the process,
|
|
208
|
+
'There was an unknown error with the process, '
|
|
209
|
+
'and the process got killed by the server')
|
|
209
210
|
break
|
|
210
211
|
else:
|
|
211
212
|
self._handle_error(2, r.text)
|
|
@@ -284,7 +285,7 @@ class ExternalDfFetcher(BaseFetcher):
|
|
|
284
285
|
|
|
285
286
|
def __init__(self, flow_id: int, node_id: int | str, lf: pl.LazyFrame | pl.DataFrame, file_ref: str = None,
|
|
286
287
|
wait_on_completion: bool = True,
|
|
287
|
-
operation_type: OperationType = 'store'):
|
|
288
|
+
operation_type: OperationType = 'store', offload_to_worker: bool = True):
|
|
288
289
|
super().__init__(file_ref=file_ref)
|
|
289
290
|
lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
|
|
290
291
|
r = trigger_df_operation(lf=lf, file_ref=self.file_ref, operation_type=operation_type,
|