flowcept 0.8.8__tar.gz → 0.8.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/checks.yml +1 -1
- {flowcept-0.8.8 → flowcept-0.8.10}/PKG-INFO +14 -5
- {flowcept-0.8.8 → flowcept-0.8.10}/README.md +13 -4
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/getstarted.rst +2 -1
- flowcept-0.8.10/docs/index.rst +18 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/pyproject.toml +3 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/sample_settings.yaml +10 -13
- flowcept-0.8.10/src/flowcept/cli.py +260 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/configs.py +3 -5
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/base_interceptor.py +4 -13
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/consumers/document_inserter.py +3 -17
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/instrumentation/task_capture.py +11 -7
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/version.py +1 -1
- flowcept-0.8.8/docs/index.rst +0 -15
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/create-release-n-publish.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-llm-tests.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests-all-dbs.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests-in-container.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests-kafka.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests-py313.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests-simple.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run-tests.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/run_examples.sh +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.github/workflows/version_bumper.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.gitignore +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/.readthedocs.yaml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/CONTRIBUTING.md +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/LICENSE +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/Makefile +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/Dockerfile +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/compose-grafana.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/compose-kafka.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/compose-mofka.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/compose-mongo.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/deployment/compose.yml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/api-reference.rst +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/conf.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/contributing.rst +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/schemas.rst +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/task_schema.rst +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/docs/workflow_schema.rst +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/dask_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/distributed_consumer_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/instrumented_loop_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/instrumented_simple_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/README.md +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/custom_provenance_id_mapping.yaml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/llm_dataprep.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/llm_main_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/llm_model.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/llm_complex/llm_test_runner.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/mlflow_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/single_layer_perceptron_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/tensorboard_example.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/unmanaged/main.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/examples/unmanaged/simple_task.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/analytics.ipynb +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/dask.ipynb +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/dask_from_CLI.ipynb +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/mlflow.ipynb +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/reset_dask_nb_exec_counts.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/notebooks/tensorboard.ipynb +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/mofka/bedrock_setup.sh +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/mofka/consumer.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/mofka/mofka-requirements.yaml +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/mofka/mofka_config.json +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/resources/simple_redis_consumer.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/analytics/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/analytics/analytics_utils.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/analytics/data_augmentation.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/analytics/plot.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/autoflush_buffer.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/docdb_dao/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/docdb_dao/docdb_dao_base.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/docdb_dao/lmdb_dao.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/docdb_dao/mongodb_dao.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/keyvalue_dao.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/mq_dao/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/mq_dao/mq_dao_base.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/mq_dao/mq_dao_kafka.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/mq_dao/mq_dao_mofka.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/daos/mq_dao/mq_dao_redis.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_dataclasses/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_dataclasses/task_object.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_dataclasses/telemetry.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_dataclasses/workflow_object.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/flowcept_logger.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/query_utils.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/settings_factory.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/utils.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/commons/vocabulary.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_api/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_api/db_api.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_api/flowcept_controller.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_api/task_query_api.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_webserver/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_webserver/app.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_webserver/resources/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_webserver/resources/query_rsrc.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowcept_webserver/resources/task_messages_rsrc.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/dask/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/dask/dask_dataclasses.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/dask/dask_interceptor.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/dask/dask_plugins.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/instrumentation_interceptor.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/interceptor_state_manager.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/mlflow/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/tensorboard/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/zambeze/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/consumers/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/consumers/consumer_utils.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/flowceptor/telemetry_capture.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/instrumentation/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/instrumentation/flowcept_loop.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/instrumentation/flowcept_task.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/src/flowcept/instrumentation/flowcept_torch.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/dask_test_utils.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/test_dask.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/test_dask_with_context_mgmt.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/test_file_observer.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/test_mlflow.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/adapters/test_tensorboard.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/db_api_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/flowcept_api_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/sample_data.json +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/sample_data_with_telemetry_and_rai.json +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/api/task_query_api_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/doc_db_inserter/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/doc_db_inserter/doc_db_inserter_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/flowcept_explicit_tasks.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/flowcept_loop_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/flowcept_task_decorator_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/ml_tests/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/ml_tests/dl_trainer.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/ml_tests/ml_decorator_dask_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/instrumentation_tests/ml_tests/ml_decorator_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/misc_tests/__init__.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/misc_tests/log_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/misc_tests/singleton_test.py +0 -0
- {flowcept-0.8.8 → flowcept-0.8.10}/tests/misc_tests/telemetry_test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flowcept
|
|
3
|
-
Version: 0.8.8
|
|
3
|
+
Version: 0.8.10
|
|
4
4
|
Summary: Capture and query workflow provenance data using data observability
|
|
5
5
|
Project-URL: GitHub, https://github.com/ORNL/flowcept
|
|
6
6
|
Author: Oak Ridge National Laboratory
|
|
@@ -88,6 +88,7 @@ Requires-Dist: tensorboard; extra == 'tensorboard'
|
|
|
88
88
|
Requires-Dist: tensorflow; extra == 'tensorboard'
|
|
89
89
|
Description-Content-Type: text/markdown
|
|
90
90
|
|
|
91
|
+
[](https://flowcept.readthedocs.io/)
|
|
91
92
|
[](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml)
|
|
92
93
|
[](https://pypi.org/project/flowcept)
|
|
93
94
|
[](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml)
|
|
@@ -107,10 +108,13 @@ Description-Content-Type: text/markdown
|
|
|
107
108
|
- [Data Persistence](#data-persistence)
|
|
108
109
|
- [Performance Tuning](#performance-tuning-for-performance-evaluation)
|
|
109
110
|
- [AMD GPU Setup](#install-amd-gpu-lib)
|
|
111
|
+
- [Further Documentation](#documentation)
|
|
110
112
|
|
|
111
113
|
## Overview
|
|
112
114
|
|
|
113
|
-
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data
|
|
115
|
+
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data from diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments.
|
|
116
|
+
|
|
117
|
+
Designed for scenarios involving critical data from multiple workflows, Flowcept supports end-to-end monitoring, analysis, and querying, with enhanced support for Machine Learning (ML) workflows.
|
|
114
118
|
|
|
115
119
|
## Features
|
|
116
120
|
|
|
@@ -133,8 +137,9 @@ Notes:
|
|
|
133
137
|
- TensorBoard
|
|
134
138
|
- Python scripts can be easily instrumented via `@decorators` using `@flowcept_task` (for generic Python method) or `@torch_task` (for methods that encapsulate PyTorch model manipulation, such as training or evaluation).
|
|
135
139
|
- Currently supported MQ systems:
|
|
136
|
-
- Kafka
|
|
137
|
-
- Redis
|
|
140
|
+
- [Kafka](https://kafka.apache.org)
|
|
141
|
+
- [Redis](https://redis.io)
|
|
142
|
+
- [Mofka](https://mofka.readthedocs.io)
|
|
138
143
|
- Currently supported database systems:
|
|
139
144
|
- MongoDB
|
|
140
145
|
- Lightning Memory-Mapped Database (lightweight file-only database system)
|
|
@@ -179,7 +184,7 @@ If you want to install all optional dependencies, use:
|
|
|
179
184
|
pip install flowcept[all]
|
|
180
185
|
```
|
|
181
186
|
|
|
182
|
-
This is
|
|
187
|
+
This is useful mostly for Flowcept developers. Please avoid installing like this if you can, as it may install several dependencies you will never use.
|
|
183
188
|
|
|
184
189
|
### 4. Installing from Source
|
|
185
190
|
To install Flowcept from the source repository:
|
|
@@ -359,6 +364,10 @@ Which was installed using Frontier's /opt/rocm-6.3.1/share/amd_smi
|
|
|
359
364
|
|
|
360
365
|
Some unit tests utilize `torch==2.2.2`, `torchtext==0.17.2`, and `torchvision==0.17.2`. They are only really needed to run some tests and will be installed if you run `pip install flowcept[ml_dev]` or `pip install flowcept[all]`. If you want to use Flowcept with Torch, please adapt torch dependencies according to your project's dependencies.
|
|
361
366
|
|
|
367
|
+
## Documentation
|
|
368
|
+
|
|
369
|
+
Full documentation is available on [Read the Docs](https://flowcept.readthedocs.io/).
|
|
370
|
+
|
|
362
371
|
## Cite us
|
|
363
372
|
|
|
364
373
|
If you used Flowcept in your research, consider citing our paper.
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
[](https://flowcept.readthedocs.io/)
|
|
1
2
|
[](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml)
|
|
2
3
|
[](https://pypi.org/project/flowcept)
|
|
3
4
|
[](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml)
|
|
@@ -17,10 +18,13 @@
|
|
|
17
18
|
- [Data Persistence](#data-persistence)
|
|
18
19
|
- [Performance Tuning](#performance-tuning-for-performance-evaluation)
|
|
19
20
|
- [AMD GPU Setup](#install-amd-gpu-lib)
|
|
21
|
+
- [Further Documentation](#documentation)
|
|
20
22
|
|
|
21
23
|
## Overview
|
|
22
24
|
|
|
23
|
-
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data
|
|
25
|
+
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data from diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments.
|
|
26
|
+
|
|
27
|
+
Designed for scenarios involving critical data from multiple workflows, Flowcept supports end-to-end monitoring, analysis, and querying, with enhanced support for Machine Learning (ML) workflows.
|
|
24
28
|
|
|
25
29
|
## Features
|
|
26
30
|
|
|
@@ -43,8 +47,9 @@ Notes:
|
|
|
43
47
|
- TensorBoard
|
|
44
48
|
- Python scripts can be easily instrumented via `@decorators` using `@flowcept_task` (for generic Python method) or `@torch_task` (for methods that encapsulate PyTorch model manipulation, such as training or evaluation).
|
|
45
49
|
- Currently supported MQ systems:
|
|
46
|
-
- Kafka
|
|
47
|
-
- Redis
|
|
50
|
+
- [Kafka](https://kafka.apache.org)
|
|
51
|
+
- [Redis](https://redis.io)
|
|
52
|
+
- [Mofka](https://mofka.readthedocs.io)
|
|
48
53
|
- Currently supported database systems:
|
|
49
54
|
- MongoDB
|
|
50
55
|
- Lightning Memory-Mapped Database (lightweight file-only database system)
|
|
@@ -89,7 +94,7 @@ If you want to install all optional dependencies, use:
|
|
|
89
94
|
pip install flowcept[all]
|
|
90
95
|
```
|
|
91
96
|
|
|
92
|
-
This is
|
|
97
|
+
This is useful mostly for Flowcept developers. Please avoid installing like this if you can, as it may install several dependencies you will never use.
|
|
93
98
|
|
|
94
99
|
### 4. Installing from Source
|
|
95
100
|
To install Flowcept from the source repository:
|
|
@@ -269,6 +274,10 @@ Which was installed using Frontier's /opt/rocm-6.3.1/share/amd_smi
|
|
|
269
274
|
|
|
270
275
|
Some unit tests utilize `torch==2.2.2`, `torchtext==0.17.2`, and `torchvision==0.17.2`. They are only really needed to run some tests and will be installed if you run `pip install flowcept[ml_dev]` or `pip install flowcept[all]`. If you want to use Flowcept with Torch, please adapt torch dependencies according to your project's dependencies.
|
|
271
276
|
|
|
277
|
+
## Documentation
|
|
278
|
+
|
|
279
|
+
Full documentation is available on [Read the Docs](https://flowcept.readthedocs.io/).
|
|
280
|
+
|
|
272
281
|
## Cite us
|
|
273
282
|
|
|
274
283
|
If you used Flowcept in your research, consider citing our paper.
|
|
@@ -40,7 +40,8 @@ Customizing Settings
|
|
|
40
40
|
Flowcept allows extensive configuration via a YAML file. To use a custom configuration, set the environment variable
|
|
41
41
|
``FLOWCEPT_SETTINGS_PATH`` to point to the absolute path of your settings file. A sample file with more options is provided at `sample_settings.yaml <https://github.com/ORNL/flowcept/blob/main/resources/sample_settings.yaml>`_.
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
Key Settings to Adjust
|
|
44
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
44
45
|
|
|
45
46
|
|
|
46
47
|
- **Service Connections:** Set host, port, and credentials for MQ (`mq:`), key-value DB (`kv_db:`), and optionally MongoDB (`mongodb:`).
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Flowcept
|
|
2
|
+
========
|
|
3
|
+
|
|
4
|
+
GitHub Repository: https://github.com/ornl/flowcept
|
|
5
|
+
|
|
6
|
+
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data from diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments.
|
|
7
|
+
|
|
8
|
+
Designed for scenarios involving critical data from multiple workflows, Flowcept supports end-to-end monitoring, analysis, and querying, with enhanced support for Machine Learning (ML) workflows.
|
|
9
|
+
|
|
10
|
+
.. toctree::
|
|
11
|
+
:maxdepth: 2
|
|
12
|
+
:caption: Contents:
|
|
13
|
+
|
|
14
|
+
getstarted
|
|
15
|
+
schemas
|
|
16
|
+
contributing
|
|
17
|
+
api-reference
|
|
18
|
+
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
flowcept_version: 0.8.
|
|
1
|
+
flowcept_version: 0.8.10 # Version of the Flowcept package. This setting file is compatible with this version.
|
|
2
2
|
|
|
3
3
|
project:
|
|
4
4
|
debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
|
|
@@ -25,7 +25,6 @@ telemetry_capture: # This toggles each individual type of telemetry capture. GPU
|
|
|
25
25
|
|
|
26
26
|
instrumentation:
|
|
27
27
|
enabled: true # This toggles data capture for instrumentation.
|
|
28
|
-
singleton: true # Use a single instrumentation instance per process. Defaults to true
|
|
29
28
|
torch:
|
|
30
29
|
what: parent_and_children # Scope of instrumentation: "parent_only" -- will capture only at the main model level, "parent_and_children" -- will capture the inner layers, or ~ (disable).
|
|
31
30
|
children_mode: telemetry_and_tensor_inspection # What to capture if parent_and_children is chosen in the scope. Possible values: "tensor_inspection" (i.e., tensor metadata), "telemetry", "telemetry_and_tensor_inspection"
|
|
@@ -49,7 +48,7 @@ mq:
|
|
|
49
48
|
timing: false
|
|
50
49
|
chunk_size: -1 # use 0 or -1 to disable this. Or simply omit this from the config file.
|
|
51
50
|
|
|
52
|
-
kv_db:
|
|
51
|
+
kv_db: # You can optionally use KV == MQ if MQ is Redis. Otherwise, these will be the Redis instance credentials.
|
|
53
52
|
host: localhost
|
|
54
53
|
port: 6379
|
|
55
54
|
# uri: use Redis connection uri here
|
|
@@ -59,9 +58,9 @@ web_server:
|
|
|
59
58
|
port: 5000
|
|
60
59
|
|
|
61
60
|
sys_metadata:
|
|
62
|
-
environment_id: "laptop"
|
|
61
|
+
environment_id: "laptop" # We use this to keep track of the environment used to run an experiment. Typical values include the cluster name, but it can be anything that you think will help identify your experimentation environment.
|
|
63
62
|
|
|
64
|
-
extra_metadata:
|
|
63
|
+
extra_metadata: # We use this to store any extra metadata you want to keep track of during an experiment.
|
|
65
64
|
place_holder: ""
|
|
66
65
|
|
|
67
66
|
analytics:
|
|
@@ -70,13 +69,11 @@ analytics:
|
|
|
70
69
|
generated.accuracy: maximum_first
|
|
71
70
|
|
|
72
71
|
db_buffer:
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
stop_max_trials: 240
|
|
79
|
-
stop_trials_sleep: 0.01
|
|
72
|
+
insertion_buffer_time_secs: 5 # Time interval (in seconds) to buffer incoming records before flushing to the database
|
|
73
|
+
buffer_size: 50 # Maximum number of records to hold in the buffer before forcing a flush
|
|
74
|
+
remove_empty_fields: false # If true, fields with null/empty values will be removed before insertion
|
|
75
|
+
stop_max_trials: 240 # Maximum number of trials before giving up when waiting for a fully safe stop (i.e., all records have been inserted as expected).
|
|
76
|
+
stop_trials_sleep: 0.01 # Sleep duration (in seconds) between trials when waiting for a fully safe stop.
|
|
80
77
|
|
|
81
78
|
databases:
|
|
82
79
|
|
|
@@ -89,7 +86,7 @@ databases:
|
|
|
89
86
|
host: localhost
|
|
90
87
|
port: 27017
|
|
91
88
|
db: flowcept
|
|
92
|
-
create_collection_index: true
|
|
89
|
+
create_collection_index: true # Whether flowcept should create collection indices if they haven't been created yet. This is done only at the Flowcept start up.
|
|
93
90
|
|
|
94
91
|
adapters:
|
|
95
92
|
# For each key below, you can have multiple instances. Like mlflow1, mlflow2; zambeze1, zambeze2. Use an empty dict, {}, if you won't use any adapter.
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flowcept CLI.
|
|
3
|
+
|
|
4
|
+
How to add a new command:
|
|
5
|
+
--------------------------
|
|
6
|
+
1. Write a function with type-annotated arguments and a NumPy-style docstring.
|
|
7
|
+
2. Add it to one of the groups in `COMMAND_GROUPS`.
|
|
8
|
+
3. It will automatically become available as `flowcept --<function-name>` (underscores become hyphens).
|
|
9
|
+
|
|
10
|
+
Supports:
|
|
11
|
+
- `flowcept --command`
|
|
12
|
+
- `flowcept --command --arg=value`
|
|
13
|
+
- `flowcept -h` or `flowcept` for full help
|
|
14
|
+
- `flowcept --help --command` for command-specific help
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
import json
|
|
21
|
+
import textwrap
|
|
22
|
+
import inspect
|
|
23
|
+
from functools import wraps
|
|
24
|
+
from typing import List
|
|
25
|
+
|
|
26
|
+
from flowcept import Flowcept, configs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def no_docstring(func):
    """Wrap ``func`` in a pass-through so docstring linters leave it alone.

    The returned callable forwards every positional and keyword argument to
    ``func`` unchanged and returns whatever ``func`` returns.
    ``functools.wraps`` copies the wrapped function's metadata onto it.
    """

    def _forward(*positional, **named):
        return func(*positional, **named)

    # Same effect as decorating ``_forward`` with ``@wraps(func)``.
    return wraps(func)(_forward)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def show_config():
    """
    Show Flowcept configuration.
    """
    # The first docstring line above is reused by main() as this command's CLI help text.
    session_line = f"This is the settings path in this session: {configs.SETTINGS_PATH}"
    # An unset environment variable is reported as None.
    env_settings_path = os.environ.get("FLOWCEPT_SETTINGS_PATH", None)
    env_line = f"This is your FLOWCEPT_SETTINGS_PATH environment variable value: {env_settings_path}"
    print(session_line)
    print(env_line)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def start_consumption_services(bundle_exec_id: str = None, check_safe_stops: bool = False, consumers: List[str] = None):
    """
    Start services that consume data from a queue or other source.

    Parameters
    ----------
    bundle_exec_id : str, optional
        The ID of the bundle execution to associate with the consumers.
    check_safe_stops : bool, optional
        Whether to check for safe stopping conditions before starting.
    consumers : list of str, optional
        List of consumer IDs to start. If not provided, all consumers will be started.
    """
    # The signature annotations and the docstring above are inspected by main()
    # to build the CLI arguments and their help text, so both are kept verbatim.
    report = [
        "Starting consumption services...",
        f" bundle_exec_id: {bundle_exec_id}",
        f" check_safe_stops: {check_safe_stops}",
        # Only the printed value falls back to []; the call below gets the raw argument.
        f" consumers: {consumers or []}",
    ]
    for entry in report:
        print(entry)

    launch_options = {
        "bundle_exec_id": bundle_exec_id,
        "check_safe_stops": check_safe_stops,
        "consumers": consumers,
    }
    Flowcept.start_consumption_services(**launch_options)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def stop_consumption_services():
    """
    Stop the document inserter.
    """
    # Placeholder: the command is listed in COMMAND_GROUPS but only reports that it has no behavior yet.
    notice = "Not implemented yet."
    print(notice)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def start_services(with_mongo: bool = False):
    """
    Start Flowcept services (optionally including MongoDB).

    Parameters
    ----------
    with_mongo : bool, optional
        Whether to also start MongoDB.
    """
    # Placeholder: announces the requested mode, then reports that nothing is started yet.
    if with_mongo:
        mongo_suffix = " with Mongo"
    else:
        mongo_suffix = ""
    print(f"Starting services{mongo_suffix}")
    print("Not implemented yet.")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def stop_services():
    """
    Stop Flowcept services.
    """
    # Placeholder: the command is listed in COMMAND_GROUPS but only reports that it has no behavior yet.
    notice = "Not implemented yet."
    print(notice)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def workflow_count(workflow_id: str):
    """
    Count number of documents in the DB.

    Parameters
    ----------
    workflow_id : str
        The ID of the workflow to count tasks for.
    """

    def _count(**query_options):
        # A fresh filter dict is built for every lookup, one lookup per reported figure.
        return len(Flowcept.db.query({"workflow_id": workflow_id}, **query_options))

    # "tasks" uses the query's default collection; the other two name theirs explicitly.
    result = {
        "workflow_id": workflow_id,
        "tasks": _count(),
        "workflows": _count(collection="workflows"),
        "objects": _count(collection="objects"),
    }
    print(json.dumps(result, indent=2))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def query(query_str: str):
    """
    Query the Document DB.

    Parameters
    ----------
    query_str : str
        A JSON string representing the Mongo query.
    """
    # Decode the JSON text into a filter object; invalid JSON raises from json.loads.
    mongo_filter = json.loads(query_str)
    matches = Flowcept.db.query(mongo_filter)
    print(matches)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Each entry pairs a group title with the command functions that belong to it.
# A function listed here is exposed by main() as `--<function-name>`
# (underscores become hyphens).
COMMAND_GROUPS = [
    ("Basic Commands", [show_config, start_services, stop_services]),
    ("Consumption Commands", [start_consumption_services, stop_consumption_services]),
    ("Database Commands", [workflow_count, query]),
]

# Every command function, flattened across groups. This is a set, so iterating it
# does not follow the declaration order above.
COMMANDS = {command for _title, commands in COMMAND_GROUPS for command in commands}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_numpy_doc(docstring: str):
|
|
147
|
+
parsed = {}
|
|
148
|
+
lines = docstring.splitlines() if docstring else []
|
|
149
|
+
in_params = False
|
|
150
|
+
for line in lines:
|
|
151
|
+
line = line.strip()
|
|
152
|
+
if line.lower().startswith("parameters"):
|
|
153
|
+
in_params = True
|
|
154
|
+
continue
|
|
155
|
+
if in_params:
|
|
156
|
+
if " : " in line:
|
|
157
|
+
name, typeinfo = line.split(" : ", 1)
|
|
158
|
+
parsed[name.strip()] = {"type": typeinfo.strip(), "desc": ""}
|
|
159
|
+
elif parsed:
|
|
160
|
+
last = list(parsed)[-1]
|
|
161
|
+
parsed[last]["desc"] += " " + line
|
|
162
|
+
return parsed
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _cli_flag(name: str) -> str:
    """Return the command-line flag (``--some-name``) for a Python identifier."""
    return f"--{name.replace('_', '-')}"


def _doc_summary(doc: str) -> str:
    """Return the first line of a docstring, or an empty string when there is none."""
    doc_lines = (doc or "").strip().splitlines()
    return doc_lines[0] if doc_lines else ""


def _has_default(param: inspect.Parameter) -> bool:
    """Tell whether a signature parameter has a default value, i.e., is optional."""
    return param.default is not inspect.Parameter.empty


def _parse_bool(text: str) -> bool:
    """Convert a command-line token such as ``true`` or ``0`` into a bool."""
    lowered = text.strip().lower()
    if lowered in ("1", "true", "t", "yes", "y", "on"):
        return True
    if lowered in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")


def _build_parser() -> argparse.ArgumentParser:
    """Create the parser with one flag per command and one option per command parameter."""
    parser = argparse.ArgumentParser(
        description="Flowcept CLI", formatter_class=argparse.RawTextHelpFormatter, add_help=False
    )
    seen_options = set()
    for func in COMMANDS:
        doc = func.__doc__ or ""
        parser.add_argument(_cli_flag(func.__name__), action="store_true", help=_doc_summary(doc))

        params_doc = _parse_numpy_doc(doc)  # Parsed once per command, not once per parameter.
        for pname, param in inspect.signature(func).parameters.items():
            option = _cli_flag(pname)
            if option in seen_options:
                # argparse rejects duplicated option strings, so commands that share a
                # parameter name reuse the option registered first.
                continue
            seen_options.add(option)
            meta = params_doc.get(pname, {})
            help_text = f"{meta.get('type', '')} - {meta.get('desc', '').strip()}"
            if param.annotation is bool:
                # Works as a bare flag (`--opt`) and still accepts an explicit value
                # (`--opt false`). The None default keeps the function's own default
                # when the option is absent.
                parser.add_argument(
                    option, nargs="?", const=True, default=None, type=_parse_bool, help=help_text
                )
            elif param.annotation == List[str]:
                parser.add_argument(option, type=lambda s: s.split(","), help=help_text)
            else:
                parser.add_argument(option, type=str, help=help_text)
    return parser


def _print_command_help(command_flag: str) -> None:
    """Print the docstring and the argument list of the command behind ``command_flag``."""
    command_func = next(f for f in COMMANDS if _cli_flag(f.__name__) == command_flag)
    doc = command_func.__doc__ or ""
    print(f"\nHelp for `flowcept {command_flag}`:\n")
    print(textwrap.indent(doc.strip(), " "))
    print("\n Arguments:")
    params_doc = _parse_numpy_doc(doc)
    for pname, param in inspect.signature(command_func).parameters.items():
        meta = params_doc.get(pname, {})
        requirement = "optional" if _has_default(param) else "required"
        # Show the flag exactly as it must be typed (dashes, not underscores).
        print(
            f" {_cli_flag(pname):<20} {meta.get('type', 'str')}, "
            f"{requirement} - {meta.get('desc', '').strip()}"
        )
    print()


def _print_general_help() -> None:
    """Print every command, grouped as in ``COMMAND_GROUPS``, with its documented arguments."""
    print("\nFlowcept CLI\n")
    for group, funcs in COMMAND_GROUPS:
        print(f"{group}:\n")
        for func in funcs:
            doc = func.__doc__ or ""
            sig_params = inspect.signature(func).parameters
            print(f" flowcept {_cli_flag(func.__name__)}", end="")
            for pname, param in sig_params.items():
                option = _cli_flag(pname)
                print(f" [{option}] " if _has_default(param) else f" {option}", end="")
            print(f"\n {_doc_summary(doc)}")
            # Names documented in the docstring but absent from the signature have no
            # flag, so they are left out instead of raising KeyError.
            documented = [(n, m) for n, m in _parse_numpy_doc(doc).items() if n in sig_params]
            if documented:
                print(" Arguments:")
                for argname, meta in documented:
                    requirement = "optional" if _has_default(sig_params[argname]) else "required"
                    print(
                        f" {_cli_flag(argname):<20} {meta['type']}, "
                        f"{requirement} - {meta['desc'].strip()}"
                    )
            print()
    print("Run `flowcept --<command>` to invoke a command.\n")


@no_docstring
def main():  # noqa: D103
    # Entry point of the `flowcept` command: builds the parser from the functions in
    # COMMANDS, serves the custom help screens, and dispatches to the selected command.
    parser = _build_parser()

    cli_args = sys.argv[1:]
    # `-h` is accepted as well because the "Unknown command" message below advertises it.
    help_flag = "--help" in cli_args or "-h" in cli_args
    command_flags = {_cli_flag(f.__name__) for f in COMMANDS}
    matched_command_flag = next((arg for arg in cli_args if arg in command_flags), None)

    # Handle --help --command
    if help_flag and matched_command_flag:
        _print_command_help(matched_command_flag)
        sys.exit(0)

    if not cli_args or help_flag:
        _print_general_help()
        sys.exit(0)

    args = vars(parser.parse_args())

    for func in COMMANDS:
        if not args.get(func.__name__):
            continue
        params = inspect.signature(func).parameters
        # Only forward options the user actually provided, so function defaults apply.
        kwargs = {pname: args[pname] for pname in params if args.get(pname) is not None}
        missing = [
            _cli_flag(pname)
            for pname, param in params.items()
            if pname not in kwargs
            and not _has_default(param)
            and param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)
        ]
        if missing:
            # Report a usage error instead of letting the call fail with a TypeError traceback.
            parser.error(f"{_cli_flag(func.__name__)} requires {', '.join(missing)}")
        func(**kwargs)
        break
    else:
        print("Unknown command. Use `flowcept -h` to see available commands.")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -126,11 +126,9 @@ if not LMDB_ENABLED and not MONGO_ENABLED:
|
|
|
126
126
|
# DB Buffer Settings #
|
|
127
127
|
##########################
|
|
128
128
|
db_buffer_settings = settings["db_buffer"]
|
|
129
|
-
|
|
130
|
-
INSERTION_BUFFER_TIME = db_buffer_settings.get("insertion_buffer_time_secs", None)
|
|
131
|
-
|
|
132
|
-
DB_MAX_BUFFER_SIZE = int(db_buffer_settings.get("max_buffer_size", 50))
|
|
133
|
-
DB_MIN_BUFFER_SIZE = max(1, int(db_buffer_settings.get("min_buffer_size", 10)))
|
|
129
|
+
|
|
130
|
+
INSERTION_BUFFER_TIME = db_buffer_settings.get("insertion_buffer_time_secs", None) # In seconds:
|
|
131
|
+
DB_BUFFER_SIZE = int(db_buffer_settings.get("buffer_size", 50))
|
|
134
132
|
REMOVE_EMPTY_FIELDS = db_buffer_settings.get("remove_empty_fields", False)
|
|
135
133
|
DB_INSERTER_MAX_TRIALS_STOP = db_buffer_settings.get("stop_max_trials", 240)
|
|
136
134
|
DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01)
|
|
@@ -9,7 +9,6 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
|
9
9
|
)
|
|
10
10
|
from flowcept.configs import (
|
|
11
11
|
ENRICH_MESSAGES,
|
|
12
|
-
INSTRUMENTATION,
|
|
13
12
|
)
|
|
14
13
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
15
14
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
@@ -50,23 +49,15 @@ class BaseInterceptor(object):
|
|
|
50
49
|
elif kind in "dask":
|
|
51
50
|
# This is dask's client interceptor. We essentially use it to store the dask workflow.
|
|
52
51
|
# That's why we don't need another special interceptor and we can reuse the instrumentation one.
|
|
53
|
-
|
|
54
|
-
elif kind == "instrumentation":
|
|
55
|
-
return BaseInterceptor._build_instrumentation_interceptor()
|
|
56
|
-
else:
|
|
57
|
-
raise NotImplementedError
|
|
52
|
+
from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
|
|
58
53
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# By using singleton, we lose the thread safety for the Interceptor, particularly, its MQ buffer.
|
|
62
|
-
# Since some use cases need threads, this allows disabling the singleton for more thread safety.
|
|
63
|
-
is_singleton = INSTRUMENTATION.get("singleton", True)
|
|
64
|
-
if is_singleton:
|
|
54
|
+
return InstrumentationInterceptor.get_instance()
|
|
55
|
+
elif kind == "instrumentation":
|
|
65
56
|
from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
|
|
66
57
|
|
|
67
58
|
return InstrumentationInterceptor.get_instance()
|
|
68
59
|
else:
|
|
69
|
-
|
|
60
|
+
raise NotImplementedError
|
|
70
61
|
|
|
71
62
|
def __init__(self, plugin_key=None, kind=None):
|
|
72
63
|
self.logger = FlowceptLogger()
|
|
@@ -16,11 +16,9 @@ from flowcept.commons.utils import GenericJSONDecoder
|
|
|
16
16
|
from flowcept.commons.vocabulary import Status
|
|
17
17
|
from flowcept.configs import (
|
|
18
18
|
INSERTION_BUFFER_TIME,
|
|
19
|
-
|
|
20
|
-
DB_MIN_BUFFER_SIZE,
|
|
19
|
+
DB_BUFFER_SIZE,
|
|
21
20
|
DB_INSERTER_MAX_TRIALS_STOP,
|
|
22
21
|
DB_INSERTER_SLEEP_TRIALS_STOP,
|
|
23
|
-
ADAPTIVE_DB_BUFFER_SIZE,
|
|
24
22
|
REMOVE_EMPTY_FIELDS,
|
|
25
23
|
JSON_SERIALIZER,
|
|
26
24
|
ENRICH_MESSAGES,
|
|
@@ -67,28 +65,16 @@ class DocumentInserter:
|
|
|
67
65
|
self._previous_time = time()
|
|
68
66
|
self.logger = FlowceptLogger()
|
|
69
67
|
self._main_thread: Thread = None
|
|
70
|
-
self.
|
|
68
|
+
self._curr_db_buffer_size = DB_BUFFER_SIZE
|
|
71
69
|
self._bundle_exec_id = bundle_exec_id
|
|
72
70
|
self.check_safe_stops = check_safe_stops
|
|
73
71
|
self.buffer: AutoflushBuffer = AutoflushBuffer(
|
|
74
72
|
flush_function=DocumentInserter.flush_function,
|
|
75
73
|
flush_function_kwargs={"logger": self.logger, "doc_daos": self._doc_daos},
|
|
76
|
-
max_size=self.
|
|
74
|
+
max_size=self._curr_db_buffer_size,
|
|
77
75
|
flush_interval=INSERTION_BUFFER_TIME,
|
|
78
76
|
)
|
|
79
77
|
|
|
80
|
-
def _set_buffer_size(self):
|
|
81
|
-
if not ADAPTIVE_DB_BUFFER_SIZE:
|
|
82
|
-
return
|
|
83
|
-
else:
|
|
84
|
-
self._curr_max_buffer_size = max(
|
|
85
|
-
DB_MIN_BUFFER_SIZE,
|
|
86
|
-
min(
|
|
87
|
-
DB_MAX_BUFFER_SIZE,
|
|
88
|
-
int(self._curr_max_buffer_size * 1.1),
|
|
89
|
-
),
|
|
90
|
-
)
|
|
91
|
-
|
|
92
78
|
@staticmethod
|
|
93
79
|
def flush_function(buffer, doc_daos, logger):
|
|
94
80
|
"""Flush it."""
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
from time import time
|
|
2
2
|
from typing import Dict
|
|
3
|
+
import os
|
|
4
|
+
import threading
|
|
5
|
+
import random
|
|
3
6
|
|
|
4
7
|
from flowcept.commons.flowcept_dataclasses.task_object import (
|
|
5
8
|
TaskObject,
|
|
@@ -57,21 +60,16 @@ class FlowceptTask(object):
|
|
|
57
60
|
activity_id: str = None,
|
|
58
61
|
used: Dict = None,
|
|
59
62
|
custom_metadata: Dict = None,
|
|
60
|
-
flowcept: "Flowcept" = None,
|
|
61
63
|
):
|
|
62
64
|
if not INSTRUMENTATION_ENABLED:
|
|
63
65
|
self._ended = True
|
|
64
66
|
return
|
|
65
|
-
if flowcept is not None and flowcept._interceptor_instances[0].kind == "instrumentation":
|
|
66
|
-
self._interceptor = flowcept._interceptor_instances[0]
|
|
67
|
-
else:
|
|
68
|
-
self._interceptor = InstrumentationInterceptor.get_instance()
|
|
69
|
-
|
|
70
67
|
self._task = TaskObject()
|
|
68
|
+
self._interceptor = InstrumentationInterceptor.get_instance()
|
|
71
69
|
self._task.telemetry_at_start = self._interceptor.telemetry_capture.capture()
|
|
72
70
|
self._task.activity_id = activity_id
|
|
73
71
|
self._task.started_at = time()
|
|
74
|
-
self._task.task_id = task_id or
|
|
72
|
+
self._task.task_id = task_id or self._gen_task_id()
|
|
75
73
|
self._task.workflow_id = workflow_id or Flowcept.current_workflow_id
|
|
76
74
|
self._task.campaign_id = campaign_id or Flowcept.campaign_id
|
|
77
75
|
self._task.used = used
|
|
@@ -85,6 +83,12 @@ class FlowceptTask(object):
|
|
|
85
83
|
if not self._ended:
|
|
86
84
|
self.end()
|
|
87
85
|
|
|
86
|
+
def _gen_task_id(self):
|
|
87
|
+
pid = os.getpid()
|
|
88
|
+
tid = threading.get_ident()
|
|
89
|
+
rand = random.getrandbits(32)
|
|
90
|
+
return f"{self._task.started_at}_{pid}_{tid}_{rand}"
|
|
91
|
+
|
|
88
92
|
def end(
|
|
89
93
|
self,
|
|
90
94
|
generated: Dict = None,
|
flowcept-0.8.8/docs/index.rst
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
Flowcept
|
|
2
|
-
========
|
|
3
|
-
|
|
4
|
-
Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data across diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments. Designed for scenarios involving critical data from multiple workflows, Flowcept seamlessly integrates data at runtime, providing a unified view for end-to-end monitoring and analysis, and enhanced support for Machine Learning (ML) workflows.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
.. toctree::
|
|
8
|
-
:maxdepth: 2
|
|
9
|
-
:caption: Contents:
|
|
10
|
-
|
|
11
|
-
getstarted
|
|
12
|
-
schemas
|
|
13
|
-
contributing
|
|
14
|
-
api-reference
|
|
15
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|