flowcept 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- flowcept/__init__.py,sha256=ZDHSYTpv7qNrCgx7km3mCNRaJ2jfc0KRKKvRXdVxFwA,2101
2
- flowcept/cli.py,sha256=NB7rzu38Rc8Zyb8ou1XNa7X2NN--EQ7GKdyh0_Kx1Ts,22852
3
- flowcept/configs.py,sha256=DOpwjKMGE-4GDT22DhNrYbeGPMgPOlngDJnuC2rQuDM,8195
4
- flowcept/version.py,sha256=2Qh5hgLW5lQsjWbKUouOffybJbNNBK6QnGSvL0HbWz0,306
1
+ flowcept/__init__.py,sha256=urpwIEJeikV0P6ORXKsM5Lq4o6wCwhySS9A487BYGy4,2241
2
+ flowcept/cli.py,sha256=eVnUrmZtVhZ1ldRMGB1QsqBzNC1Pf2CX33efnlaZ4gs,22842
3
+ flowcept/configs.py,sha256=xw9cdk-bDkR4_bV2jBkDCe9__na9LKJW5tUG32by-m4,8216
4
+ flowcept/version.py,sha256=vKyazpFciSPMlst6m2HH-9RdZcbRHHnKT0jk92I-APc,306
5
5
  flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
6
6
  flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
7
7
  flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
@@ -15,7 +15,7 @@ flowcept/agents/llms/__init__.py,sha256=kzOaJic5VhMBnGvy_Fr5C6sRKVrRntH1ZnYz7f5_
15
15
  flowcept/agents/llms/claude_gcp.py,sha256=fzz7235DgzVueuFj5odsr93jWtYHpYlXkSGW1kmmJwU,4915
16
16
  flowcept/agents/llms/gemini25.py,sha256=VARrjb3tITIh3_Wppmocp_ocSKVZNon0o0GeFEwTnTI,4229
17
17
  flowcept/agents/prompts/__init__.py,sha256=7ICsNhLYzvPS1esG3Vg519s51b1c4yN0WegJUb6Qvww,26
18
- flowcept/agents/prompts/general_prompts.py,sha256=5UYBGti2Mdr5VIPm2Ewn1wxZsVXgRE8jWNvQ-8HZ0Oo,3685
18
+ flowcept/agents/prompts/general_prompts.py,sha256=q0KmR2QYEtBqQOssoF8W5EhZidqC59wL6XFVjF_dbWQ,3675
19
19
  flowcept/agents/prompts/in_memory_query_prompts.py,sha256=oWvZQNUHBBrGq-f94ulhIZW4bkkze02EzAuHY5640QM,17934
20
20
  flowcept/agents/tools/__init__.py,sha256=Xqz2E4-LL_7DDcm1XYJFx2f5RdAsjeTpOJb_DPC7xyc,27
21
21
  flowcept/agents/tools/general_tools.py,sha256=Dw1vYNzVUp8dIB48KFPNxGenERoS8UqJj0HIEfhjQeA,2752
@@ -27,7 +27,7 @@ flowcept/analytics/analytics_utils.py,sha256=FRJdBtQa7Hrk2oR_FFhmhmMf3X6YyZ4nbH5
27
27
  flowcept/analytics/data_augmentation.py,sha256=Dyr5x316Zf-k1e8rVoQMCpFOrklYVHjfejRPrtoycmc,1641
28
28
  flowcept/analytics/plot.py,sha256=L56y1HRnTE6-Fxs62Y0rV2OtDwjSwgSP3yLdalkiRBQ,2932
29
29
  flowcept/commons/__init__.py,sha256=W94CqapS0IGuuIGHHaz4sNuuiYhgtJWtpDEbnI0pGwI,26
30
- flowcept/commons/autoflush_buffer.py,sha256=8M0fcIeHck-mSGQ2HFpW3_Af8-dHswhIbUMX5FATm48,2589
30
+ flowcept/commons/autoflush_buffer.py,sha256=Ohy_RNbq6BXn0_R83OL5iaTgGPmV8cT1moIR1Njg0F8,2753
31
31
  flowcept/commons/flowcept_logger.py,sha256=0asRucrDMeRXvsdhuCmH6lWO7lAt_Z5o5uW7rrQhcjc,1857
32
32
  flowcept/commons/query_utils.py,sha256=3tyK5VYA10iDtmtzNwa8OQGn93DBxsu6rTjHDphftSc,2208
33
33
  flowcept/commons/settings_factory.py,sha256=bMTjgXRfb5HsL2lPnLfem-9trqELbNWE04Ie7lSlxYM,1731
@@ -39,21 +39,21 @@ flowcept/commons/daos/keyvalue_dao.py,sha256=g7zgC9hVC1NTllwUAqGt44YqdqYUgAKgPlX
39
39
  flowcept/commons/daos/redis_conn.py,sha256=gFyW-5yf6B8ExEYopCmbap8ki-iEwuIw-KH9f6o7UGQ,1495
40
40
  flowcept/commons/daos/docdb_dao/__init__.py,sha256=qRvXREeUJ4mkhxdC9bzpOsVX6M2FB5hDyLFxhMxTGhs,30
41
41
  flowcept/commons/daos/docdb_dao/docdb_dao_base.py,sha256=YbfSVJPwZGK2GBYkeapRC83HkmP0c6Msv5TriD88RcI,11812
42
- flowcept/commons/daos/docdb_dao/lmdb_dao.py,sha256=dJOLgCx_lwdz6MKiMpM_UE4rm0angDCPaVz_WU5KqIA,10407
42
+ flowcept/commons/daos/docdb_dao/lmdb_dao.py,sha256=ZuCsdEhI2wGAmjAf82j-1t3tbR6YMmDeaJ_C3HcsLYo,10461
43
43
  flowcept/commons/daos/docdb_dao/mongodb_dao.py,sha256=5x0un15uCDTcnuITOyOhvF9mKj_bUmF2du0AHQfjN9k,40055
44
44
  flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFmSuftvMWSykQ,21
45
- flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=jo98CIyaEjMMHtaw9XIQRPhnN8IgKj2x-cTmWV4u0Ws,9596
45
+ flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=EL8eQedvNLsVLMz4oHemBAsR1S6xFZiezM8dIqKmmCA,9696
46
46
  flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=kjZqPLIu5PaNeM4IDvOxkDRVGTd5UWwq3zhDvVirqW8,5067
47
47
  flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=tRdMGYDzdeIJxad-B4-DE6u8Wzs61eTzOW4ojZrnTxs,4057
48
48
  flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=WKPoMPBSce4shqbBkgsnuqJAJoZZ4U_hdebhyFqtejQ,5535
49
49
  flowcept/commons/flowcept_dataclasses/__init__.py,sha256=8KkiJh0WSRAB50waVluxCSI8Tb9X1L9nup4c8RN3ulc,30
50
50
  flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py,sha256=Cjw2PGYtZDfnwecz6G3S42Ncmxj7AIZVEBx05bsxRUo,399
51
- flowcept/commons/flowcept_dataclasses/task_object.py,sha256=ITTfGNRCPhHdbM9kJxb-4_ROR1yrJuQQ8kM780oe8NQ,5610
51
+ flowcept/commons/flowcept_dataclasses/task_object.py,sha256=XLFD8YTWsyDLSRcgZc5qK2a9yk97XnqZoUAL4T6HNPE,8110
52
52
  flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfIhiZSuPgmTECiq_u9MlxXM,2822
53
- flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=JHvPo1BfF38fxRqZO1OTA3rTMINnPlkNw8e3l2fWn-M,4624
53
+ flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
54
54
  flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
55
55
  flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
56
- flowcept/flowcept_api/flowcept_controller.py,sha256=D5-HeG3LKIqtFvIQ54b9B6qyk1Vx_lWAxS_9_I5MKF4,14895
56
+ flowcept/flowcept_api/flowcept_controller.py,sha256=NFYBvv8OeDbZs2Q8o2dnFWC5N7fofvx7iiOfvmcoraE,15246
57
57
  flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
58
58
  flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
59
59
  flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
@@ -61,7 +61,7 @@ flowcept/flowcept_webserver/resources/__init__.py,sha256=XOk5yhLeLU6JmVXxbl3TY2z
61
61
  flowcept/flowcept_webserver/resources/query_rsrc.py,sha256=Mk1XDC_wVYkMk0eaazqWWrTC07gQU9U0toKfip0ihZE,1353
62
62
  flowcept/flowcept_webserver/resources/task_messages_rsrc.py,sha256=0u68it2W-9NzUUx5fWOZCqvRKe5EsLI8oyvto9634Ng,666
63
63
  flowcept/flowceptor/__init__.py,sha256=wVxRXUv07iNx6SMRRma2vqhR_GIcRl0re_WCYG65PUs,29
64
- flowcept/flowceptor/telemetry_capture.py,sha256=9-Q09LjANAntG6dAz3L1rHWkb7zqtqU9GSFj__FCyyc,13810
64
+ flowcept/flowceptor/telemetry_capture.py,sha256=CWyR8E1rTAjFbUFI9BxaGfJyDd2UbiK0uLGt4m8BnSU,13932
65
65
  flowcept/flowceptor/adapters/__init__.py,sha256=SuZbSZVVQeBJ9zXW-M9jF09dw3XIjre3lSGrUO1Y8Po,27
66
66
  flowcept/flowceptor/adapters/base_interceptor.py,sha256=kbdYW6VuvmBibOVy7Pg3OzeD3OUaHc6jnAhRBpj9f14,6517
67
67
  flowcept/flowceptor/adapters/instrumentation_interceptor.py,sha256=DhK2bBnpghqPSeA62BUqRg6pl8zxuYrP33dK4x6PhRE,733
@@ -81,8 +81,8 @@ flowcept/flowceptor/adapters/tensorboard/__init__.py,sha256=LrcR4WCIlBwwHIUSteQ8
81
81
  flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py,sha256=lSfDd6TucVNzGxbm69BYyCVgMr2p9iUEQjnsS4jIfeI,554
82
82
  flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py,sha256=PUKGlCsYcybsk1HK573Brs6FiXQRoaj6MKgZ3Oyeec4,4881
83
83
  flowcept/flowceptor/consumers/__init__.py,sha256=foxtVEb2ZEe9g1slfYIKM4tIFv-He1l7XS--SYs7nlQ,28
84
- flowcept/flowceptor/consumers/base_consumer.py,sha256=cKEkZAmfzirBcnVNjx3To57zP1Qwdz4lkMbjeZ8D4Q8,3163
85
- flowcept/flowceptor/consumers/consumer_utils.py,sha256=a7GJYgYiTZnxsm3W3MOalgnC8oyQSs7OjqF4LWYI_vI,5704
84
+ flowcept/flowceptor/consumers/base_consumer.py,sha256=hrZ3VFV7pJBMXZsvh7Q2Y36b_ifcnbJkgwe2MiuZL70,3324
85
+ flowcept/flowceptor/consumers/consumer_utils.py,sha256=E6R07zIKNXJTCxvL-OCrCKNYRpqtwRiXiZx0D2BKidk,5893
86
86
  flowcept/flowceptor/consumers/document_inserter.py,sha256=IeVl6Y4Q1KlpYGvE7uDI0vKQf-MGf2pgnIpxCYtyzKE,13392
87
87
  flowcept/flowceptor/consumers/agent/__init__.py,sha256=R1uvjBPeTLw9SpYgyUc6Qmo16pE84PFHcELTTFvyTWU,56
88
88
  flowcept/flowceptor/consumers/agent/base_agent_context_manager.py,sha256=5fBPYs-k4bsKDcIXyUbps9KoiQkfAWLHJB52lypYKas,4161
@@ -92,10 +92,10 @@ flowcept/instrumentation/flowcept_decorator.py,sha256=X4Lp_FSsoL08K8ZhRM4mC0OjKu
92
92
  flowcept/instrumentation/flowcept_loop.py,sha256=7hkcolXxbwwccNzoSbAeCCEu02i4zT317YeJ6dO1MDs,12208
93
93
  flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
94
94
  flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
95
- flowcept/instrumentation/task_capture.py,sha256=la4VaMuihpDycJjHMb490RgujJTgn8s5ilv8o7ZJ5MA,8317
96
- resources/sample_settings.yaml,sha256=P6pMYQSlNI7Rw2dcoP7K_sgBvqcud2kWkyVBpQC3F5E,6689
97
- flowcept-0.9.1.dist-info/METADATA,sha256=tylfzf5Sweb0iZEfHvP5H6ym6BX9LYcxGxTFvpPUOsY,20050
98
- flowcept-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
- flowcept-0.9.1.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
100
- flowcept-0.9.1.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
101
- flowcept-0.9.1.dist-info/RECORD,,
95
+ flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
96
+ resources/sample_settings.yaml,sha256=NxiDXh_IAVBsHdxyhB2U-v212hGPLtHTqAVE6_3GyJ0,6756
97
+ flowcept-0.9.2.dist-info/METADATA,sha256=H91pextdVg1DgvI_HPgt4CnRJeH4UmrC5dsZpLwOAiY,31424
98
+ flowcept-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
+ flowcept-0.9.2.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
100
+ flowcept-0.9.2.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
101
+ flowcept-0.9.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
- flowcept_version: 0.9.1 # Version of the Flowcept package. This setting file is compatible with this version.
1
+ flowcept_version: 0.9.2 # Version of the Flowcept package. This setting file is compatible with this version.
2
2
 
3
3
  project:
4
4
  debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
@@ -7,7 +7,7 @@ project:
7
7
  performance_logging: false # Enable performance logging if true. Particularly useful for MQ flushes.
8
8
  enrich_messages: true # Add extra metadata to task messages, such as IP addresses and UTC timestamps.
9
9
  db_flush_mode: online # Mode for flushing DB entries: "online" or "offline". If online, flushes to the DB will happen before the workflow ends.
10
- # dump_buffer_path: flowcept_messages.jsonl # This is useful if you need to run completely offline.
10
+ # dump_buffer_path: flowcept_messages.jsonl # This is useful if you need to run completely offline. If you omit this, even offline, buffer data will not be persisted.
11
11
 
12
12
  log:
13
13
  log_path: "default" # Path for log file output; "default" will write the log in the directory where the main executable is running from.
@@ -1,439 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: flowcept
3
- Version: 0.9.1
4
- Summary: Capture and query workflow provenance data using data observability
5
- Author: Oak Ridge National Laboratory
6
- License-Expression: MIT
7
- License-File: LICENSE
8
- Keywords: agentic-ai,agentic-workflows,ai,big-data,dask,data-analytics,data-integration,databases,lineage,llm,machine-learning,ml,mlflow,model-management,parallel-processing,provenance,reproducibility,responsible-ai,scientific-workflows,tensorboard,workflows
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Classifier: Programming Language :: Python :: 3
12
- Requires-Python: >=3.10
13
- Requires-Dist: msgpack
14
- Requires-Dist: numpy
15
- Requires-Dist: omegaconf
16
- Requires-Dist: orjson
17
- Provides-Extra: all
18
- Requires-Dist: alembic; extra == 'all'
19
- Requires-Dist: confluent-kafka<=2.8.0; extra == 'all'
20
- Requires-Dist: cryptography; extra == 'all'
21
- Requires-Dist: dask[distributed]<=2024.10.0; extra == 'all'
22
- Requires-Dist: flask-restful; extra == 'all'
23
- Requires-Dist: furo; extra == 'all'
24
- Requires-Dist: gitpython; extra == 'all'
25
- Requires-Dist: google-genai; extra == 'all'
26
- Requires-Dist: jupyterlab; extra == 'all'
27
- Requires-Dist: langchain-community; extra == 'all'
28
- Requires-Dist: lmdb; extra == 'all'
29
- Requires-Dist: mcp[cli]; extra == 'all'
30
- Requires-Dist: mlflow-skinny; extra == 'all'
31
- Requires-Dist: nbmake; extra == 'all'
32
- Requires-Dist: paho-mqtt; extra == 'all'
33
- Requires-Dist: pandas; extra == 'all'
34
- Requires-Dist: pika; extra == 'all'
35
- Requires-Dist: plotly; extra == 'all'
36
- Requires-Dist: psutil>=6.1.1; extra == 'all'
37
- Requires-Dist: py-cpuinfo; extra == 'all'
38
- Requires-Dist: pyarrow; extra == 'all'
39
- Requires-Dist: pymongo; extra == 'all'
40
- Requires-Dist: pytest; extra == 'all'
41
- Requires-Dist: pyyaml; extra == 'all'
42
- Requires-Dist: redis; extra == 'all'
43
- Requires-Dist: requests; extra == 'all'
44
- Requires-Dist: ruff; extra == 'all'
45
- Requires-Dist: scipy; extra == 'all'
46
- Requires-Dist: seaborn; extra == 'all'
47
- Requires-Dist: sphinx; extra == 'all'
48
- Requires-Dist: sqlalchemy; extra == 'all'
49
- Requires-Dist: streamlit; extra == 'all'
50
- Requires-Dist: tbparse; extra == 'all'
51
- Requires-Dist: tensorboard; extra == 'all'
52
- Requires-Dist: tensorflow; extra == 'all'
53
- Requires-Dist: tomli; extra == 'all'
54
- Requires-Dist: watchdog; extra == 'all'
55
- Provides-Extra: analytics
56
- Requires-Dist: plotly; extra == 'analytics'
57
- Requires-Dist: scipy; extra == 'analytics'
58
- Requires-Dist: seaborn; extra == 'analytics'
59
- Provides-Extra: dask
60
- Requires-Dist: dask[distributed]<=2024.10.0; extra == 'dask'
61
- Requires-Dist: tomli; extra == 'dask'
62
- Provides-Extra: dev
63
- Requires-Dist: furo; extra == 'dev'
64
- Requires-Dist: jupyterlab; extra == 'dev'
65
- Requires-Dist: nbmake; extra == 'dev'
66
- Requires-Dist: pika; extra == 'dev'
67
- Requires-Dist: pytest; extra == 'dev'
68
- Requires-Dist: pyyaml; extra == 'dev'
69
- Requires-Dist: ruff; extra == 'dev'
70
- Requires-Dist: sphinx; extra == 'dev'
71
- Provides-Extra: docs
72
- Requires-Dist: furo; extra == 'docs'
73
- Requires-Dist: sphinx; extra == 'docs'
74
- Provides-Extra: extras
75
- Requires-Dist: flask-restful; extra == 'extras'
76
- Requires-Dist: gitpython; extra == 'extras'
77
- Requires-Dist: lmdb; extra == 'extras'
78
- Requires-Dist: pandas; extra == 'extras'
79
- Requires-Dist: psutil>=6.1.1; extra == 'extras'
80
- Requires-Dist: py-cpuinfo; extra == 'extras'
81
- Requires-Dist: redis; extra == 'extras'
82
- Requires-Dist: requests; extra == 'extras'
83
- Provides-Extra: kafka
84
- Requires-Dist: confluent-kafka<=2.8.0; extra == 'kafka'
85
- Provides-Extra: llm-agent
86
- Requires-Dist: langchain-community; extra == 'llm-agent'
87
- Requires-Dist: mcp[cli]; extra == 'llm-agent'
88
- Requires-Dist: streamlit; extra == 'llm-agent'
89
- Provides-Extra: llm-google
90
- Requires-Dist: google-genai; extra == 'llm-google'
91
- Requires-Dist: langchain-community; extra == 'llm-google'
92
- Requires-Dist: mcp[cli]; extra == 'llm-google'
93
- Requires-Dist: streamlit; extra == 'llm-google'
94
- Provides-Extra: lmdb
95
- Requires-Dist: lmdb; extra == 'lmdb'
96
- Provides-Extra: ml-dev
97
- Requires-Dist: datasets==2.17.0; extra == 'ml-dev'
98
- Requires-Dist: nltk; extra == 'ml-dev'
99
- Requires-Dist: numpy<2.0; extra == 'ml-dev'
100
- Requires-Dist: sacremoses; extra == 'ml-dev'
101
- Requires-Dist: torch==2.2.2; extra == 'ml-dev'
102
- Requires-Dist: torchtext==0.17.2; extra == 'ml-dev'
103
- Requires-Dist: torchvision==0.17.2; extra == 'ml-dev'
104
- Provides-Extra: mlflow
105
- Requires-Dist: alembic; extra == 'mlflow'
106
- Requires-Dist: cryptography; extra == 'mlflow'
107
- Requires-Dist: mlflow-skinny; extra == 'mlflow'
108
- Requires-Dist: sqlalchemy; extra == 'mlflow'
109
- Requires-Dist: watchdog; extra == 'mlflow'
110
- Provides-Extra: mongo
111
- Requires-Dist: pyarrow; extra == 'mongo'
112
- Requires-Dist: pymongo; extra == 'mongo'
113
- Provides-Extra: mqtt
114
- Requires-Dist: paho-mqtt; extra == 'mqtt'
115
- Provides-Extra: nvidia
116
- Requires-Dist: nvidia-ml-py; extra == 'nvidia'
117
- Provides-Extra: redis
118
- Requires-Dist: redis; extra == 'redis'
119
- Provides-Extra: telemetry
120
- Requires-Dist: psutil>=6.1.1; extra == 'telemetry'
121
- Requires-Dist: py-cpuinfo; extra == 'telemetry'
122
- Provides-Extra: tensorboard
123
- Requires-Dist: tbparse; extra == 'tensorboard'
124
- Requires-Dist: tensorboard; extra == 'tensorboard'
125
- Requires-Dist: tensorflow; extra == 'tensorboard'
126
- Description-Content-Type: text/markdown
127
-
128
- [![Documentation](https://img.shields.io/badge/docs-readthedocs.io-green.svg)](https://flowcept.readthedocs.io/)
129
- [![Build](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml)
130
- [![PyPI](https://badge.fury.io/py/flowcept.svg)](https://pypi.org/project/flowcept)
131
- [![Tests](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml)
132
- [![Code Formatting](https://github.com/ORNL/flowcept/actions/workflows/checks.yml/badge.svg?branch=dev)](https://github.com/ORNL/flowcept/actions/workflows/checks.yml)
133
- [![License: MIT](https://img.shields.io/github/license/ORNL/flowcept)](LICENSE)
134
-
135
- # Flowcept
136
-
137
- ## Table of Contents
138
-
139
- - [Overview](#overview)
140
- - [Features](#features)
141
- - [Installation](#installation)
142
- - [Setup and the Settings File](#setup)
143
- - [Running with Containers](#running-with-containers)
144
- - [Examples](#examples)
145
- - [Data Persistence](#data-persistence)
146
- - [Performance Tuning](#performance-tuning-for-performance-evaluation)
147
- - [AMD GPU Setup](#install-amd-gpu-lib)
148
- - [Further Documentation](#documentation)
149
-
150
- ## Overview
151
-
152
- Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data from diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments.
153
-
154
- Designed for scenarios involving critical data from multiple workflows, Flowcept supports end-to-end monitoring, analysis, querying, and enhanced support for Machine Learning (ML) workflows.
155
-
156
- ## Features
157
-
158
- - Automatic workflow provenance data capture from heterogeneous workflows
159
- - Data observability with no or minimal intrusion to application workflows
160
- - Explicit application instrumentation, if this is preferred over data observability
161
- - ML data capture in various levels of details: workflow, model fitting or evaluation task, epoch iteration, layer forwarding
162
- - ML model management (e.g., model storage and retrieval, along with their metadata and provenance)
163
- - Adapter-based, loosely-coupled system architecture, making it easy to plug and play with different data processing systems and backend database (e.g., MongoDB) or MQ services (e.g., Redis, Kafka)
164
- - Low-overhead focused system architecture, to avoid adding performance overhead particularly to workloads that run on HPC machines
165
- - Telemetry data capture (e.g., CPU, GPU, Memory consumption) linked to the application dataflow
166
- - Highly customizable to multiple use cases, enabling easy toggle between settings (e.g., with/without provenance capture; with/without telemetry and which telemetry type to capture; which adapters or backend services to run with)
167
- - [W3C PROV](https://www.w3.org/TR/prov-overview/) adherence
168
-
169
- Notes:
170
-
171
- - Currently implemented data observability adapters:
172
- - MLFlow
173
- - Dask
174
- - TensorBoard
175
- - Python scripts can be easily instrumented via `@decorators` using `@flowcept_task` (for generic Python method) or `@torch_task` (for methods that encapsulate PyTorch model manipulation, such as training or evaluation).
176
- - Currently supported MQ systems:
177
- - [Kafka](https://kafka.apache.org)
178
- - [Redis](https://redis.io)
179
- - [Mofka](https://mofka.readthedocs.io)
180
- - Currently supported database systems:
181
- - MongoDB
182
- - Lightning Memory-Mapped Database (lightweight file-only database system)
183
-
184
- Explore [Jupyter Notebooks](notebooks) and [Examples](examples) for usage.
185
-
186
- Refer to [Contributing](CONTRIBUTING.md) for adding new adapters. Note: The term "plugin" in the codebase is synonymous with "adapter," and future updates will standardize terminology.
187
-
188
- # Installation
189
-
190
- Flowcept can be installed in multiple ways, depending on your needs.
191
-
192
- ### 1. Default Installation
193
- To install Flowcept with its basic dependencies from [PyPI](https://pypi.org/project/flowcept/), run:
194
-
195
- ```
196
- pip install flowcept
197
- ```
198
-
199
- This installs the core Flowcept package but does **not** include MongoDB or any adapter-specific dependencies.
200
-
201
-
202
-
203
- ### 2. Installing Specific Adapters and Additional Dependencies
204
- To install extra dependencies required for specific adapters or features, use:
205
-
206
- ```
207
- pip install flowcept[mongo] # Install Flowcept with MongoDB support.
208
- pip install flowcept[mlflow] # Install MLflow adapter.
209
- pip install flowcept[dask] # Install Dask adapter.
210
- pip install flowcept[tensorboard] # Install TensorBoard adapter.
211
- pip install flowcept[kafka] # Use Kafka as the MQ instead of Redis.
212
- pip install flowcept[nvidia] # Capture NVIDIA GPU runtime information.
213
- pip install flowcept[analytics] # Enable extra analytics features.
214
- pip install flowcept[dev] # Install Flowcept's developer dependencies.
215
- ```
216
-
217
- ### 3. Install All Optional Dependencies at Once
218
- If you want to install all optional dependencies, use:
219
-
220
- ```
221
- pip install flowcept[all]
222
- ```
223
-
224
- This is useful mostly for Flowcept developers. Please avoid installing like this if you can, as it may install several dependencies you will never use.
225
-
226
- ### 4. Installing from Source
227
- To install Flowcept from the source repository:
228
-
229
- ```
230
- git clone https://github.com/ORNL/flowcept.git
231
- cd flowcept
232
- pip install .
233
- ```
234
-
235
- You can also install specific dependencies using:
236
-
237
- ```
238
- pip install .[dependency_name]
239
- ```
240
-
241
- This follows the same pattern as step 2, allowing for a customized installation from source.
242
-
243
- # Setup
244
-
245
- ### Start the MQ System:
246
-
247
- To use Flowcept, one needs to start a MQ system `$> make services`. This will start up Redis but see other options in the [deployment](deployment) directory and see [Data Persistence](#data-persistence) notes below.
248
-
249
- ### Flowcept Settings File
250
-
251
- Flowcept requires a settings file for configuration.
252
- You can find an example configuration file [here](resources/sample_settings.yaml), with documentation for each parameter provided as inline comments.
253
-
254
- #### What You Can Configure:
255
-
256
- - Message queue and database routes, ports, and paths;
257
- - Buffer sizes and flush settings;
258
- - Telemetry data capture settings;
259
- - Instrumentation and PyTorch details;
260
- - Log levels;
261
- - Data observability adapters; and more.
262
-
263
- #### How to use a custom settings file:
264
-
265
- Create or modify your settings file based on the [example](resources/sample_settings.yaml).
266
-
267
- Set the `FLOWCEPT_SETTINGS_PATH` environment variable to its absolute path:
268
- ```sh
269
- export FLOWCEPT_SETTINGS_PATH=/absolute/path/to/your/settings.yaml
270
- ```
271
-
272
- If this variable is not set, Flowcept will use the default values from the [example](resources/sample_settings.yaml) file.
273
-
274
- # Running with Containers
275
-
276
- To use containers instead of installing Flowcept's dependencies on your host system, we provide a [Dockerfile](deployment/Dockerfile) alongside a [docker-compose.yml](deployment/compose.yml) for dependent services (e.g., Redis, MongoDB).
277
-
278
- #### Notes:
279
- - As seen in the steps below, there are [Makefile](Makefile) commands to build and run the image. Please use them instead of running the Docker commands to build and run the image.
280
- - The Dockerfile builds from a local `miniconda` image, which will be built first using the [build-image.sh](deployment/build-image.sh) script.
281
- - All dependencies for all adapters are installed, increasing build time. Edit the Dockerfile to customize dependencies based on our [pyproject.toml](pyproject.toml) to reduce build time if needed.
282
-
283
- #### Steps:
284
-
285
- 1. Build the Docker image:
286
- ```bash
287
- make build
288
- ```
289
-
290
- 2. Start dependent services:
291
- ```bash
292
- make services
293
- ```
294
-
295
- 3. Run the image interactively:
296
- ```bash
297
- make run
298
- ```
299
-
300
- 4. Optionally, run Unit tests in the container:
301
- ```bash
302
- make tests-in-container
303
- ```
304
- # Examples
305
-
306
- ### Adapters and Notebooks
307
-
308
- See the [Jupyter Notebooks](notebooks) and [Examples directory](examples) for utilization examples.
309
-
310
-
311
- ### Simple Example with Decorators Instrumentation
312
-
313
- In addition to existing adapters to Dask, MLFlow, and others (it's extensible for any system that generates data), Flowcept also offers instrumentation via @decorators.
314
-
315
- ```python
316
- from flowcept import Flowcept, flowcept_task
317
-
318
- @flowcept_task
319
- def sum_one(n):
320
- return n + 1
321
-
322
-
323
- @flowcept_task
324
- def mult_two(n):
325
- return n * 2
326
-
327
-
328
- with Flowcept(workflow_name='test_workflow'):
329
- n = 3
330
- o1 = sum_one(n)
331
- o2 = mult_two(o1)
332
- print(o2)
333
-
334
- print(Flowcept.db.query(filter={"workflow_id": Flowcept.current_workflow_id}))
335
- ```
336
-
337
- ## Data Persistence
338
-
339
- Flowcept uses an ephemeral message queue (MQ) with a pub/sub system to flush observed data. For optional data persistence, you can choose between:
340
-
341
- - [LMDB](https://lmdb.readthedocs.io/) (default): A lightweight, file-based database requiring no external services (but note it might require `gcc`). Ideal for simple tests or cases needing basic data persistence without query capabilities. Data stored in LMDB can be loaded into tools like Pandas for complex analysis. Flowcept's database API provides methods to export data in LMDB into Pandas DataFrames.
342
- - [MongoDB](https://www.mongodb.com/): A robust, service-based database with advanced query capabilities. Required to use Flowcept's Query API (i.e., `flowcept.Flowcept.db`) to run more complex queries and other features like ML model management or runtime queries (i.e., query while writing). To use MongoDB, initialize the service with `make services-mongo`.
343
-
344
- Flowcept supports writing to both databases simultaneously (default configuration), individually, or to neither, depending on configuration.
345
-
346
- If data persistence is disabled, captured data is sent to the MQ without any default consumer subscribing to persist it. In this case, querying the data requires creating a custom consumer to subscribe to the MQ.
347
-
348
- However, for querying, Flowcept Database API uses only one at a time. If both are enabled, Flowcept defaults to MongoDB. If neither is enabled, an error will occur.
349
-
350
- Data stored in MongoDB and LMDB are interchangeable. You can switch between them by transferring data from one to the other as needed.
351
-
352
- ## Performance Tuning for Performance Evaluation
353
-
354
- In the settings.yaml file, many variables may impact interception efficiency.
355
- Please be mindful of the following parameters:
356
-
357
- * `mq`
358
- - `buffer_size` and `insertion_buffer_time_secs`. -- `buffer_size: 1` is really bad for performance, but it will give the most up-to-date info possible to the MQ.
359
-
360
- * `log`
361
- - set both stream and files to disable
362
-
363
- * `telemetry_capture`
364
- The more things you enable, the more overhead you'll get. For GPU, you can turn on/off specific metrics.
365
-
366
- * `instrumentation`
367
- This will configure whether every single granular step in the model training process will be captured. Disable very granular model inspection and try to use more lightweight methods. There are commented instructions in the settings.yaml sample file.
368
-
369
- Another thing to consider:
370
-
371
- ```
372
- project:
373
- replace_non_json_serializable: false # Here it will assume that all captured data are JSON serializable
374
- db_flush_mode: offline # This disables the feature of runtime analysis in the database.
375
- mq:
376
- chunk_size: -1 # This disables chunking the messages to be sent to the MQ. Use this only if the main memory of the compute nodes is large enough.
377
- ```
378
-
379
- Other variables depending on the adapter may impact too. For instance, in Dask, timestamp creation by workers adds interception overhead. As we evolve the software, other variables that impact overhead appear and we might not have stated them in this README file yet. If you are doing extensive performance evaluation experiments using this software, please reach out to us (e.g., create an issue in the repository) for hints on how to reduce the overhead of our software.
380
-
381
- ## Install AMD GPU Lib
382
-
383
- This section is only important if you want to enable GPU runtime data capture and the GPU is from AMD. NVIDIA GPUs don't need this step.
384
-
385
- For AMD GPUs, we rely on the official AMD ROCM library to capture GPU data.
386
-
387
- Unfortunately, this library is not available as a pypi/conda package, so you must manually install it. See instructions in the link: https://rocm.docs.amd.com/projects/amdsmi/en/latest/
388
-
389
- Here is a summary:
390
-
391
- 1. Install the AMD drivers on the machine (check if they are available already under `/opt/rocm-*`).
392
- 2. Suppose it is /opt/rocm-6.2.0. Then, make sure it has a share/amd_smi subdirectory and pyproject.toml or setup.py in it.
393
- 3. Copy the amd_smi to your home directory: `cp -r /opt/rocm-6.2.0/share/amd_smi ~`
394
- 4. cd ~/amd_smi
395
- 5. In your python environment, do a pip install .
396
-
397
- Current code is compatible with this version: amdsmi==24.7.1+0012a68
398
- Which was installed using Frontier's /opt/rocm-6.3.1/share/amd_smi
399
-
400
- ## Torch Dependencies
401
-
402
- Some unit tests utilize `torch==2.2.2`, `torchtext==0.17.2`, and `torchvision==0.17.2`. They are only really needed to run some tests and will be installed if you run `pip install flowcept[ml_dev]` or `pip install flowcept[all]`. If you want to use Flowcept with Torch, please adapt torch dependencies according to your project's dependencies.
403
-
404
- ## Documentation
405
-
406
- Full documentation is available on [Read the Docs](https://flowcept.readthedocs.io/).
407
-
408
- ## Cite us
409
-
410
- If you used Flowcept in your research, consider citing our paper.
411
-
412
- ```
413
- Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability
414
- R. Souza, T. Skluzacek, S. Wilkinson, M. Ziatdinov, and R. da Silva
415
- 19th IEEE International Conference on e-Science, 2023.
416
- ```
417
-
418
- **Bibtex:**
419
-
420
- ```latex
421
- @inproceedings{souza2023towards,
422
- author = {Souza, Renan and Skluzacek, Tyler J and Wilkinson, Sean R and Ziatdinov, Maxim and da Silva, Rafael Ferreira},
423
- booktitle = {IEEE International Conference on e-Science},
424
- doi = {10.1109/e-Science58273.2023.10254822},
425
- link = {https://doi.org/10.1109/e-Science58273.2023.10254822},
426
- pdf = {https://arxiv.org/pdf/2308.09004.pdf},
427
- title = {Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability},
428
- year = {2023}
429
- }
430
-
431
- ```
432
-
433
- ## Disclaimer & Get in Touch
434
-
435
- Please note that this is a research software. We encourage you to give it a try and use it with your own stack. We are continuously working on improving documentation and adding more examples and notebooks. If you are interested in working with Flowcept in your own scientific project, we can give you a jump start if you reach out to us. Feel free to [create an issue](https://github.com/ORNL/flowcept/issues/new), [create a new discussion thread](https://github.com/ORNL/flowcept/discussions/new/choose) or drop us an email (we trust you'll find a way to reach out to us :wink:).
436
-
437
- ## Acknowledgement
438
-
439
- This research uses resources of the Oak Ridge Leadership Computing Facility at the Oak Ridge National Laboratory, which is supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC05-00OR22725.