flowcept 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flowcept/commons/daos/docdb_dao/mongodb_dao.py CHANGED
@@ -61,9 +61,22 @@ class MongoDBDAO(DocumentDBDAO):
         self.logger = FlowceptLogger()
 
         if MONGO_URI is not None:
-            self._client = MongoClient(MONGO_URI)
+            self._client = MongoClient(
+                MONGO_URI,
+                maxPoolSize=1000,  # TODO: conf file
+                socketTimeoutMS=60000,
+                connectTimeoutMS=60000,
+                serverSelectionTimeoutMS=60000,
+            )
         else:
-            self._client = MongoClient(MONGO_HOST, MONGO_PORT)
+            self._client = MongoClient(
+                MONGO_HOST,
+                MONGO_PORT,
+                maxPoolSize=1000,
+                socketTimeoutMS=60000,
+                connectTimeoutMS=60000,
+                serverSelectionTimeoutMS=60000,
+            )
         self._db = self._client[MONGO_DB]
 
         self._tasks_collection = self._db["tasks"]
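The hard-coded pool size carries a `TODO: conf file` note. A minimal sketch of what reading these knobs from a settings dict could look like; the `mongodb` section and its key names below are hypothetical, not part of the released configuration:

```python
# Hypothetical sketch only: the "mongodb" section and key names are assumptions.
from pymongo import MongoClient

def build_mongo_client(settings: dict) -> MongoClient:
    """Build a MongoClient with pool/timeout knobs taken from a config dict."""
    conf = settings.get("mongodb", {})
    return MongoClient(
        conf.get("uri", "mongodb://localhost:27017"),
        maxPoolSize=conf.get("max_pool_size", 1000),
        socketTimeoutMS=conf.get("socket_timeout_ms", 60000),
        connectTimeoutMS=conf.get("connect_timeout_ms", 60000),
        serverSelectionTimeoutMS=conf.get("server_selection_timeout_ms", 60000),
    )
```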
flowcept/commons/daos/mq_dao/mq_dao_mofka.py CHANGED
@@ -65,7 +65,7 @@ class MQDaoMofka(MQDao):
 
     def _bulk_publish(self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps):
        try:
-            self.logger.debug(f"Going to send Message:\n\t[BEGIN_MSG]{buffer}\n[END_MSG]\t")
+            # self.logger.debug(f"Going to send Message:\n\t[BEGIN_MSG]{buffer}\n[END_MSG]\t")
            for m in buffer:
                self.producer.push(m)
 
@@ -75,14 +75,14 @@ class MQDaoMofka(MQDao):
            self.logger.error(f"Message that caused error: {buffer}")
        try:
            self.producer.flush()
-            self.logger.info(f"Flushed {len(buffer)} msgs to MQ!")
+            # self.logger.info(f"Flushed {len(buffer)} msgs to MQ!")
        except Exception as e:
            self.logger.exception(e)
 
    def _bulk_publish_timed(self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps):
        total = 0
        try:
-            self.logger.debug(f"Going to send Message:\n\t[BEGIN_MSG]{buffer}\n[END_MSG]\t")
+            # self.logger.debug(f"Going to send Message:\n\t[BEGIN_MSG]{buffer}\n[END_MSG]\t")
 
            for m in buffer:
                self.producer.push(m)
@@ -97,7 +97,7 @@ class MQDaoMofka(MQDao):
            self.producer.flush()
            t2 = time()
            self._flush_events.append(["bulk", t1, t2, t2 - t1, total])
-            self.logger.info(f"Flushed {len(buffer)} msgs to MQ!")
+            # self.logger.info(f"Flushed {len(buffer)} msgs to MQ!")
        except Exception as e:
            self.logger.exception(e)
 
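The release silences these per-flush log calls by commenting them out. An alternative that keeps the messages available at higher verbosity is to gate them on the effective log level, so the f-string is never built on the hot path; a sketch of that pattern (not what 0.8.7 does):

```python
import logging

logger = logging.getLogger("flowcept.mq")

def bulk_publish(producer, buffer):
    # isEnabledFor() avoids formatting the message when the level is off.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Going to send Message:\n\t[BEGIN_MSG]{buffer}\n[END_MSG]\t")
    for m in buffer:
        producer.push(m)
    producer.flush()
    if logger.isEnabledFor(logging.INFO):
        logger.info(f"Flushed {len(buffer)} msgs to MQ!")
```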
flowcept/commons/flowcept_dataclasses/workflow_object.py CHANGED
@@ -1,4 +1,4 @@
-"""Workflow module."""
+"""Workflow Object module."""
 
 from typing import Dict, AnyStr, List
 import msgpack
flowcept/configs.py CHANGED
@@ -146,7 +146,7 @@ PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
-REGISTER_WORKFLOW = settings["project"].get("register_workflow", True)
+
 
 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
 
flowcept/flowceptor/adapters/base_interceptor.py CHANGED
@@ -9,6 +9,7 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
 )
 from flowcept.configs import (
     ENRICH_MESSAGES,
+    INSTRUMENTATION,
 )
 from flowcept.commons.flowcept_logger import FlowceptLogger
 from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
@@ -49,15 +50,23 @@ class BaseInterceptor(object):
        elif kind in "dask":
            # This is dask's client interceptor. We essentially use it to store the dask workflow.
            # That's why we don't need another special interceptor and we can reuse the instrumentation one.
-            from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
-
-            return InstrumentationInterceptor.get_instance()
+            return BaseInterceptor._build_instrumentation_interceptor()
        elif kind == "instrumentation":
+            return BaseInterceptor._build_instrumentation_interceptor()
+        else:
+            raise NotImplementedError
+
+    @staticmethod
+    def _build_instrumentation_interceptor():
+        # By using singleton, we lose the thread safety for the Interceptor, particularly, its MQ buffer.
+        # Since some use cases need threads, this allows disabling the singleton for more thread safety.
+        is_singleton = INSTRUMENTATION.get("singleton", True)
+        if is_singleton:
            from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
 
            return InstrumentationInterceptor.get_instance()
        else:
-            raise NotImplementedError
+            return BaseInterceptor(kind="instrumentation")
 
    def __init__(self, plugin_key=None, kind=None):
        self.logger = FlowceptLogger()
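The new `singleton` knob is read from the `instrumentation` section of the settings file (see the sample_settings.yaml hunk at the end of this diff); turning it off trades the shared MQ buffer for per-caller thread safety:

```yaml
# From the updated sample_settings.yaml: with singleton disabled, each call to
# _build_instrumentation_interceptor() returns a fresh BaseInterceptor.
instrumentation:
  enabled: true
  singleton: false # default is true (process-wide InstrumentationInterceptor)
```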
flowcept/flowceptor/consumers/document_inserter.py CHANGED
@@ -211,10 +211,14 @@ class DocumentInserter:
        return True
 
    def stop(self, bundle_exec_id=None):
-        """Stop it."""
+        """Stop document inserter."""
        if self.check_safe_stops:
            trial = 0
            while not self._mq_dao.all_time_based_threads_ended(bundle_exec_id):
+                self.logger.debug(
+                    f"# time_based_threads for bundle_exec_id {bundle_exec_id} is"
+                    f"{self._mq_dao._keyvalue_dao.set_count(bundle_exec_id)}"
+                )
                trial += 1
                self.logger.info(
                    f"Doc Inserter {id(self)}: It's still not safe to stop DocInserter. "
flowcept/flowceptor/telemetry_capture.py CHANGED
@@ -168,7 +168,7 @@ class GPUCapture:
        }
 
        if "id" in gpu_conf:
-            flowcept_gpu_info["id"] = (amdsmi_get_gpu_device_uuid(device),)
+            flowcept_gpu_info["id"] = amdsmi_get_gpu_device_uuid(device)
 
        return flowcept_gpu_info
 
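The removed trailing comma is the whole bug fix: in Python the comma, not the parentheses, creates a tuple, so the old code stored a one-element tuple where a plain UUID string was expected:

```python
uuid = "gpu-uuid-1234"
as_tuple = (uuid,)  # trailing comma -> one-element tuple: ('gpu-uuid-1234',)
as_value = (uuid)   # parentheses alone are a no-op -> 'gpu-uuid-1234'
assert as_tuple == (uuid,) and as_value == uuid
```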
flowcept/instrumentation/flowcept_torch.py CHANGED
@@ -17,7 +17,6 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
 )
 from flowcept.commons.vocabulary import Status
 from flowcept.configs import (
-    REGISTER_WORKFLOW,
     INSTRUMENTATION,
     TELEMETRY_CAPTURE,
     REPLACE_NON_JSON_SERIALIZABLE,
@@ -30,6 +29,8 @@ from flowcept.instrumentation.flowcept_task import get_current_context_task_id
 
 TORCH_CONFIG = INSTRUMENTATION.get("torch")
 
+REGISTER_WORKFLOW = TORCH_CONFIG.get("register_workflow", True)
+
 
 def flowcept_torch(cls):
     """
flowcept/instrumentation/task_capture.py CHANGED
@@ -49,9 +49,6 @@ class FlowceptTask(object):
    are no-ops, and no data is captured.
    """
 
-    if INSTRUMENTATION_ENABLED:
-        _interceptor = InstrumentationInterceptor.get_instance()
-
    def __init__(
        self,
        task_id: str = None,
@@ -60,12 +57,18 @@ class FlowceptTask(object):
        activity_id: str = None,
        used: Dict = None,
        custom_metadata: Dict = None,
+        flowcept: "Flowcept" = None,
    ):
        if not INSTRUMENTATION_ENABLED:
            self._ended = True
            return
+        if flowcept is not None and flowcept._interceptor_instances[0].kind == "instrumentation":
+            self._interceptor = flowcept._interceptor_instances[0]
+        else:
+            self._interceptor = InstrumentationInterceptor.get_instance()
+
        self._task = TaskObject()
-        self._task.telemetry_at_start = FlowceptTask._interceptor.telemetry_capture.capture()
+        self._task.telemetry_at_start = self._interceptor.telemetry_capture.capture()
        self._task.activity_id = activity_id
        self._task.started_at = time()
        self._task.task_id = task_id or str(self._task.started_at)
@@ -117,11 +120,11 @@ class FlowceptTask(object):
        """
        if not INSTRUMENTATION_ENABLED:
            return
-        self._task.telemetry_at_end = FlowceptTask._interceptor.telemetry_capture.capture()
+        self._task.telemetry_at_end = self._interceptor.telemetry_capture.capture()
        self._task.ended_at = ended_at or time()
        self._task.status = status
        self._task.stderr = stderr
        self._task.stdout = stdout
        self._task.generated = generated
-        FlowceptTask._interceptor.intercept(self._task.to_dict())
+        self._interceptor.intercept(self._task.to_dict())
        self._ended = True
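Since the interceptor is now resolved per instance, a task can be bound to a specific `Flowcept` controller via the new `flowcept` argument instead of always using the process-wide singleton. A hedged usage sketch: the workflow name and payloads are illustrative, and the `end()` call assumes the method whose body appears in the last hunk; only the `flowcept`/`activity_id`/`used` keyword arguments come from the diff itself.

```python
from flowcept import Flowcept
from flowcept.instrumentation.task_capture import FlowceptTask

# Illustrative values; not taken from the release.
with Flowcept(workflow_name="my_workflow") as f:
    task = FlowceptTask(flowcept=f, activity_id="sum", used={"x": 1, "y": 2})
    z = 1 + 2
    task.end(generated={"z": z})
```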
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
 # The expected format is: <Major>.<Minor>.<Patch>
 # This file is supposed to be automatically modified by the CI Bot.
 # See .github/workflows/version_bumper.py
-__version__ = "0.8.5"
+__version__ = "0.8.7"
flowcept-0.8.5.dist-info/METADATA → flowcept-0.8.7.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flowcept
-Version: 0.8.5
+Version: 0.8.7
 Summary: Capture and query workflow provenance data using data observability
 Project-URL: GitHub, https://github.com/ORNL/flowcept
 Author: Oak Ridge National Laboratory
@@ -96,20 +96,34 @@ Description-Content-Type: text/markdown
 
 # Flowcept
 
+## Table of Contents
+
+- [Overview](#overview)
+- [Features](#features)
+- [Installation](#installation)
+- [Setup and the Settings File](#setup)
+- [Running with Containers](#running-with-containers)
+- [Examples](#examples)
+- [Data Persistence](#data-persistence)
+- [Performance Tuning](#performance-tuning-for-performance-evaluation)
+- [AMD GPU Setup](#install-amd-gpu-lib)
+
+## Overview
+
 Flowcept is a runtime data integration system that captures and queries workflow provenance with minimal or no code changes. It unifies data across diverse workflows and tools, enabling integrated analysis and insights, especially in federated environments. Designed for scenarios involving critical data from multiple workflows, Flowcept seamlessly integrates data at runtime, providing a unified view for end-to-end monitoring and analysis, and enhanced support for Machine Learning (ML) workflows.
 
-Other capabilities include:
-
-- Automatic multi-workflow provenance data capture;
-- Data observability, enabling minimal intrusion to user workflows;
-- Explicit user workflow instrumentation, if this is preferred over implicit data observability;
-- ML data capture in various levels of details: workflow, model fitting or evaluation task, epoch iteration, layer forwarding;
-- ML model management;
-- Adapter-based, loosely-coupled system architecture, making it easy to plug and play with different data processing systems and backend database (e.g., MongoDB) or MQ services (e.g., Redis, Kafka);
-- Low-overhead focused system architecture, to avoid adding performance overhead particularly to workloads that run on HPC machines;
-- Telemetry data capture (e.g., CPU, GPU, Memory consumption) linked to the application dataflow;
-- Highly customizable to multiple use cases, enabling easy toggle between settings (e.g., with/without provenance capture; with/without telemetry and which telemetry type to capture; which adapters or backend services to run with);
-- [W3C PROV](https://www.w3.org/TR/prov-overview/) adherence;
+## Features
+
+- Automatic workflow provenance data capture from heterogeneous workflows
+- Data observability with no or minimal intrusion to application workflows
+- Explicit application instrumentation, if this is preferred over data observability
+- ML data capture in various levels of details: workflow, model fitting or evaluation task, epoch iteration, layer forwarding
+- ML model management (e.g., model storage and retrieval, along with their metadata and provenance)
+- Adapter-based, loosely-coupled system architecture, making it easy to plug and play with different data processing systems and backend database (e.g., MongoDB) or MQ services (e.g., Redis, Kafka)
+- Low-overhead focused system architecture, to avoid adding performance overhead particularly to workloads that run on HPC machines
+- Telemetry data capture (e.g., CPU, GPU, Memory consumption) linked to the application dataflow
+- Highly customizable to multiple use cases, enabling easy toggle between settings (e.g., with/without provenance capture; with/without telemetry and which telemetry type to capture; which adapters or backend services to run with)
+- [W3C PROV](https://www.w3.org/TR/prov-overview/) adherence
 
 Notes:
 
@@ -192,7 +206,8 @@ To use Flowcept, one needs to start a MQ system `$> make services`. This will st
 
 ### Flowcept Settings File
 
-Flowcept requires a settings file for configuration. You can find an example [here](resources/sample_settings.yaml).
+Flowcept requires a settings file for configuration.
+You can find an example configuration file [here](resources/sample_settings.yaml), with documentation for each parameter provided as inline comments.
 
 #### What You Can Configure:
 
@@ -214,7 +229,6 @@ export FLOWCEPT_SETTINGS_PATH=/absolute/path/to/your/settings.yaml
 
 If this variable is not set, Flowcept will use the default values from the [example](resources/sample_settings.yaml) file.
 
-
 # Running with Containers
 
 To use containers instead of installing Flowcept's dependencies on your host system, we provide a [Dockerfile](deployment/Dockerfile) alongside a [docker-compose.yml](deployment/compose.yml) for dependent services (e.g., Redis, MongoDB).
flowcept-0.8.5.dist-info/RECORD → flowcept-0.8.7.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
 flowcept/__init__.py,sha256=CukmdzTUvm6Y_plTKPq4kKn7w9LdR36j7V_C_UQyjhU,2011
-flowcept/configs.py,sha256=_-jhoI_HGKjzymjYTlDuysbM38Gr2aunc0Q-Stlmcwk,7511
-flowcept/version.py,sha256=RW_aTLB2vWTDjrpIPcWytUXxQhZLynom14B2UHfVVcU,306
+flowcept/configs.py,sha256=NDUAqqoKfztt6Qjwxy95eTQU71AovVJWXalI1x3HJ7Y,7441
+flowcept/version.py,sha256=VOEzbBIlAwCTu8Yc20ihIzsk7fgYS5oHS79aAfnDY98,306
 flowcept/analytics/__init__.py,sha256=46q-7vsHq_ddPNrzNnDgEOiRgvlx-5Ggu2ocyROMV0w,641
 flowcept/analytics/analytics_utils.py,sha256=FRJdBtQa7Hrk2oR_FFhmhmMf3X6YyZ4nbH5RIYh7KL4,8753
 flowcept/analytics/data_augmentation.py,sha256=Dyr5x316Zf-k1e8rVoQMCpFOrklYVHjfejRPrtoycmc,1641
@@ -17,17 +17,17 @@ flowcept/commons/daos/keyvalue_dao.py,sha256=03xHhQIfZas0LQLP1DbGJ5DoskXyZNXQKIN
 flowcept/commons/daos/docdb_dao/__init__.py,sha256=qRvXREeUJ4mkhxdC9bzpOsVX6M2FB5hDyLFxhMxTGhs,30
 flowcept/commons/daos/docdb_dao/docdb_dao_base.py,sha256=YbfSVJPwZGK2GBYkeapRC83HkmP0c6Msv5TriD88RcI,11812
 flowcept/commons/daos/docdb_dao/lmdb_dao.py,sha256=dJOLgCx_lwdz6MKiMpM_UE4rm0angDCPaVz_WU5KqIA,10407
-flowcept/commons/daos/docdb_dao/mongodb_dao.py,sha256=-Kxjep1FbjKiGjvzyvePVHDf-Q1lOIce1EzBURSKubc,38037
+flowcept/commons/daos/docdb_dao/mongodb_dao.py,sha256=0y9RiL54e1GxSTkRHFlMrLFAHWuB3YyNS2zLsnBPtxg,38456
 flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFmSuftvMWSykQ,21
 flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=EAqOhy7Q8V29JFDG8C50nRK34KsPxEICkG4elk4ZfX8,9020
 flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=bf-bZvWw9JJk8Kdfzx2UkAnQC95rSrKXDEyYkrcncOk,4400
-flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=aZ810wN5Wkjk7oRUxDWJWOIREUsmq57oI4AxY1bWBuk,3940
+flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=Q_mgZ3C_4gTTvnuJ2ZLmJgJfbAOopeSR9jvznI4JRuo,3948
 flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=Br97SoDIkt4dHH937Yjg3wtkn1xGT-x9t-8E3VD5TeU,4277
 flowcept/commons/flowcept_dataclasses/__init__.py,sha256=8KkiJh0WSRAB50waVluxCSI8Tb9X1L9nup4c8RN3ulc,30
 flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py,sha256=Cjw2PGYtZDfnwecz6G3S42Ncmxj7AIZVEBx05bsxRUo,399
 flowcept/commons/flowcept_dataclasses/task_object.py,sha256=3DD5ZNMz7EVILS9PRkQ3khboav7lIKoUC5W6sKMFauQ,4694
 flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfIhiZSuPgmTECiq_u9MlxXM,2822
-flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=FBpel5PulrN3mCCk3hrQhoYiFqd-4aNV4tT03bCV3DE,4376
+flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=f8aB0b3xcUr3KQTlloF7R_P6xQejzDPOm-s6dLhGMeA,4383
 flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
 flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
 flowcept/flowcept_api/flowcept_controller.py,sha256=lkHR7O0zAAfbGtVa4o9tjZMdZquYN7vdnymRKzc4B8s,11933
@@ -38,9 +38,9 @@ flowcept/flowcept_webserver/resources/__init__.py,sha256=XOk5yhLeLU6JmVXxbl3TY2z
 flowcept/flowcept_webserver/resources/query_rsrc.py,sha256=Mk1XDC_wVYkMk0eaazqWWrTC07gQU9U0toKfip0ihZE,1353
 flowcept/flowcept_webserver/resources/task_messages_rsrc.py,sha256=0u68it2W-9NzUUx5fWOZCqvRKe5EsLI8oyvto9634Ng,666
 flowcept/flowceptor/__init__.py,sha256=wVxRXUv07iNx6SMRRma2vqhR_GIcRl0re_WCYG65PUs,29
-flowcept/flowceptor/telemetry_capture.py,sha256=FlWGLpzb6pBJOKVi349kyZKzAHeTsL86BCQd4Wtxpig,13746
+flowcept/flowceptor/telemetry_capture.py,sha256=wSXyQJ-vPVzeldD4KqoLQA2rg7V0EOQo_11ErJE5oQQ,13743
 flowcept/flowceptor/adapters/__init__.py,sha256=SuZbSZVVQeBJ9zXW-M9jF09dw3XIjre3lSGrUO1Y8Po,27
-flowcept/flowceptor/adapters/base_interceptor.py,sha256=99a_Ipnj6g8qZMHWLBEYJh0Cox033ADxOKPFrivr9gw,6056
+flowcept/flowceptor/adapters/base_interceptor.py,sha256=a2CX7COCpYzIpQeVulrLJTSVIw453U-S2gmrMlouO5A,6487
 flowcept/flowceptor/adapters/instrumentation_interceptor.py,sha256=DhK2bBnpghqPSeA62BUqRg6pl8zxuYrP33dK4x6PhRE,733
 flowcept/flowceptor/adapters/interceptor_state_manager.py,sha256=xRzmi5YFKBEqNtX8F5s6XlMTRe27ml4BmQtBO4WtG2c,919
 flowcept/flowceptor/adapters/dask/__init__.py,sha256=GKreb5L_nliD2BEckyB943zOQ-b6Gn1fLDj81FqSK2Y,23
@@ -60,14 +60,14 @@ flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py,sha256=nn9MxvcdzgmOa
 flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py,sha256=Bjyi48JW0DXJLJuvwPxaD8zxxsSoEFgSoXl8YcbwFWk,3782
 flowcept/flowceptor/consumers/__init__.py,sha256=foxtVEb2ZEe9g1slfYIKM4tIFv-He1l7XS--SYs7nlQ,28
 flowcept/flowceptor/consumers/consumer_utils.py,sha256=7bvFJWusJkfA4j0gwZLDIIsIOyfk9wRq6s5liS3JAV0,5665
-flowcept/flowceptor/consumers/document_inserter.py,sha256=rAK3rs3VNW5a6koesE05scQ1mR_4BhuxLurP10ipURs,9339
+flowcept/flowceptor/consumers/document_inserter.py,sha256=fNPLa25oNhr3Y6-pRvzRp1zO4j3WBg7YXRnSHyDaaCo,9568
 flowcept/instrumentation/__init__.py,sha256=M5bTmg80E4QyN91gUX3qfw_nbtJSXwGWcKxdZP3vJz0,34
 flowcept/instrumentation/flowcept_loop.py,sha256=RvETm3Pn37dIw_a1RXigyh2U7MCBHqi46dPmbrz3RMQ,12171
 flowcept/instrumentation/flowcept_task.py,sha256=l_BAYEUZ_SeBt8QJN_E9D9QcZVYRnW9qO_XRnqvmePE,5993
-flowcept/instrumentation/flowcept_torch.py,sha256=KXA1HBwz8l5Qp7PkZ7nsbYlM8IcwWD_u04NxaAcZPzM,23395
-flowcept/instrumentation/task_capture.py,sha256=u82r_SgzoVKyb6_SWtfB-meBUZgjrXvF5dxkH9vnMDs,4776
-resources/sample_settings.yaml,sha256=aZRAZRkgCe52i-8czQvZsEIAz8dGau-OF2YClUF3QGs,3427
-flowcept-0.8.5.dist-info/METADATA,sha256=uZzz3Hz-Pee3zpeyNYLk5K-JbS_miCTMXIXlqtMBXX0,17543
-flowcept-0.8.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-flowcept-0.8.5.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
-flowcept-0.8.5.dist-info/RECORD,,
+flowcept/instrumentation/flowcept_torch.py,sha256=mH4sI2FMtBpGk4hN3U6MUwqd6sOPER8TbigUkexfhDY,23437
+flowcept/instrumentation/task_capture.py,sha256=fbTAhf4y69pRCpnaH8r0dczSmPyNINSpljMrVyUnp0U,4945
+resources/sample_settings.yaml,sha256=aKeHf8895vrHIbi0QS1w2WT5n8ZNI9Ep5PVPF5Y5MEQ,4957
+flowcept-0.8.7.dist-info/METADATA,sha256=xRNvugAeW4JZkzUDzpYc96qOnxyu0-abqV6p6RArHZA,18086
+flowcept-0.8.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+flowcept-0.8.7.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
+flowcept-0.8.7.dist-info/RECORD,,
resources/sample_settings.yaml CHANGED
@@ -1,18 +1,19 @@
+flowcept_version: 0.8.0 # Version of the Flowcept package. This setting file is compatible with this version.
+
 project:
-  debug: true
-  json_serializer: default # or complex. If "complex", Flowcept will deal with complex python dicts that may contain JSON unserializable values
-  replace_non_json_serializable: true
-  performance_logging: false
-  register_workflow: true
-  enrich_messages: true
-  db_flush_mode: online # or offline
+  debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
+  json_serializer: default # JSON serialization mode: default or complex. If "complex", Flowcept will deal with complex python dicts that may contain JSON unserializable values
+  replace_non_json_serializable: true # Replace values that can't be JSON serialized
+  performance_logging: false # Enable performance logging if true. Particularly useful for MQ flushes.
+  enrich_messages: true # Add extra metadata to task messages, such as IP addresses and UTC timestamps.
+  db_flush_mode: online # Mode for flushing DB entries: "online" or "offline". If online, flushes to the DB will happen before the workflow ends.
 
 log:
-  log_path: "default"
-  log_file_level: error # use 'disable' to disable logs
-  log_stream_level: error
+  log_path: "default" # Path for log file output; "default" will write the log in the directory where the main executable is running from.
+  log_file_level: error # Logging level (error, debug, info, critical) for file logs; use "disable" to turn off.
+  log_stream_level: error # Logging level (error, debug, info, critical) for console/stream logs; use "disable" to turn off.
 
-telemetry_capture:
+telemetry_capture: # This toggles each individual type of telemetry capture. GPU capture is treated different depending on the vendor (AMD or NVIDIA).
   gpu: ~ # ~ means None. This is a list with GPU metrics. AMD=[activity,used,power,temperature,others,id]; NVIDIA=[used,temperature,power,name,id]
   cpu: true
   per_cpu: true
@@ -23,17 +24,18 @@ telemetry_capture:
   machine_info: true
 
 instrumentation:
-  enabled: true
+  enabled: true # This toggles data capture for instrumentation.
+  singleton: true # Use a single instrumentation instance per process. Defaults to true
   torch:
-    what: parent_and_children # parent_only, parent_and_children, ~
-    children_mode: telemetry_and_tensor_inspection # tensor_inspection, telemetry, telemetry_and_tensor_inspection
+    what: parent_and_children # Scope of instrumentation: "parent_only" -- will capture only at the main model level, "parent_and_children" -- will capture the inner layers, or ~ (disable).
+    children_mode: telemetry_and_tensor_inspection # What to capture if parent_and_children is chosen in the scope. Possible values: "tensor_inspection" (i.e., tensor metadata), "telemetry", "telemetry_and_tensor_inspection"
     epoch_loop: lightweight # lightweight, ~ (disable), or default (default will use the default telemetry capture method)
    batch_loop: lightweight # lightweight, ~ (disable), or default (default will use the default telemetry capture method)
-    capture_epochs_at_every: 1 #epochs; please use a value that is multiple of #epochs
-    # enable to set between train, evaluate, and test
+    capture_epochs_at_every: 1 # Will capture data at every N epochs; please use a value that is multiple of the total number of #epochs.
+    register_workflow: true # Will store the parent model forward as a workflow itself in the database.
 
 experiment:
-  user: root
+  user: root # Optionally identify the user running the experiment. The logged username will be captured anyways.
 
 mq:
   type: redis # or kafka or mofka; Please adjust the port (kafka's default is 9092; redis is 6379). If mofka, adjust the group_file.