datashare-python 0.7.3__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28):
  1. {datashare_python-0.7.3 → datashare_python-0.8.2}/PKG-INFO +1 -1
  2. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/config.py +11 -2
  3. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/conftest.py +2 -1
  4. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/dependencies.py +1 -1
  5. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/discovery.py +2 -3
  6. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/logging_.py +68 -15
  7. datashare_python-0.8.2/datashare_python/worker-template.tar.gz +0 -0
  8. {datashare_python-0.7.3 → datashare_python-0.8.2}/pyproject.toml +1 -1
  9. datashare_python-0.7.3/datashare_python/worker-template.tar.gz +0 -0
  10. {datashare_python-0.7.3 → datashare_python-0.8.2}/.gitignore +0 -0
  11. {datashare_python-0.7.3 → datashare_python-0.8.2}/README.md +0 -0
  12. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/.gitignore +0 -0
  13. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/__init__.py +0 -0
  14. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/__main__.py +0 -0
  15. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/__init__.py +0 -0
  16. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/project.py +0 -0
  17. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/task.py +0 -0
  18. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/utils.py +0 -0
  19. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/worker.py +0 -0
  20. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/constants.py +0 -0
  21. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/exceptions.py +0 -0
  22. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/interceptors.py +0 -0
  23. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/objects.py +0 -0
  24. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/task_client.py +0 -0
  25. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/template.py +0 -0
  26. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/types_.py +0 -0
  27. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/utils.py +0 -0
  28. {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.7.3
3
+ Version: 0.8.2
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -1,3 +1,4 @@
1
+ from enum import StrEnum
1
2
  from pathlib import Path
2
3
  from typing import Literal
3
4
 
@@ -78,13 +79,21 @@ class TemporalClientConfig(BaseModel):
78
79
  LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
79
80
 
80
81
 
82
+ class LogFormat(StrEnum):
83
+ JSON = "json"
84
+ LOGFMT = "logfmt"
85
+ DEFAULT = "default"
86
+
87
+
81
88
  class LoggingConfig(BaseModel):
82
- log_in_json: bool = False
89
+ format: LogFormat = LogFormat.DEFAULT
83
90
  loggers: dict[str, LogLevel]
84
91
 
85
92
 
86
93
  _DEFAULT_LOGGERS = {datashare_python.__name__: "INFO"}
87
- _DEFAULT_LOGGING_CONFIG = LoggingConfig(log_in_json=True, loggers=_DEFAULT_LOGGERS)
94
+ _DEFAULT_LOGGING_CONFIG = LoggingConfig(
95
+ format=LogFormat.DEFAULT, loggers=_DEFAULT_LOGGERS
96
+ )
88
97
 
89
98
 
90
99
  class WorkerConfig(ICIJSettings, BaseModel):
@@ -13,6 +13,7 @@ from temporalio import workflow
13
13
 
14
14
  from datashare_python.config import (
15
15
  DatashareClientConfig,
16
+ LogFormat,
16
17
  LoggingConfig,
17
18
  TemporalClientConfig,
18
19
  WorkerConfig,
@@ -93,7 +94,7 @@ def event_loop(
93
94
  @pytest.fixture(scope="session")
94
95
  def test_worker_config() -> WorkerConfig:
95
96
  logging_config = LoggingConfig(
96
- log_in_json=False,
97
+ format=LogFormat.DEFAULT,
97
98
  loggers={
98
99
  "datashare_python": "DEBUG",
99
100
  "icij_common": "DEBUG",
@@ -40,7 +40,7 @@ def set_loggers(
40
40
  worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
41
41
  ) -> None:
42
42
  setup_worker_loggers(
43
- loggers=loggers, worker_id=worker_id, in_json=worker_config.logging.log_in_json
43
+ loggers=loggers, worker_id=worker_id, log_format=worker_config.logging.format
44
44
  )
45
45
  logger.info("worker loggers ready to log 💬")
46
46
 
@@ -70,9 +70,8 @@ def discover(
70
70
  deps = []
71
71
  if deps_name is not None:
72
72
  deps = discover_dependencies(deps_name)
73
- for mandatory in _MANDATORY_DEPS:
74
- if mandatory not in deps:
75
- deps.append(mandatory)
73
+ missing = [m for m in _MANDATORY_DEPS if m not in deps]
74
+ deps = missing + deps
76
75
  if deps:
77
76
  n_deps = len(deps)
78
77
  discovered += "\n"
@@ -1,20 +1,39 @@
1
1
  import logging
2
+ import numbers
2
3
  import sys
3
4
  from copy import copy
5
+ from typing import Any
4
6
 
7
+ import orjson
5
8
  from icij_common.logging_utils import DATE_FMT, STREAM_HANDLER_FMT
6
- from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter
9
+ from pythonjsonlogger.core import BaseJsonFormatter
7
10
  from pythonjsonlogger.orjson import OrjsonFormatter
8
11
  from temporalio import activity, workflow
9
12
 
10
- from .config import LogLevel
13
+ from .config import LogFormat, LogLevel
11
14
  from .interceptors import get_trace_context
12
15
 
16
+ _BASE_ATTRS = [
17
+ "asctime",
18
+ "exc_info",
19
+ "filename",
20
+ "funcName",
21
+ "levelname",
22
+ "levelno",
23
+ "lineno",
24
+ "module",
25
+ "msecs",
26
+ "message",
27
+ "msg",
28
+ "name",
29
+ "pathname",
30
+ ]
13
31
  _ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
14
32
  _WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
15
33
  _TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
34
+
16
35
  _LOGGED_ATTRIBUTES = (
17
- copy(RESERVED_ATTRS)
36
+ copy(_BASE_ATTRS)
18
37
  + _WF_LOGGED_ATTRS
19
38
  + _ACT_LOGGER_ATTRS
20
39
  + _TRACE_CONTEXT_ATTRS
@@ -28,7 +47,7 @@ _STREAM_HANDLER_FMT_WITH_WORKER_ID = (
28
47
 
29
48
 
30
49
  def setup_worker_loggers(
31
- loggers: dict[str, LogLevel], *, worker_id: str | None, in_json: bool
50
+ loggers: dict[str, LogLevel], *, worker_id: str | None, log_format: LogFormat
32
51
  ) -> None:
33
52
  worker_filter = WorkerFilter(worker_id)
34
53
  for logger_name, level_str in loggers.items():
@@ -36,7 +55,9 @@ def setup_worker_loggers(
36
55
  logger = logging.getLogger(logger_name)
37
56
  logger.setLevel(level)
38
57
  logger.handlers = []
39
- for handler in _get_worker_handlers(level, worker_filter, in_json=in_json):
58
+ for handler in _get_worker_handlers(
59
+ level, worker_filter, log_format=log_format
60
+ ):
40
61
  logger.addHandler(handler)
41
62
 
42
63
 
@@ -50,8 +71,13 @@ class WorkerFilter(logging.Filter):
50
71
  record.worker_id = self.worker_id
51
72
  if workflow.in_workflow():
52
73
  wf_info = workflow.info()
74
+ wf_info = {
75
+ "workflow_run_id": wf_info.run_id,
76
+ "workflow_id": self.workflow_id,
77
+ "workflow_type": self.workflow_type,
78
+ }
53
79
  for attr in _WF_LOGGED_ATTRS:
54
- setattr(record, attr, getattr(wf_info, attr))
80
+ setattr(record, attr, wf_info[attr])
55
81
  if activity.in_activity():
56
82
  act_info = activity.info()
57
83
  for attr in _ACT_LOGGER_ATTRS:
@@ -64,23 +90,50 @@ class WorkerFilter(logging.Filter):
64
90
 
65
91
 
66
92
  def _get_worker_handlers(
67
- level: int, worker_filter: WorkerFilter, *, in_json: bool
93
+ level: int, worker_filter: WorkerFilter, *, log_format: LogFormat
68
94
  ) -> list[logging.Handler]:
69
95
  stream_handler = logging.StreamHandler(sys.stderr)
70
- if in_json:
71
- fmt = _json_formatter(datefmt=DATE_FMT)
72
- else:
73
- if worker_filter.worker_id is not None:
74
- fmt = _STREAM_HANDLER_FMT_WITH_WORKER_ID
75
- else:
76
- fmt = STREAM_HANDLER_FMT
77
- fmt = logging.Formatter(fmt, DATE_FMT)
96
+ match log_format:
97
+ case LogFormat.JSON:
98
+ fmt = _json_formatter(datefmt=DATE_FMT)
99
+ case LogFormat.LOGFMT:
100
+ fmt = LogFmtFormatter(datefmt=DATE_FMT)
101
+ case LogFormat.DEFAULT:
102
+ if worker_filter.worker_id is not None:
103
+ fmt = _STREAM_HANDLER_FMT_WITH_WORKER_ID
104
+ else:
105
+ fmt = STREAM_HANDLER_FMT
106
+ fmt = logging.Formatter(fmt, DATE_FMT)
107
+ case _:
108
+ raise NotImplementedError(f"invalid log format: {log_format}")
78
109
  stream_handler.setFormatter(fmt)
79
110
  stream_handler.setLevel(level)
80
111
  stream_handler.addFilter(worker_filter)
81
112
  return [stream_handler]
82
113
 
83
114
 
115
+ class LogFmtFormatter(logging.Formatter):
116
+ def format(self, record: logging.LogRecord) -> str:
117
+ logged = dict()
118
+ if record.exc_info and not record.exc_text:
119
+ record.exc_text = self.formatException(record.exc_info)
120
+ logged["exc_info"] = record.exc_text
121
+ for k, v in record.__dict__.items():
122
+ if k in _LOGGED_ATTRIBUTES and k != "exc_info":
123
+ logged[k] = _encode_value(v)
124
+ return " ".join(f"{k}={v}" for k, v in sorted(logged.items()))
125
+
126
+
127
+ def _encode_value(value: Any) -> str:
128
+ if value is None:
129
+ return ""
130
+ if isinstance(value, bool):
131
+ return "true" if value else "false"
132
+ if isinstance(value, numbers.Number):
133
+ return str(value)
134
+ return orjson.dumps(value).decode()
135
+
136
+
84
137
  def _json_formatter(datefmt: str) -> BaseJsonFormatter:
85
138
  fmt = OrjsonFormatter( # let's keep logging as fast as possible
86
139
  _LOGGED_ATTRIBUTES, datefmt=datefmt
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datashare-python"
3
- version = "0.7.3"
3
+ version = "0.8.2"
4
4
  description = "Manage Python tasks and local resources in Datashare"
5
5
  authors = [
6
6
  { name = "Clément Doumouro", email = "cdoumouro@icij.org" },