grants-shared 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grants_shared/__init__.py +0 -0
- grants_shared/logs/__init__.py +31 -0
- grants_shared/logs/audit.py +129 -0
- grants_shared/logs/config.py +165 -0
- grants_shared/logs/decodelog.py +156 -0
- grants_shared/logs/flask_logger.py +268 -0
- grants_shared/logs/formatters.py +62 -0
- grants_shared/logs/pii.py +97 -0
- grants_shared/util/__init__.py +0 -0
- grants_shared/util/datetime_util.py +99 -0
- grants_shared/util/deploy_metadata.py +72 -0
- grants_shared/util/env_config.py +16 -0
- grants_shared/util/json_util.py +60 -0
- grants_shared/util/local.py +31 -0
- grants_shared-0.1.0.dist-info/METADATA +67 -0
- grants_shared-0.1.0.dist-info/RECORD +18 -0
- grants_shared-0.1.0.dist-info/WHEEL +5 -0
- grants_shared-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Module for initializing logging configuration for the application.
|
|
2
|
+
|
|
3
|
+
There are two formatters for the log messages: human-readable and JSON.
|
|
4
|
+
The formatter that is used is determined by the environment variable
|
|
5
|
+
LOG_FORMAT. If the environment variable is not set, the JSON formatter
|
|
6
|
+
is used by default. See grants_shared.logs.formatters for more information.
|
|
7
|
+
|
|
8
|
+
The logger also adds a PII mask filter to the root logger. See
|
|
9
|
+
grants_shared.logs.pii for more information.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
import grants_shared.logs
|
|
13
|
+
|
|
14
|
+
with grants_shared.logs.init("program name"):
|
|
15
|
+
...
|
|
16
|
+
|
|
17
|
+
Once the module has been initialized, the standard logging module can be
|
|
18
|
+
used to log messages:
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
import logging
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
logger.info("message")
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import grants_shared.logs.config as config
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def init(program_name: str) -> config.LoggingContext:
    """Configure application logging and return its context manager.

    Logging is set up as soon as this is called, because the
    ``LoggingContext`` constructor does the configuration. Leaving the
    returned context removes the console handler it installed.
    """
    logging_context = config.LoggingContext(program_name)
    return logging_context
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Application-level audit logging.
|
|
3
|
+
#
|
|
4
|
+
# See https://docs.python.org/3/library/audit_events.html
|
|
5
|
+
# https://docs.python.org/3/library/sys.html#sys.addaudithook
|
|
6
|
+
# https://www.python.org/dev/peps/pep-0578/
|
|
7
|
+
#
|
|
8
|
+
import collections
|
|
9
|
+
import logging
|
|
10
|
+
import sys
|
|
11
|
+
from collections.abc import Hashable, Sequence
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
AUDIT = 32
|
|
17
|
+
logging.addLevelName(AUDIT, "AUDIT")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def init() -> None:
    """Start logging security audit events.

    Registers ``handle_audit_event`` as an interpreter audit hook.
    """
    audit_hook = handle_audit_event
    sys.addaudithook(audit_hook)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def handle_audit_event(event_name: str, args: tuple[Any, ...]) -> None:
    """Audit hook: forward the events we track to ``log_audit_event``.

    Events that are not in the table below are ignored. For each tracked
    event the table gives a name for every positional audit argument; the
    name ``"_"`` marks an argument that must not be logged.
    """
    # For more information about these events and what they mean, see
    # https://peps.python.org/pep-0578/#suggested-audit-hook-locations
    # For the full list of auditable events, see
    # https://docs.python.org/3/library/audit_events.html
    # The table is deliberately local so that other modules cannot modify it.
    events_to_log = {
        # Dynamic execution of code objects. Only raised for explicit calls,
        # not for normal function invocation.
        "exec": ("code_object",),
        # A file is about to be opened. path and mode are the usual open()
        # parameters if available; flags is provided instead of mode in some cases.
        "open": ("path", "mode", "flags"),
        # A signal is sent to a process.
        "os.kill": ("pid", "sig"),
        # A file is renamed.
        "os.rename": ("src", "dst", "src_dir_fd", "dst_dir_fd"),
        # A subprocess is started.
        "subprocess.Popen": ("executable", "args", "cwd", "_"),
        # Access to network resources. The address is unmodified from the original call.
        "socket.connect": ("socket", "address"),
        "socket.getaddrinfo": ("host", "port", "family", "type", "protocol"),
        # New audit hooks are being added.
        "sys.addaudithook": (),
        # URL requests.
        # Data and headers are not logged because they may contain sensitive information.
        "urllib.Request": ("url", "_", "_", "method"),
    }

    # Compare against None explicitly: an empty tuple is a valid entry.
    arg_names = events_to_log.get(event_name)
    if arg_names is None:
        return

    log_audit_event(event_name, args, arg_names)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Set the audit hook to be traceable so that coverage module can track calls to it
|
|
64
|
+
# The coverage module relies on Python's trace hooks
|
|
65
|
+
# (See https://coverage.readthedocs.io/en/7.1.0/howitworks.html#execution)
|
|
66
|
+
# According to the docs for sys.addaudithook, the audit hook is only traced if the callable
|
|
67
|
+
# has a __cantrace__ member that is set to a true value.
|
|
68
|
+
# (See https://docs.python.org/3/library/sys.html#sys.addaudithook)
|
|
69
|
+
handle_audit_event.__cantrace__ = True # type: ignore
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def log_audit_event(event_name: str, args: Sequence[Any], arg_names: Sequence[str]) -> None:
    """Log an audit event, throttling messages that repeat.

    Occurrences are counted per ``(event_name, repr(args))``. The first 10
    occurrences are all logged, then every 10th up to 100, then every 100th.

    Args:
        event_name: Name of the audit event; used as the log message.
        args: Positional arguments the event was raised with.
        arg_names: Name for each entry in ``args``; entries named ``"_"`` are
            left out of the log record.

    Raises:
        ValueError: If ``args`` and ``arg_names`` differ in length
            (``zip(..., strict=True)``).
    """
    extra = {}
    for arg_name, arg in zip(arg_names, args, strict=True):
        if arg_name == "_":
            continue
        extra[f"audit.args.{arg_name}"] = arg

    key = (event_name, repr(args))
    count = audit_message_count.get(key, 0) + 1
    audit_message_count[key] = count

    # Pick how often a message with this many occurrences gets through.
    if count > 100:
        interval = 100
    elif count > 10:
        interval = 10
    else:
        interval = 1
    if count % interval != 0:
        return

    extra["count"] = count
    logger.log(AUDIT, event_name, extra=extra)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class LeastRecentlyUsedDict(collections.OrderedDict):
    """A size-bounded dict that drops its least recently written key when full.

    Reading a key that is absent yields 0 instead of raising (and does not
    insert the key). Writing a key moves it to the most-recent end; if the
    dict then holds more than ``maxsize`` keys, the oldest one is removed.

    Useful for counting occurrences when the set of keys is unbounded but
    memory use must stay limited.

    Based on the example at
    https://docs.python.org/3/library/collections.html#ordereddict-examples-and-recipes
    """

    def __init__(self, maxsize: int = 128, *args: Any, **kwargs: Any) -> None:
        # maxsize must exist before the base initializer runs, because any
        # initial items are stored through __setitem__, which reads it.
        self.maxsize = maxsize
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: Hashable) -> int:
        return super().__getitem__(key) if key in self else 0

    def __setitem__(self, key: Hashable, value: int) -> None:
        already_present = key in self
        if already_present:
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) > self.maxsize:
            # Evict from the front, i.e. the least recently written key.
            self.popitem(last=False)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
audit_message_count = LeastRecentlyUsedDict()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import pwd
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any, cast
|
|
8
|
+
|
|
9
|
+
from pydantic_settings import SettingsConfigDict
|
|
10
|
+
|
|
11
|
+
import grants_shared.logs.audit
|
|
12
|
+
import grants_shared.logs.formatters as formatters
|
|
13
|
+
import grants_shared.logs.pii as pii
|
|
14
|
+
from grants_shared.util.env_config import PydanticBaseEnvConfig
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_original_argv = tuple(sys.argv)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class HumanReadableFormatterConfig(PydanticBaseEnvConfig):
    # Settings for the human-readable log formatter.
    # message_width is handed to formatters.HumanReadableFormatter by
    # get_human_readable_formatter; presumably the padded width of the
    # message column - confirm against the formatter.
    message_width: int = formatters.HUMAN_READABLE_FORMATTER_DEFAULT_MESSAGE_WIDTH
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LoggingConfig(PydanticBaseEnvConfig):
    # Logging settings. The settings config below uses the "log_" prefix for
    # environment variable names and "__" as the delimiter for nested fields.
    model_config = SettingsConfigDict(env_prefix="log_", env_nested_delimiter="__")

    # "human-readable" selects the human-readable formatter; get_formatter
    # falls back to the JSON formatter for any other value.
    format: str = "json"
    # Level applied to the root logger.
    level: str = "INFO"
    # When true, grants_shared.logs.audit.init() is called during setup.
    enable_audit: bool = False
    human_readable_formatter: HumanReadableFormatterConfig = HumanReadableFormatterConfig()

    # Per-logger level overrides formatted as comma-separated "<logger>=<level>"
    # pairs, like "newrelic=INFO,something.else=ERROR"
    level_overrides: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LoggingContext(contextlib.AbstractContextManager[None]):
    """
    A context manager for handling setting up the logging stream.

    To help facilitate being able to test logging, we need to be able
    to easily create temporary output streams and then tear them down.

    When this context manager is torn down, the stream handler created
    with it will be removed.

    For example:
    ```py
    import logging

    logger = logging.getLogger(__name__)

    with LoggingContext("example_program_name"):
        # This log message will go to stdout
        logger.info("example log message")

    # This log message won't go to stdout as the
    # handler will have been removed
    logger.info("example log message")
    ```
    Note that any other handlers added to the root logger won't be affected
    and calling this multiple times before exit would result in duplicate logs.
    """

    def __init__(self, program_name: str) -> None:
        # Logging is configured on construction, not on __enter__.
        self._configure_logging()
        log_program_info(program_name)

    def __enter__(self) -> None:
        pass

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Remove the console handler to stop logs from being sent to stdout.
        # This is useful in the test suite, since multiple tests may initialize
        # separate duplicate handlers. This allows for easier cleanup for each
        # of those tests.
        logging.root.removeHandler(self.console_handler)

    def _configure_logging(self) -> None:
        """Configure logging for the application.

        Configures the root logger to log to stdout.
        Adds a PII mask filter to the console handler.
        Also configures log levels for third party packages.

        Raises:
            ValueError: If a level override entry is not of the form
                ``<logger>=<level>`` or names an unknown level.
        """
        settings = LoggingConfig()

        # Loggers can be configured using config functions defined
        # in logging.config or by directly making calls to the main API
        # of the logging module (see https://docs.python.org/3/library/logging.config.html)
        # We opt to use the main API using functions like `addHandler` which is
        # non-destructive, i.e. it does not overwrite any existing handlers.
        # In contrast, logging.config.dictConfig() would overwrite any existing loggers.
        # This is important during testing, since fixtures like `caplog` add handlers that would
        # get overwritten if we call logging.config.dictConfig() during the scope of the test.
        self.console_handler = logging.StreamHandler(sys.stdout)
        formatter = get_formatter(settings)
        self.console_handler.setFormatter(formatter)
        self.console_handler.addFilter(pii.mask_pii)
        logging.root.addHandler(self.console_handler)
        logging.root.setLevel(settings.level)

        if settings.enable_audit:
            grants_shared.logs.audit.init()

        # Configure loggers for third party packages
        logging.getLogger("alembic").setLevel(logging.INFO)
        logging.getLogger("werkzeug").setLevel(logging.WARN)
        logging.getLogger("sqlalchemy.pool").setLevel(logging.INFO)
        logging.getLogger("sqlalchemy.dialects.postgresql").setLevel(logging.INFO)

        # Allow an env var to override logging config, mostly for development purposes
        # Parsing string formatted like "logger1=INFO,logger2=ERROR"
        if settings.level_overrides is not None:
            for raw_override in settings.level_overrides.split(","):
                override = raw_override.strip()
                if not override:
                    # Tolerate an empty value, a trailing comma, or ", " separators.
                    continue
                logger_override, separator, level_override = override.partition("=")
                if not separator:
                    raise ValueError(
                        f"Invalid log level override {override!r}; "
                        "expected '<logger>=<level>'"
                    )
                logging.getLogger(logger_override.strip()).setLevel(level_override.strip())
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def get_formatter(config: LoggingConfig) -> logging.Formatter:
    """Pick the log formatter for the given logging configuration.

    ``config.format == "human-readable"`` selects the human-readable
    formatter; every other value (including the default) yields the JSON
    formatter.
    """
    wants_human_readable = config.format == "human-readable"
    if not wants_human_readable:
        return formatters.JsonFormatter()
    return get_human_readable_formatter(config.human_readable_formatter)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def log_program_info(program_name: str) -> None:
    """Log a start-up record describing the running process.

    Emits two INFO records: one with the program name, Python
    implementation/version, OS, hostname, pid and user, and one with the
    command line captured when this module was imported.

    Args:
        program_name: Name to identify the program in the start-up message.
    """
    uid = os.getuid()
    try:
        username = pwd.getpwuid(uid).pw_name
    except KeyError:
        # The uid may have no passwd entry (common when a container runs as
        # an arbitrary uid). That must not prevent logging from starting.
        username = "unknown"

    hostname = platform.node()

    # If mypy is run on a mac, it will throw a module has no attribute error, even though
    # we never actually access it with the conditional.
    #
    # However, we can't just silence this error, because on linux (e.g. CI/CD) that will
    # throw an unused "type: ignore" comment error. Casting to Any instead ensures this
    # passes regardless of where mypy is being run
    if "sched_getaffinity" in dir(os):
        cpu_usable = len(cast(Any, os).sched_getaffinity(0))
    else:
        cpu_usable = "unknown"

    logger.info(
        "start %s: %s %s %s, hostname %s, pid %i, user %i(%s)",
        program_name,
        platform.python_implementation(),
        platform.python_version(),
        platform.system(),
        hostname,
        os.getpid(),
        uid,
        username,
        extra={
            "hostname": hostname,
            "cpu_count": os.cpu_count(),
            "cpu_usable": cpu_usable,
        },
    )
    logger.info("invoked as: %s", " ".join(_original_argv))
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_human_readable_formatter(
    config: HumanReadableFormatterConfig,
) -> formatters.HumanReadableFormatter:
    """Build the human-readable formatter from its configuration."""
    width = config.message_width
    return formatters.HumanReadableFormatter(message_width=width)
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Make JSON logs easier to read when developing or troubleshooting.
|
|
3
|
+
#
|
|
4
|
+
# Expects JSON log lines or `docker-compose log` output on stdin and outputs plain text lines on
|
|
5
|
+
# stdout.
|
|
6
|
+
#
|
|
7
|
+
# This module intentionally has no dependencies outside the standard library so that it can be run
|
|
8
|
+
# as a script outside the virtual environment if needed.
|
|
9
|
+
#
|
|
10
|
+
# mypy: disallow-untyped-defs
|
|
11
|
+
|
|
12
|
+
import datetime
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
from collections.abc import Mapping
|
|
16
|
+
|
|
17
|
+
RED = "\033[31m"
|
|
18
|
+
GREEN = "\033[32m"
|
|
19
|
+
BLUE = "\033[34m"
|
|
20
|
+
ORANGE = "\033[38;5;208m"
|
|
21
|
+
RESET = "\033[0m"
|
|
22
|
+
NO_COLOUR = ""
|
|
23
|
+
|
|
24
|
+
DEFAULT_MESSAGE_WIDTH = 50
|
|
25
|
+
|
|
26
|
+
output_dates = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def main() -> None:
    """Script entry point: reformat log lines from stdin onto stdout.

    Lines for which ``process_line`` returns ``None`` are dropped; every
    other result is written followed by ``"\\r\\n"``.
    """
    for raw_line in sys.stdin:
        formatted = process_line(raw_line)
        if formatted is None:
            continue
        sys.stdout.write(formatted)
        sys.stdout.write("\r\n")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def process_line(line: str) -> str | None:
|
|
39
|
+
"""Process a line of the log and return the reformatted line."""
|
|
40
|
+
line = line.rstrip()
|
|
41
|
+
if line and line[0] == "{":
|
|
42
|
+
# JSON format
|
|
43
|
+
return decode_json_line(line)
|
|
44
|
+
elif "| {" in line:
|
|
45
|
+
# `docker-compose logs ...` format
|
|
46
|
+
return decode_json_line(line[line.find("| {") + 2 :])
|
|
47
|
+
# Anything else is left alone
|
|
48
|
+
return line
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def decode_json_line(line: str) -> str | None:
|
|
52
|
+
"""Decode a JSON log line and return the reformatted line."""
|
|
53
|
+
try:
|
|
54
|
+
data = json.loads(line)
|
|
55
|
+
except json.decoder.JSONDecodeError:
|
|
56
|
+
return line
|
|
57
|
+
|
|
58
|
+
name = data.pop("name", "-")
|
|
59
|
+
level = data.pop("levelname", "-")
|
|
60
|
+
func_name = data.pop("funcName", "-")
|
|
61
|
+
created = datetime.datetime.fromtimestamp(
|
|
62
|
+
float(data.pop("created", 0)), tz=datetime.timezone.utc
|
|
63
|
+
)
|
|
64
|
+
message = data.pop("message", "-")
|
|
65
|
+
|
|
66
|
+
if level == "AUDIT":
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
return format_line(created, name, func_name, level, message, data)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def format_line(
    created: datetime.datetime,
    logger_name: str,
    func_name: str,
    level: str,
    message: str,
    extra: Mapping[str, str],
    message_width: int = DEFAULT_MESSAGE_WIDTH,
) -> str:
    """Format log fields as one coloured, space-separated line.

    Columns, in order: timestamp, logger name (padded to 36), function name
    (padded to 28), level (padded to 8), message (padded to
    ``message_width``) and the remaining extra fields.
    """
    name_colour = color_for_name(logger_name)
    level_colour = color_for_level(level)
    columns = [
        format_datetime(created),
        colorize(logger_name.ljust(36), name_colour),
        f"{func_name:<28}",
        colorize(level.ljust(8), level_colour),
        colorize(message.ljust(message_width), level_colour),
        colorize(format_extra(extra), BLUE),
    ]
    return " ".join(columns)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def colorize(text: str, color: str) -> str:
    """Wrap ``text`` in the given colour code followed by the reset code."""
    return "{}{}{}".format(color, text, RESET)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def color_for_name(name: str) -> str:
    """Return the colour code for a logger name, chosen by its prefix."""
    prefix_colours = (
        ("src", GREEN),
        ("sqlalchemy", ORANGE),
    )
    for prefix, colour in prefix_colours:
        if name.startswith(prefix):
            return colour
    return NO_COLOUR
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def color_for_level(level: str) -> str:
    """Return red for WARNING, ERROR and CRITICAL, and no colour otherwise."""
    return RED if level in ("WARNING", "ERROR", "CRITICAL") else NO_COLOUR
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def format_datetime(created: datetime.datetime) -> str:
    """Format a record timestamp, with or without the date.

    The first call decides the style for the rest of the process and stores
    it in the module-level ``output_dates``: if that first timestamp is more
    than 10 hours old, full ISO date-times are output; otherwise only the
    time of day is.

    Args:
        created: Timestamp of the log record; may be naive or timezone-aware.

    Returns:
        The timestamp in ISO format with millisecond precision.
    """
    global output_dates
    if output_dates is None:
        # Check first line - if over 10h ago, output dates as well as time.
        # "now" must match the awareness of ``created``: subtracting an aware
        # datetime (decode_json_line builds one in UTC) from a naive now()
        # raises TypeError. now(tz=None) is identical to now(), so naive
        # inputs behave exactly as before.
        now = datetime.datetime.now(tz=created.tzinfo)
        output_dates = 36000 < (now - created).total_seconds()
    if output_dates:
        return created.isoformat(timespec="milliseconds")
    return created.time().isoformat(timespec="milliseconds")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
EXCLUDE_EXTRA = {
|
|
117
|
+
"args",
|
|
118
|
+
"created",
|
|
119
|
+
"entity.guid",
|
|
120
|
+
"entity.name",
|
|
121
|
+
"entity.type",
|
|
122
|
+
"exc_info",
|
|
123
|
+
"filename",
|
|
124
|
+
"funcName",
|
|
125
|
+
"levelname",
|
|
126
|
+
"levelno",
|
|
127
|
+
"lineno",
|
|
128
|
+
"message",
|
|
129
|
+
"module",
|
|
130
|
+
"msecs",
|
|
131
|
+
"msg",
|
|
132
|
+
"name",
|
|
133
|
+
"pathname",
|
|
134
|
+
"process",
|
|
135
|
+
"processName",
|
|
136
|
+
"relativeCreated",
|
|
137
|
+
"span.id",
|
|
138
|
+
"thread",
|
|
139
|
+
"threadName",
|
|
140
|
+
"trace.id",
|
|
141
|
+
"traceId",
|
|
142
|
+
"deploy_github_ref",
|
|
143
|
+
"deploy_github_sha",
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def format_extra(data: Mapping[str, str]) -> str:
    """Render the extra fields as space-separated ``key=value`` pairs.

    Keys listed in ``EXCLUDE_EXTRA`` and entries whose value is ``None`` are
    left out.
    """
    pairs = []
    for key, value in data.items():
        if key in EXCLUDE_EXTRA or value is None:
            continue
        pairs.append("%s=%s" % (key, value))
    return " ".join(pairs)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
if __name__ == "__main__":
|
|
156
|
+
main()
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Module for adding standard logging functionality to a Flask app.
|
|
2
|
+
|
|
3
|
+
This module configures an application's logger to add extra data
|
|
4
|
+
to all log messages. Flask application context data such as the
|
|
5
|
+
app name and request context data such as the request method, request url
|
|
6
|
+
rule, and query parameters are added to the log record.
|
|
7
|
+
|
|
8
|
+
This module also configures the Flask application to log every
|
|
9
|
+
non-404 request.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
import grants_shared.logs.flask_logger as flask_logger
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
app = create_app()
|
|
16
|
+
|
|
17
|
+
flask_logger.init_app(logger, app, app_domain)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
import time
|
|
24
|
+
import uuid
|
|
25
|
+
|
|
26
|
+
import flask
|
|
27
|
+
import newrelic.api.time_trace
|
|
28
|
+
|
|
29
|
+
from grants_shared.util.deploy_metadata import get_deploy_metadata_config
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
EXTRA_LOG_DATA_ATTR = "extra_log_data"
|
|
33
|
+
|
|
34
|
+
_GLOBAL_LOG_CONTEXT: dict = {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def init_general_logging(app_logger: logging.Logger, app_name: str, app_domain: str) -> None:
    """Initialize the logging that does not depend on a Flask app.

    Prefer ``init_app``, which calls this when a Flask app is created. Call
    this directly only from scripts that cannot run via Flask, such as the
    Alembic migrations.

    Attaches the context filters to every handler of ``app_logger`` and
    registers process-wide metadata that is added to all log records.
    """
    # Filters go on each handler rather than on the logger itself, since
    # messages are passed directly to the ancestor loggers' handlers, bypassing
    # any filters set on the ancestors.
    # See https://docs.python.org/3/library/logging.html#logging.Logger.propagate
    record_filters = (
        _add_global_context_info_to_log_record,
        _add_request_context_info_to_log_record,
        _add_new_relic_context_to_log_record,
        _add_error_info_to_log_record,
    )
    for handler in app_logger.handlers:
        for record_filter in record_filters:
            handler.addFilter(record_filter)

    deploy_metadata = get_deploy_metadata_config()

    # Metadata added to all log messages globally
    global_context = {
        "app.name": app_name,
        # NOTE(review): hard-coded rather than taken from app_name - confirm this is intended.
        "app_name": "api",
        "app_domain": app_domain,
        "run_mode": get_run_mode(),
        "environment": os.environ.get("ENVIRONMENT"),
        "deploy_github_ref": deploy_metadata.deploy_github_ref,
        "deploy_github_sha": deploy_metadata.deploy_github_sha,
        "deploy_whoami": deploy_metadata.deploy_whoami,
    }
    add_extra_data_to_global_logs(global_context)

    app_logger.info("initialized flask logger")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def init_app(app_logger: logging.Logger, app: flask.Flask, app_domain: str) -> None:
    """Initialize the Flask app logger.

    Registers request hooks that attach request context data to every log
    record for the current request and that log the start and end of each
    request, then sets up the general (non-Flask) logging.
    See https://docs.python.org/3/howto/logging-cookbook.html#using-filters-to-impart-contextual-information

    Usage:
        import grants_shared.logs.flask_logger as flask_logger

        logger = logging.getLogger(__name__)
        app = create_app()

        flask_logger.init_app(logger, app, app_domain)
    """

    def _attach_request_context() -> None:
        # Request id, method, path, matching url rule and query parameters.
        return add_extra_data_to_current_request_logs(_get_request_context_info(flask.request))

    # Hooks run in registration order, so the context data is in place before
    # the "start request" record is logged.
    app.before_request(_attach_request_context)
    app.before_request(_track_request_start_time)
    app.before_request(_log_start_request)
    app.after_request(_log_end_request)

    init_general_logging(app_logger, app.name, app_domain)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def add_extra_data_to_current_request_logs(
    data: dict[str, str | int | float | bool | uuid.UUID | None],
) -> None:
    """Merge ``data`` into the extra log data kept on ``flask.g`` for this request.

    Does nothing when called outside a request context.
    """
    if flask.has_request_context():
        merged = getattr(flask.g, EXTRA_LOG_DATA_ATTR, {})
        merged.update(data)
        setattr(flask.g, EXTRA_LOG_DATA_ATTR, merged)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def add_extra_data_to_global_logs(data: dict[str, str | int | float | bool | None]) -> None:
    """Add metadata to all logs for the rest of the lifecycle of this app process."""
    # The shared dict is mutated in place, so no ``global`` declaration is needed.
    _GLOBAL_LOG_CONTEXT.update(data)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _track_request_start_time() -> None:
    """Record on ``flask.g`` when the current request started."""
    started_at = time.perf_counter()
    flask.g.request_start_time = started_at
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _log_start_request() -> None:
    """Log that a request has started.

    Registered as a Flask ``before_request`` handler.
    See https://tedboy.github.io/flask/interface_src.application_object.html#flask.Flask.before_request

    Details about the request itself are not passed here; they reach the
    record through the `_add_request_context_info_to_log_record` filter.
    """
    logger.info("start request")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _log_end_request(response: flask.Response) -> flask.Response:
    """Log the end of a request.

    This function handles Flask's after_request event.
    See https://tedboy.github.io/flask/interface_src.application_object.html#flask.Flask.after_request

    Additional info about the request will be in the `extra` field
    added by `_add_request_context_info_to_log_record`.

    Args:
        response: The response about to be sent.

    Returns:
        The same response, unmodified.
    """
    # The start time is only present if _track_request_start_time ran. When it
    # did not, report the duration as None instead of failing the response
    # with an AttributeError.
    started_at = getattr(flask.g, "request_start_time", None)
    if started_at is None:
        elapsed_ms = None
    else:
        elapsed_ms = (time.perf_counter() - started_at) * 1000

    logger.info(
        "end request",
        extra={
            "response.status_code": response.status_code,
            "response.content_length": response.content_length,
            "response.content_type": response.content_type,
            "response.mimetype": response.mimetype,
            "response.time_ms": elapsed_ms,
        },
    )
    return response
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _add_request_context_info_to_log_record(record: logging.LogRecord) -> bool:
    """Log filter that copies the current request's extra log data onto the record.

    Outside a request context the record is left untouched. Always returns
    True so the record is never filtered out.
    """
    if flask.has_request_context():
        if flask.request is None:
            raise Exception("")

        request_extras: dict[str, str] = getattr(flask.g, EXTRA_LOG_DATA_ATTR, {})
        record.__dict__.update(request_extras)

    return True
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _add_global_context_info_to_log_record(record: logging.LogRecord) -> bool:
    """Log filter that copies the process-wide log context onto the record.

    Always returns True so the record is never filtered out.
    """
    record.__dict__.update(_GLOBAL_LOG_CONTEXT)
    return True
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _get_request_context_info(request: flask.Request) -> dict:
    """Collect the per-request fields that are attached to every log record.

    Also generates an internal request id and stores it on
    ``flask.g.internal_request_id``.
    """
    internal_request_id = str(uuid.uuid4())
    flask.g.internal_request_id = internal_request_id

    context = {
        "request.id": request.headers.get("x-amzn-requestid", ""),
        "request.method": request.method,
        "request.path": request.path,
        "request.url_rule": str(request.url_rule),
        # This ID is used to group all logs for a given request
        # and is returned in the API response for any 4xx/5xx scenarios
        "request.internal_id": internal_request_id,
    }

    # Query parameters are added as request.query.<key> = <value>, e.g.
    # ?foo=bar&baz=qux becomes request.query.foo = bar and request.query.baz = qux.
    # PII should be kept out of the URL, as URLs are logged in access logs.
    # With that assumption, it is safe to log query parameters.
    context.update({f"request.query.{key}": value for key, value in request.args.items()})
    return context
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _add_new_relic_context_to_log_record(record: logging.LogRecord) -> bool:
    """Log filter that copies New Relic linking metadata onto the record.

    Always returns True so the record is never filtered out.
    """
    # This is not the recommended way of implementing this, but the alternatives
    # either change the structure of our logging to not be JSON, or would
    # entirely replace the formatter we have for outputting logs.
    #
    # The NewRelicContextFormatter calls this function internally when it
    # creates the output object.
    #
    # This sets the following fields:
    #   entity.type
    #   entity.name
    #   entity.guid
    #   hostname
    #   span.id
    #   trace.id
    record.__dict__.update(newrelic.api.time_trace.get_linking_metadata())
    return True
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _add_error_info_to_log_record(record: logging.LogRecord) -> bool:
    """Log filter that adds a short form of the exception to the record.

    When the record carries a 3-part ``exc_info`` (class, error object,
    traceback), sets ``exc_info_cls`` to the class name and
    ``exc_info_short`` to the ``repr`` of the error. Always returns True so
    the record is never filtered out.
    """
    exc_info = getattr(record, "exc_info", None)
    if not exc_info or len(exc_info) != 3:
        return True

    exc_class, exc_value = exc_info[0], exc_info[1]

    # Only record the class name if this really is a class, in case some code
    # path sets exc_info differently.
    if isinstance(exc_class, type):
        record.__dict__["exc_info_cls"] = exc_class.__name__

    # For `raise ValueError("example")` this is "ValueError('example')"
    record.__dict__["exc_info_short"] = repr(exc_value)
    return True
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def get_run_mode() -> str:
    """Report whether the process was launched as the API service or a CLI task.

    The decision is made from the command line in ``sys.argv``.

    CLI commands are always of the form
        "/path/to/flask <blueprint name> <task name> <commands>"

    The API service can be started either as
        "/path/to/flask --app src.app run ..."          (when run locally)
        "/api/.venv/bin/gunicorn src.app:create_app()"  (when run non-locally)

    so we look for pieces that only appear in the API commands.

    Returns:
        "service" if the command line contains "gunicorn" or "--app",
        otherwise "cli".
    """
    command_line = " ".join(sys.argv)
    service_markers = ("gunicorn", "--app")

    if any(marker in command_line for marker in service_markers):
        return "service"
    return "cli"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Log formatters for the API.
|
|
2
|
+
|
|
3
|
+
This module defines two formatters, JsonFormatter for machine-readable logs to
|
|
4
|
+
be used in production, and HumanReadableFormatter for human readable logs to
|
|
5
|
+
be used during development.
|
|
6
|
+
|
|
7
|
+
See https://docs.python.org/3/library/logging.html#formatter-objects
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
import grants_shared.logs.decodelog as decodelog
|
|
15
|
+
from grants_shared.util.json_util import json_encoder
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class JsonFormatter(logging.Formatter):
    """A logging formatter which emits each record as one compact JSON object."""

    def format(self, record: logging.LogRecord) -> str:
        """Serialize the record's full ``__dict__`` as JSON.

        Values the json module cannot serialize are passed through
        ``json_encoder``.
        """
        # Called for its side effect: logging.Formatter.format adds the
        # `message` attribute to the LogRecord. The returned text is unused.
        # see https://github.com/python/cpython/blob/main/Lib/logging/__init__.py#L690-L720
        super().format(record)

        # New Relic automatically maps log fields with certain names onto
        # the "message" field in their system. That includes "msg", the
        # unformatted version of the log message, which means the formatted
        # message doesn't make it into New Relic.
        # https://docs.newrelic.com/docs/logs/log-api/introduction-log-api/#json-logs
        #
        # To work around this, copy the formatted message to a new field name.
        formatted_message = getattr(record, "message", None)
        record.formatted_msg = formatted_message

        payload = record.__dict__
        return json.dumps(payload, default=json_encoder, separators=(",", ":"))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Default width for the message column, taken from the decodelog module.
HUMAN_READABLE_FORMATTER_DEFAULT_MESSAGE_WIDTH = decodelog.DEFAULT_MESSAGE_WIDTH


class HumanReadableFormatter(logging.Formatter):
    """A logging formatter which formats each line as color-coded,
    human-readable text by delegating to ``decodelog.format_line``.
    """

    # Width handed to decodelog.format_line as `message_width`.
    message_width: int

    def __init__(self, message_width: int = HUMAN_READABLE_FORMATTER_DEFAULT_MESSAGE_WIDTH):
        super().__init__()
        self.message_width = message_width

    def format(self, record: logging.LogRecord) -> str:
        """Render the record as a single human-readable line."""
        # Let the base formatter build the message text first.
        rendered_message = super().format(record)
        logged_at = datetime.fromtimestamp(record.created)

        return decodelog.format_line(
            logged_at,
            record.name,
            record.funcName,
            record.levelname,
            rendered_message,
            record.__dict__,
            message_width=self.message_width,
        )
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Mask PII from log records.
|
|
2
|
+
|
|
3
|
+
This module defines a filter that can be attached to a logger to mask PII
|
|
4
|
+
from log records. The filter is applied to all log records, and masks PII
|
|
5
|
+
that looks like social security numbers.
|
|
6
|
+
|
|
7
|
+
You can add the filter to a handler:
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
import logging
|
|
11
|
+
import grants_shared.logs.pii as pii
|
|
12
|
+
|
|
13
|
+
handler = logging.StreamHandler()
|
|
14
|
+
handler.addFilter(pii.mask_pii)
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
logger.addHandler(handler)
|
|
17
|
+
|
|
18
|
+
Or you can add the filter directly to a logger.
|
|
19
|
+
If adding the filter directly to a logger, take note that the filter
|
|
20
|
+
will not be called for child loggers.
|
|
21
|
+
See https://docs.python.org/3/library/logging.html#logging.Logger.propagate
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
import logging
|
|
25
|
+
import grants_shared.logs.pii as pii
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
logger.addFilter(pii.mask_pii)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import logging
|
|
32
|
+
import re
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def mask_pii(record: logging.LogRecord) -> bool:
|
|
37
|
+
# Loop through all entries in the record's __dict__
|
|
38
|
+
# attribute and mask any things that look like PII.
|
|
39
|
+
# We will mask positional args separately below.
|
|
40
|
+
record.__dict__ |= {
|
|
41
|
+
key: _mask_pii_for_key(key, value)
|
|
42
|
+
for key, value in record.__dict__.items()
|
|
43
|
+
if key != "args" # Handle positional "args" separately
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
# record.__dict__["args"] will contain positional arguments to logging calls.
|
|
47
|
+
# For example, a call like logger.info("%s %s", "foo", "bar") will result in a LogRecord
|
|
48
|
+
# with record.__dict__["args"] == ("foo", "bar")
|
|
49
|
+
# We want to mask the PII on each argument separately rather than trying to do a PII regex
|
|
50
|
+
# match on the entire args tuple.
|
|
51
|
+
args = record.__dict__["args"]
|
|
52
|
+
record.__dict__["args"] = tuple(map(_mask_pii, args))
|
|
53
|
+
return True
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Regular expression to match a tax identifier (SSN), 9 digits with optional dashes.
|
|
57
|
+
# Matches between word boundaries (\b), except when:
|
|
58
|
+
# - Preceded by word character and dash (e.g. "ip-10-11-12-134")
|
|
59
|
+
# - Preceded by or followed by a decimal point (for floating point numbers)
|
|
60
|
+
TIN_RE = re.compile(
|
|
61
|
+
r"""
|
|
62
|
+
\b # word boundary
|
|
63
|
+
(?<!\w-) # not preceded by word character and dash
|
|
64
|
+
(?<!\.) # not preceded by decimal point
|
|
65
|
+
(\d-?){8} # digit then optional dash, 8 times
|
|
66
|
+
\d # last digit
|
|
67
|
+
\b # word boundary
|
|
68
|
+
(?!\.\d) # not followed by decimal point and digit (for decimal numbers)
|
|
69
|
+
""",
|
|
70
|
+
re.ASCII | re.VERBOSE,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
ALLOW_NO_MASK = {
|
|
74
|
+
"account_key",
|
|
75
|
+
"count",
|
|
76
|
+
"created",
|
|
77
|
+
"hostname",
|
|
78
|
+
"process",
|
|
79
|
+
"thread",
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _mask_pii_for_key(key: str, value: Any | None) -> Any | None:
|
|
84
|
+
"""
|
|
85
|
+
Mask the given value if it has the pattern of a tax identifier
|
|
86
|
+
unless its key is one of the allowed values to avoid masking
|
|
87
|
+
something that looks like an SSN but is known to be safe (like a timestamp)
|
|
88
|
+
"""
|
|
89
|
+
if key in ALLOW_NO_MASK:
|
|
90
|
+
return value
|
|
91
|
+
return _mask_pii(value)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _mask_pii(value: Any | None) -> Any | None:
|
|
95
|
+
if TIN_RE.search(str(value)):
|
|
96
|
+
return TIN_RE.sub("*********", str(value))
|
|
97
|
+
return value
|
|
File without changes
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import zoneinfo
|
|
3
|
+
from datetime import date, datetime, timezone
|
|
4
|
+
|
|
5
|
+
import pytz
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def utcnow() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC.

    Unlike datetime.utcnow(), the result carries tzinfo marking it as UTC.

    See https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow
    """
    return datetime.now(tz=timezone.utc)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def adjust_timezone(timestamp: datetime, timezone_str: str) -> datetime:
    """
    Convert a datetime object into another timezone.

    The timezone string can be anything recognized by the pytz library.
    Details on how to find all the potential timezone names can be found
    in http://pytz.sourceforge.net/#helpers but a few that are likely
    useful include:
    * UTC
    * US/Eastern
    * US/Central
    * US/Mountain
    * US/Pacific
    """
    target_zone = pytz.timezone(timezone_str)
    converted = timestamp.astimezone(target_zone)
    return converted
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def make_timezone_aware(timestamp: datetime, timezone_str: str) -> datetime:
    """Return a copy of timestamp with its tzinfo set to the named zone.

    The date and time fields are not changed; only tzinfo is replaced
    (this is ``datetime.replace``, not a conversion between zones).
    """
    return timestamp.replace(tzinfo=zoneinfo.ZoneInfo(timezone_str))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_now_us_eastern_datetime() -> datetime:
    """
    Return the current time in the "America/New_York" time zone.

    Note that the result is in Eastern time, not UTC. DST is handled based
    on the local time. For information on handling Daylight Saving Time,
    refer to this documentation on now() vs. utcnow():
    http://pytz.sourceforge.net/#problems-with-localtime
    """
    return datetime.now(pytz.timezone("America/New_York"))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_now_us_eastern_date() -> date:
    """Return today's date as seen in the US Eastern time zone."""
    # There aren't any direct date methods that take a timezone, so fetch
    # the Eastern datetime and keep only its date portion.
    eastern_now = get_now_us_eastern_datetime()
    return eastern_now.date()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def datetime_str_to_date(datetime_str: str | None) -> date | None:
|
|
59
|
+
if not datetime_str:
|
|
60
|
+
return None
|
|
61
|
+
return datetime.fromisoformat(datetime_str).date()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def parse_grants_gov_date(date_str: str | None) -> date | None:
|
|
65
|
+
"""
|
|
66
|
+
Parse a date string from grants.gov SOAP API response.
|
|
67
|
+
|
|
68
|
+
Grants.gov returns dates in formats like:
|
|
69
|
+
- "2025-09-16-04:00" (with timezone suffix)
|
|
70
|
+
- "2025-09-16" (standard ISO format)
|
|
71
|
+
|
|
72
|
+
This function strips any timezone suffix and returns a date object.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
date_str: Date string from grants.gov API
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
date object or None if date_str is None/empty
|
|
79
|
+
|
|
80
|
+
Raises:
|
|
81
|
+
ValueError: If date_str cannot be parsed as a valid date
|
|
82
|
+
"""
|
|
83
|
+
if not date_str or not date_str.strip():
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
# Strip timezone suffix if present (e.g., "-04:00" or "+05:00")
|
|
87
|
+
# The pattern matches a hyphen or plus sign followed by HH:MM at the end of string
|
|
88
|
+
cleaned_date_str = re.sub(r"[+-]\d{2}:\d{2}$", "", date_str.strip())
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
# Parse the cleaned date string
|
|
92
|
+
return datetime.fromisoformat(cleaned_date_str).date()
|
|
93
|
+
except ValueError as e:
|
|
94
|
+
raise ValueError(f"Could not parse date string '{date_str}': {e}") from e
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def from_timestamp(timestamp: int) -> datetime:
    """Convert an epoch timestamp (in milliseconds) into a timezone-aware UTC datetime."""
    epoch_seconds = timestamp / 1000.0
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import typing
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic_settings import SettingsConfigDict
|
|
6
|
+
|
|
7
|
+
import grants_shared.util.datetime_util as datetime_util
|
|
8
|
+
from grants_shared.util.env_config import PydanticBaseEnvConfig
|
|
9
|
+
|
|
10
|
+
# Release versions are expected to look like:
#   YYYY.MM.DD-#
# Leading zeroes are not always present, so all of the following
# are valid release versions:
#   2024.11.27-1
#   2024.11.5-1
#   2024.4.30-1
RELEASE_NOTE_REGEX = re.compile(
    r"""
        ^[0-9]{4}           # Exactly 4 leading digits
        (?:\.[0-9]{1,2})    # Period followed by 1-2 digits
        (?:\.[0-9]{1,2})    # Period followed by 1-2 digits
        (?:\-[0-9]{1,2})$   # Ends with a dash and 1-2 digits
    """,
    flags=re.VERBOSE | re.ASCII,
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DeployMetadataConfig(PydanticBaseEnvConfig):
    """Deploy metadata settings plus links and a timestamp derived from them.

    After the declared fields are populated, model_post_init sets three
    extra attributes: release_notes, deploy_commit and deploy_datetime_est.
    """

    # extra="allow" so the derived attributes set in model_post_init
    # can live on the model alongside the declared fields.
    model_config = SettingsConfigDict(extra="allow")

    # None must never break any part of the system for these values,
    # so every field is optional and defaults to None.
    deploy_github_ref: str | None = None  # DEPLOY_GITHUB_REF
    deploy_github_sha: str | None = None  # DEPLOY_GITHUB_SHA
    deploy_timestamp: datetime | None = None  # DEPLOY_TIMESTAMP
    deploy_whoami: str | None = None  # DEPLOY_WHOAMI

    def model_post_init(self, _context: typing.Any) -> None:
        """Run after __init__ sets above values from env vars"""

        # Link straight to the release tag only when the ref looks like a
        # release version; otherwise fall back to the releases index.
        ref_is_release = bool(self.deploy_github_ref) and (
            RELEASE_NOTE_REGEX.match(self.deploy_github_ref) is not None
        )
        self.release_notes = (
            f"https://github.com/HHS/simpler-grants-gov/releases/tag/{self.deploy_github_ref}"
            if ref_is_release
            else "https://github.com/HHS/simpler-grants-gov/releases"
        )

        # Link to the deployed commit when known, else to the repository.
        self.deploy_commit = (
            f"https://github.com/HHS/simpler-grants-gov/commit/{self.deploy_github_sha}"
            if self.deploy_github_sha
            else "https://github.com/HHS/simpler-grants-gov"
        )

        # Without a deploy timestamp, fall back to the current Eastern time,
        # i.e. roughly when the API started up.
        self.deploy_datetime_est = (
            datetime_util.adjust_timezone(self.deploy_timestamp, "US/Eastern")
            if self.deploy_timestamp
            else datetime_util.get_now_us_eastern_datetime()
        )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Module-level cache holding the single DeployMetadataConfig instance.
_deploy_metadata_config: DeployMetadataConfig | None = None


def get_deploy_metadata_config() -> DeployMetadataConfig:
    """Return the cached DeployMetadataConfig, building it on first use."""
    global _deploy_metadata_config

    if _deploy_metadata_config is not None:
        return _deploy_metadata_config

    _deploy_metadata_config = DeployMetadataConfig()
    return _deploy_metadata_config
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
+
|
|
5
|
+
import grants_shared
|
|
6
|
+
|
|
7
|
+
# TODO - this wouldn't make sense when used as a package - fix?
|
|
8
|
+
# Path of the env file: "<parent of the grants_shared package dir>/config/<ENVIRONMENT>.env",
# where ENVIRONMENT defaults to "local" when the variable is unset.
env_file = os.path.join(
    os.path.dirname(os.path.dirname(grants_shared.__file__)),
    "config",
    f"{os.getenv('ENVIRONMENT', 'local')}.env",
)


class PydanticBaseEnvConfig(BaseSettings):
    """Base settings class configured to read values from ``env_file``."""

    model_config = SettingsConfigDict(env_file=env_file)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
from uuid import UUID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# identity returns an unmodified object
|
|
10
|
+
def identity[T](obj: T) -> T:
|
|
11
|
+
return obj
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Mapping of types to functions for conversion
|
|
15
|
+
# when writing logs to JSON
|
|
16
|
+
ENCODERS_BY_TYPE: dict[type[Any], Callable[[Any], Any]] = {
|
|
17
|
+
# JSONEncoder handles these properly already:
|
|
18
|
+
# https://docs.python.org/3/library/json.html#json.JSONEncoder
|
|
19
|
+
str: identity,
|
|
20
|
+
int: identity,
|
|
21
|
+
float: identity,
|
|
22
|
+
bool: identity,
|
|
23
|
+
list: identity,
|
|
24
|
+
datetime: lambda d: d.isoformat(),
|
|
25
|
+
date: lambda d: d.isoformat(),
|
|
26
|
+
Enum: lambda e: e.value,
|
|
27
|
+
set: lambda s: list(s),
|
|
28
|
+
# The fallback below would do these,
|
|
29
|
+
# but making it explicit that these
|
|
30
|
+
# types are supported for logging.
|
|
31
|
+
Decimal: str,
|
|
32
|
+
UUID: str,
|
|
33
|
+
Exception: str,
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def json_encoder(obj: Any) -> Any:
|
|
38
|
+
"""
|
|
39
|
+
Handle conversion of various types when logs
|
|
40
|
+
are serialized into JSON. If not specified
|
|
41
|
+
will attempt to convert using str() on the object
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
_type = type(obj)
|
|
45
|
+
encode = ENCODERS_BY_TYPE.get(_type, str)
|
|
46
|
+
|
|
47
|
+
"""
|
|
48
|
+
The recommended approach from the JSON docs
|
|
49
|
+
is to call the default method from JSONEncoder
|
|
50
|
+
to allow it to error anything not defined, we
|
|
51
|
+
choose not to do that as we want to give a best
|
|
52
|
+
effort for every value to be serialized for the logs
|
|
53
|
+
https://docs.python.org/3/library/json.html
|
|
54
|
+
|
|
55
|
+
If a field you are trying to log doesn't make sense
|
|
56
|
+
to format as a string then please add it above, but be
|
|
57
|
+
aware that the format needs to be parseable by whatever
|
|
58
|
+
tools you are using to ingest logs and metrics.
|
|
59
|
+
"""
|
|
60
|
+
return encode(obj)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_local_env_vars(env_file: str = "local.env") -> None:
    """
    Load environment variables from the given dotenv file (local.env by
    default) so that they can be fetched with `os.getenv()` or with
    other utils that pull env vars.

    https://pypi.org/project/python-dotenv/

    NOTE: any existing env vars will not be overridden by this
    """
    # Only use the dotenv file when ENVIRONMENT is explicitly "local" or
    # is not defined at all. This should never run outside local development.
    if os.getenv("ENVIRONMENT") not in (None, "local"):
        return

    load_dotenv(env_file)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def error_if_not_local() -> None:
    """Guard for local-only operations.

    Does nothing when the ENVIRONMENT variable is exactly "local".

    Raises:
        Exception: If ENVIRONMENT is anything else, including unset. The
            offending value is logged as an error first.
    """
    env = os.getenv("ENVIRONMENT")
    if env == "local":
        return

    logger.error("Environment %s is not local - cannot run operation", env)
    raise Exception("Local-only process called when environment was set to non-local")
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grants-shared
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared code used by the Simpler Grants.gov repo
|
|
5
|
+
Author-email: Nava Engineering <engineering@navapbc.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
8
|
+
Requires-Python: <3.15,>=3.14
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: apiflask<4,>=3.1.0
|
|
11
|
+
Requires-Dist: marshmallow<4,>=3.20.1
|
|
12
|
+
Requires-Dist: pydantic<3,>=2.13.3
|
|
13
|
+
Requires-Dist: pydantic-settings<3,>=2.14.0
|
|
14
|
+
Requires-Dist: sqlalchemy[mypy]<3,>=2.0.49
|
|
15
|
+
Requires-Dist: psycopg[binary]<4,>=3.3.4
|
|
16
|
+
Requires-Dist: botocore<2,>=1.43.3
|
|
17
|
+
Requires-Dist: boto3<2,>=1.43.3
|
|
18
|
+
Requires-Dist: smart-open<8,>=7.6.0
|
|
19
|
+
Requires-Dist: pytz<2027,>=2026.2
|
|
20
|
+
Requires-Dist: pyjwt[crypto]<3,>=2.12.1
|
|
21
|
+
Requires-Dist: jsonschema[format-nongpl]<5,>=4.26.0
|
|
22
|
+
Requires-Dist: jsonpath-ng<2,>=1.8.0
|
|
23
|
+
Requires-Dist: jsonref<2,>=1.1.0
|
|
24
|
+
Requires-Dist: pandas<3,>=2.0.3
|
|
25
|
+
Requires-Dist: pandas-stubs<3,>=2.0.3
|
|
26
|
+
Requires-Dist: newrelic<13,>=12.1.0
|
|
27
|
+
Requires-Dist: python-dotenv<2,>=1.2.2
|
|
28
|
+
|
|
29
|
+
# Grants Shared
|
|
30
|
+
|
|
31
|
+
This repo exists to contain the shared code used by the backend
|
|
32
|
+
of simpler.grants.gov which is made up of multiple backend services.
|
|
33
|
+
|
|
34
|
+
This code is not meant to be used outside of the [Simpler Grants](https://github.com/HHS/simpler-grants-gov) system.
|
|
35
|
+
|
|
36
|
+
[License](https://github.com/HHS/simpler-grants-gov/blob/main/LICENSE.md)
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
TODO - this code isn't yet on PyPI, so this won't actually work yet.
|
|
40
|
+
Will update instructions more thoroughly once it is available.
|
|
41
|
+
|
|
42
|
+
```shell
|
|
43
|
+
# Using pip
|
|
44
|
+
pip install grants_shared
|
|
45
|
+
|
|
46
|
+
# Using poetry
|
|
47
|
+
poetry add grants_shared
|
|
48
|
+
|
|
49
|
+
# Using uv
|
|
50
|
+
uv add grants_shared
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
Guidance on common commands and running the application will come in later
|
|
55
|
+
versions as we're still getting this set up, but here are a few basic commands to get you started.
|
|
56
|
+
|
|
57
|
+
```shell
|
|
58
|
+
# Build the docker image
|
|
59
|
+
make build
|
|
60
|
+
|
|
61
|
+
# Run tests
|
|
62
|
+
make test
|
|
63
|
+
|
|
64
|
+
# Formatting and linting
|
|
65
|
+
make format
|
|
66
|
+
make lint
|
|
67
|
+
```
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
grants_shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
grants_shared/logs/__init__.py,sha256=7lgGlUEwJpjyPl0TWkAi3PFxt6Wq4flIBVLue2E1bJM,914
|
|
3
|
+
grants_shared/logs/audit.py,sha256=gK8baMR5s4Q1iamnvMoTuZwR8aFRREcBHC5GOdB8CvA,4699
|
|
4
|
+
grants_shared/logs/config.py,sha256=Wd3x08SJEP05pfCXleBZ_ajDTxkJxKcV5cr8NOOgAog,6457
|
|
5
|
+
grants_shared/logs/decodelog.py,sha256=SMAqS2TyMtyDNMhhBQUPVMXtLo4KxosBhW4XD-nxrXk,3999
|
|
6
|
+
grants_shared/logs/flask_logger.py,sha256=Z9QDPilE_EEv_u4VbcYYmnggQsH_Se8-JNzp-r0nukg,9454
|
|
7
|
+
grants_shared/logs/formatters.py,sha256=0tGFFAekLO_nuYf_SLahEHvq-73SiKVBC4gVnY-SbUo,2231
|
|
8
|
+
grants_shared/logs/pii.py,sha256=5PAlWjNKQWUMkLPJiewRDbfgVLvFEv71Hg5H8XTtgFs,3131
|
|
9
|
+
grants_shared/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
grants_shared/util/datetime_util.py,sha256=h9o_w916oP1nJQTY4oTJqbxyYccpBeEEYI2xXuPNA9s,3229
|
|
11
|
+
grants_shared/util/deploy_metadata.py,sha256=l6CnVJZe9VSrUJI1_EhXgr_JGtcvuNV2sWQsqRnMnEg,2534
|
|
12
|
+
grants_shared/util/env_config.py,sha256=heyDp8bUj-lfTAtCOrEZOQd_x2prN20mUBpdeAyIHY8,416
|
|
13
|
+
grants_shared/util/json_util.py,sha256=01BpkldEQATDChIt5MQQgdDWjF38KtiwegmAPqsK34o,1760
|
|
14
|
+
grants_shared/util/local.py,sha256=n4uO2-oSH1C5xmuAHun777VJbW493rmujYOCL4B0uSk,982
|
|
15
|
+
grants_shared-0.1.0.dist-info/METADATA,sha256=H59-cQQe8ipflBw2gFBYZsRUAIIz9XzqNXYusY3WwYU,1934
|
|
16
|
+
grants_shared-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
17
|
+
grants_shared-0.1.0.dist-info/top_level.txt,sha256=5xACZ9ZOtI4Vg-LU8zhoCSc55ITPn3bFQ-36oIilRWM,14
|
|
18
|
+
grants_shared-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
grants_shared
|