grants-shared 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grants_shared-0.1.0/PKG-INFO +67 -0
- grants_shared-0.1.0/README.md +39 -0
- grants_shared-0.1.0/pyproject.toml +228 -0
- grants_shared-0.1.0/setup.cfg +4 -0
- grants_shared-0.1.0/src/grants_shared/__init__.py +0 -0
- grants_shared-0.1.0/src/grants_shared/logs/__init__.py +31 -0
- grants_shared-0.1.0/src/grants_shared/logs/audit.py +129 -0
- grants_shared-0.1.0/src/grants_shared/logs/config.py +165 -0
- grants_shared-0.1.0/src/grants_shared/logs/decodelog.py +156 -0
- grants_shared-0.1.0/src/grants_shared/logs/flask_logger.py +268 -0
- grants_shared-0.1.0/src/grants_shared/logs/formatters.py +62 -0
- grants_shared-0.1.0/src/grants_shared/logs/pii.py +97 -0
- grants_shared-0.1.0/src/grants_shared/util/__init__.py +0 -0
- grants_shared-0.1.0/src/grants_shared/util/datetime_util.py +99 -0
- grants_shared-0.1.0/src/grants_shared/util/deploy_metadata.py +72 -0
- grants_shared-0.1.0/src/grants_shared/util/env_config.py +16 -0
- grants_shared-0.1.0/src/grants_shared/util/json_util.py +60 -0
- grants_shared-0.1.0/src/grants_shared/util/local.py +31 -0
- grants_shared-0.1.0/src/grants_shared.egg-info/PKG-INFO +67 -0
- grants_shared-0.1.0/src/grants_shared.egg-info/SOURCES.txt +21 -0
- grants_shared-0.1.0/src/grants_shared.egg-info/dependency_links.txt +1 -0
- grants_shared-0.1.0/src/grants_shared.egg-info/requires.txt +18 -0
- grants_shared-0.1.0/src/grants_shared.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grants-shared
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared code used by the Simpler Grants.gov repo
|
|
5
|
+
Author-email: Nava Engineering <engineering@navapbc.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
8
|
+
Requires-Python: <3.15,>=3.14
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: apiflask<4,>=3.1.0
|
|
11
|
+
Requires-Dist: marshmallow<4,>=3.20.1
|
|
12
|
+
Requires-Dist: pydantic<3,>=2.13.3
|
|
13
|
+
Requires-Dist: pydantic-settings<3,>=2.14.0
|
|
14
|
+
Requires-Dist: sqlalchemy[mypy]<3,>=2.0.49
|
|
15
|
+
Requires-Dist: psycopg[binary]<4,>=3.3.4
|
|
16
|
+
Requires-Dist: botocore<2,>=1.43.3
|
|
17
|
+
Requires-Dist: boto3<2,>=1.43.3
|
|
18
|
+
Requires-Dist: smart-open<8,>=7.6.0
|
|
19
|
+
Requires-Dist: pytz<2027,>=2026.2
|
|
20
|
+
Requires-Dist: pyjwt[crypto]<3,>=2.12.1
|
|
21
|
+
Requires-Dist: jsonschema[format-nongpl]<5,>=4.26.0
|
|
22
|
+
Requires-Dist: jsonpath-ng<2,>=1.8.0
|
|
23
|
+
Requires-Dist: jsonref<2,>=1.1.0
|
|
24
|
+
Requires-Dist: pandas<3,>=2.0.3
|
|
25
|
+
Requires-Dist: pandas-stubs<3,>=2.0.3
|
|
26
|
+
Requires-Dist: newrelic<13,>=12.1.0
|
|
27
|
+
Requires-Dist: python-dotenv<2,>=1.2.2
|
|
28
|
+
|
|
29
|
+
# Grants Shared
|
|
30
|
+
|
|
31
|
+
This repo exists to contain the shared code used by the backend
|
|
32
|
+
of simpler.grants.gov which is made up of multiple backend services.
|
|
33
|
+
|
|
34
|
+
This code is not meant to be used outside of the [Simpler Grants](https://github.com/HHS/simpler-grants-gov) system.
|
|
35
|
+
|
|
36
|
+
[License](https://github.com/HHS/simpler-grants-gov/blob/main/LICENSE.md)
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
TODO - this package isn't published on PyPI yet, so the commands below won't work yet.
|
|
40
|
+
Will update instructions more thoroughly once it is available.
|
|
41
|
+
|
|
42
|
+
```shell
|
|
43
|
+
# Using pip
|
|
44
|
+
pip install grants_shared
|
|
45
|
+
|
|
46
|
+
# Using poetry
|
|
47
|
+
poetry add grants_shared
|
|
48
|
+
|
|
49
|
+
# Using uv
|
|
50
|
+
uv add grants_shared
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
Guidance on common commands and running the application will come in later
|
|
55
|
+
versions as we're still getting this set up, but here are a few basic commands to get you started.
|
|
56
|
+
|
|
57
|
+
```shell
|
|
58
|
+
# Build the docker image
|
|
59
|
+
make build
|
|
60
|
+
|
|
61
|
+
# Run tests
|
|
62
|
+
make test
|
|
63
|
+
|
|
64
|
+
# Formatting and linting
|
|
65
|
+
make format
|
|
66
|
+
make lint
|
|
67
|
+
```
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Grants Shared
|
|
2
|
+
|
|
3
|
+
This repo exists to contain the shared code used by the backend
|
|
4
|
+
of simpler.grants.gov which is made up of multiple backend services.
|
|
5
|
+
|
|
6
|
+
This code is not meant to be used outside of the [Simpler Grants](https://github.com/HHS/simpler-grants-gov) system.
|
|
7
|
+
|
|
8
|
+
[License](https://github.com/HHS/simpler-grants-gov/blob/main/LICENSE.md)
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
TODO - this package isn't published on PyPI yet, so the commands below won't work yet.
|
|
12
|
+
Will update instructions more thoroughly once it is available.
|
|
13
|
+
|
|
14
|
+
```shell
|
|
15
|
+
# Using pip
|
|
16
|
+
pip install grants_shared
|
|
17
|
+
|
|
18
|
+
# Using poetry
|
|
19
|
+
poetry add grants_shared
|
|
20
|
+
|
|
21
|
+
# Using uv
|
|
22
|
+
uv add grants_shared
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
Guidance on common commands and running the application will come in later
|
|
27
|
+
versions as we're still getting this set up, but here are a few basic commands to get you started.
|
|
28
|
+
|
|
29
|
+
```shell
|
|
30
|
+
# Build the docker image
|
|
31
|
+
make build
|
|
32
|
+
|
|
33
|
+
# Run tests
|
|
34
|
+
make test
|
|
35
|
+
|
|
36
|
+
# Formatting and linting
|
|
37
|
+
make format
|
|
38
|
+
make lint
|
|
39
|
+
```
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "grants-shared"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Shared code used by the Simpler Grants.gov repo"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Nava Engineering", email = "engineering@navapbc.com" }]
|
|
7
|
+
requires-python = ">=3.14,<3.15"
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Programming Language :: Python :: 3",
|
|
10
|
+
"Programming Language :: Python :: 3.14",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
dependencies = [
|
|
14
|
+
"apiflask>=3.1.0,<4",
|
|
15
|
+
"marshmallow>=3.20.1,<4",
|
|
16
|
+
"pydantic>=2.13.3,<3",
|
|
17
|
+
"pydantic-settings>=2.14.0,<3",
|
|
18
|
+
"sqlalchemy[mypy]>=2.0.49,<3",
|
|
19
|
+
"psycopg[binary]>=3.3.4,<4",
|
|
20
|
+
"botocore>=1.43.3,<2",
|
|
21
|
+
"boto3>=1.43.3,<2",
|
|
22
|
+
"smart-open>=7.6.0,<8",
|
|
23
|
+
"pytz>=2026.2,<2027",
|
|
24
|
+
"pyjwt[crypto]>=2.12.1,<3",
|
|
25
|
+
"jsonschema[format-nongpl]>=4.26.0,<5",
|
|
26
|
+
"jsonpath-ng>=1.8.0,<2",
|
|
27
|
+
"jsonref>=1.1.0,<2",
|
|
28
|
+
"pandas>=2.0.3,<3",
|
|
29
|
+
"pandas-stubs>=2.0.3,<3",
|
|
30
|
+
"newrelic>=12.1.0,<13",
|
|
31
|
+
"python-dotenv>=1.2.2,<2",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[dependency-groups]
|
|
35
|
+
dev = [
|
|
36
|
+
"black>=26.3.1,<27",
|
|
37
|
+
"isort>=8.0.1,<9",
|
|
38
|
+
"moto[s3]>=5.2.0,<6",
|
|
39
|
+
"mypy>=1.20.2,<2",
|
|
40
|
+
"coverage>=7.13.5,<8",
|
|
41
|
+
"faker>=40.15.0,<41",
|
|
42
|
+
"factory-boy>=3.3.3,<4",
|
|
43
|
+
"bandit>=1.9.4,<2",
|
|
44
|
+
"pytest>=9.0.3,<10",
|
|
45
|
+
"ruff>=0.15.12,<16",
|
|
46
|
+
"freezegun>=1.5.5,<2",
|
|
47
|
+
"debugpy>=1.8.20,<2",
|
|
48
|
+
"types-requests>=2.33.0.20260503",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
[tool.hatch.build.targets.sdist]
|
|
53
|
+
include = ["src"]
|
|
54
|
+
|
|
55
|
+
[tool.hatch.build.targets.wheel]
|
|
56
|
+
packages = ["src/grants_shared"]
|
|
57
|
+
|
|
58
|
+
[tool.black]
|
|
59
|
+
line-length = 100
|
|
60
|
+
|
|
61
|
+
[tool.isort]
|
|
62
|
+
multi_line_output = 3
|
|
63
|
+
include_trailing_comma = true
|
|
64
|
+
force_grid_wrap = 0
|
|
65
|
+
use_parentheses = true
|
|
66
|
+
line_length = 100
|
|
67
|
+
|
|
68
|
+
[tool.ruff]
|
|
69
|
+
line-length = 100
|
|
70
|
+
# Some rules are considered preview-only, this allows them
|
|
71
|
+
# assuming we enabled them below
|
|
72
|
+
preview = true
|
|
73
|
+
|
|
74
|
+
target-version = "py314"
|
|
75
|
+
|
|
76
|
+
[tool.ruff.lint]
|
|
77
|
+
# See: https://docs.astral.sh/ruff/rules/ for all possible rules
|
|
78
|
+
select = [
|
|
79
|
+
"B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
|
|
80
|
+
"C",
|
|
81
|
+
"E", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
|
|
82
|
+
"F", # https://docs.astral.sh/ruff/rules/#pyflakes-f
|
|
83
|
+
"W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
|
|
84
|
+
"UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
|
|
85
|
+
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
|
|
86
|
+
"PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
|
|
87
|
+
"TID251", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
|
|
88
|
+
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
|
|
89
|
+
]
|
|
90
|
+
ignore = [
|
|
91
|
+
# too many leading '#' for block comment, we can format our comments however we want
|
|
92
|
+
"E266",
|
|
93
|
+
# Ignore line-too-long errors, assume the formatter handles that appropriately
|
|
94
|
+
"E501",
|
|
95
|
+
# Ignore rules regarding unnecessary list / generator usage which complain about [e for e in MyEnum]
|
|
96
|
+
"C4",
|
|
97
|
+
# Ignore rule that flags functions with many branches - sometimes we just have a lot of
|
|
98
|
+
# business rules that make sense to aggregate in one place.
|
|
99
|
+
"C901",
|
|
100
|
+
# Ruff suggests not doing .encode("utf-8") and leaving utf-8 (the default out),
|
|
101
|
+
# but nothing wrong with being explicit and clear
|
|
102
|
+
"UP012",
|
|
103
|
+
# Ruff suggests using datetime.UTC over datetime.timezone.utc - but other timezones
|
|
104
|
+
# would need to be specified the latter way, seems odd to intentionally be inconsistent
|
|
105
|
+
"UP017",
|
|
106
|
+
# Ruff suggests replacing printf-style % formatting with format specifiers - this one is a good recommendation
|
|
107
|
+
# we just have a few too many to refactor at this time.
|
|
108
|
+
"UP031",
|
|
109
|
+
# Ruff doesn't like array concatenation like [1, 2, 3] + [4, 5, 6], but it's intuitive where used.
|
|
110
|
+
"RUF005",
|
|
111
|
+
# Ruff doesn't like str() in f-strings, but instead
|
|
112
|
+
# recommends conversion flags (eg. f"{a!r}") which is less well known
|
|
113
|
+
"RUF010",
|
|
114
|
+
# Ruff thinks our SQLAlchemy models and factories have ClassVars
|
|
115
|
+
# but those classes don't use those variables in the problematic way it wants to avoid
|
|
116
|
+
"RUF012",
|
|
117
|
+
# Ruff wants __all__ sorted, not against it, but we'd want our
|
|
118
|
+
# formatter to handle that first to avoid it being tedious work
|
|
119
|
+
"RUF022",
|
|
120
|
+
# Ruff doesn't like when you use variables with _ prefixes in functions
|
|
121
|
+
# saying to avoid shadowing other params to do my_var_, but while
|
|
122
|
+
# that follows PEP8 formatting, it's not generally what I've seen
|
|
123
|
+
"RUF052",
|
|
124
|
+
# Ruff doesn't want any code in __init__ files, but some of our
|
|
125
|
+
# libraries and patterns need a very small amount of code configured
|
|
126
|
+
# in these files.
|
|
127
|
+
"RUF067",
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
# These are characters we are allowing to be confusing
|
|
131
|
+
# for RUF001 which recommends not using certain characters
|
|
132
|
+
# that look like each other.
|
|
133
|
+
# https://docs.astral.sh/ruff/rules/ambiguous-unicode-character-string/#ambiguous-unicode-character-string-ruf001
|
|
134
|
+
allowed-confusables = [
|
|
135
|
+
# endash (not a regular dash) - we need this in our email formatting
|
|
136
|
+
"–",
|
|
137
|
+
# right quotation mark (not a simple "'") - used in email formatting
|
|
138
|
+
"’"
|
|
139
|
+
]
|
|
140
|
+
|
|
141
|
+
[tool.ruff.lint.per-file-ignores]
|
|
142
|
+
# These are rules that are excluded from just our unit tests
|
|
143
|
+
# but still run for the rest of our code.
|
|
144
|
+
"tests/*" = [
|
|
145
|
+
# Ruff suggests changing how iterables are merged, but it complicates test setup
|
|
146
|
+
"RUF005",
|
|
147
|
+
# Ruff wants to avoid "arg: int = None" - we shouldn't do this
|
|
148
|
+
# but our tests are only partially typed
|
|
149
|
+
"RUF013",
|
|
150
|
+
# Ruff suggests making any match statement in pytest.raises
|
|
151
|
+
# a proper regex pattern, but we just use it for finding strings in most cases
|
|
152
|
+
"RUF043",
|
|
153
|
+
# Ruff doesn't like "x, y = get_tuple()" and not using one of the values
|
|
154
|
+
# but the value might be used by a developer doing debugging
|
|
155
|
+
"RUF059",
|
|
156
|
+
# Ruff recommends the first parameter of a parametrized test be a tuple
|
|
157
|
+
# We have a lot of tests that don't do that, can circle back to this later
|
|
158
|
+
"PT006",
|
|
159
|
+
# Ruff recommends having pytest.raises() be a specific error
|
|
160
|
+
# but sometimes we just want to verify an error was raised at all
|
|
161
|
+
"PT011",
|
|
162
|
+
# Ruff recommends not doing "assert x == 1 and y == 2" in tests
|
|
163
|
+
# but the few places we do this are kept simple and help better organize complex scenarios
|
|
164
|
+
"PT018"
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
[tool.ruff.lint.flake8-tidy-imports.banned-api]
|
|
168
|
+
# For these libraries, we do use them, but have our own derived versions we want
|
|
169
|
+
# to use instead as we've rewritten fundamental components like error messaging.
|
|
170
|
+
"marshmallow.validate".msg = "Do not import marshmallow.validate directly - instead use grants_shared.api.schemas.extension.validators as we've rewritten the error messaging"
|
|
171
|
+
"apiflask.validators".msg = "Do not import apiflask.validators directly - instead use grants_shared.api.schemas.extension.validators as we've rewritten the error messaging"
|
|
172
|
+
"apiflask.fields".msg = "Do not import apiflask.fields directly - instead use grants_shared.api.schemas.extension.fields as we've rewritten the error messaging"
|
|
173
|
+
"apiflask.Schema".msg = "Do not import apiflask.Schema directly - instead use grants_shared.api.schemas.extension"
|
|
174
|
+
|
|
175
|
+
[tool.mypy]
|
|
176
|
+
# https://mypy.readthedocs.io/en/stable/config_file.html
|
|
177
|
+
color_output = true
|
|
178
|
+
error_summary = true
|
|
179
|
+
pretty = true
|
|
180
|
+
show_error_codes = true
|
|
181
|
+
show_column_numbers = true
|
|
182
|
+
show_error_context = true
|
|
183
|
+
|
|
184
|
+
namespace_packages = true
|
|
185
|
+
ignore_missing_imports = true
|
|
186
|
+
warn_unused_configs = true
|
|
187
|
+
|
|
188
|
+
check_untyped_defs = true
|
|
189
|
+
disallow_incomplete_defs = true
|
|
190
|
+
disallow_untyped_defs = true
|
|
191
|
+
no_implicit_optional = true
|
|
192
|
+
strict_equality = true
|
|
193
|
+
warn_no_return = true
|
|
194
|
+
warn_redundant_casts = true
|
|
195
|
+
warn_unreachable = true
|
|
196
|
+
warn_unused_ignores = true
|
|
197
|
+
|
|
198
|
+
plugins = ["pydantic.mypy"]
|
|
199
|
+
|
|
200
|
+
[tool.bandit]
|
|
201
|
+
# Ignore the audit logging test file since testing audit logging requires a lot of operations that trigger bandit warnings
|
|
202
|
+
exclude_dirs = ["./tests/grants_shared/logs/test_audit.py"]
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
[tool.pytest.ini_options]
|
|
206
|
+
testpaths = ["tests"]
|
|
207
|
+
pythonpath = ["src"]
|
|
208
|
+
|
|
209
|
+
markers = [
|
|
210
|
+
"audit: mark a test as a security audit log test, to be run isolated from other tests",
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
[tool.coverage.run]
|
|
214
|
+
omit = [
|
|
215
|
+
# Decodelog is only used for formatting logs locally
|
|
216
|
+
"src/grants_shared/logs/decodelog.py"
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
[tool.coverage.report]
|
|
220
|
+
fail_under = 80
|
|
221
|
+
|
|
222
|
+
exclude_lines = [
|
|
223
|
+
# Exclude abstract & overload methods from
|
|
224
|
+
# code coverage reports as they won't ever directly run
|
|
225
|
+
"@abc.abstractmethod",
|
|
226
|
+
"@abstractmethod",
|
|
227
|
+
"@typing.overload",
|
|
228
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Module for initializing logging configuration for the application.
|
|
2
|
+
|
|
3
|
+
There are two formatters for the log messages: human-readable and JSON.
|
|
4
|
+
The formatter that is used is determined by the environment variable
|
|
5
|
+
LOG_FORMAT. If the environment variable is not set, the JSON formatter
|
|
6
|
+
is used by default. See grants_shared.logs.formatters for more information.
|
|
7
|
+
|
|
8
|
+
The logger also adds a PII mask filter to the root logger. See
|
|
9
|
+
grants_shared.logs.pii for more information.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
import grants_shared.logs
|
|
13
|
+
|
|
14
|
+
with grants_shared.logs.init("program name"):
|
|
15
|
+
...
|
|
16
|
+
|
|
17
|
+
Once the module has been initialized, the standard logging module can be
|
|
18
|
+
used to log messages:
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
import logging
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
logger.info("message")
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import grants_shared.logs.config as config
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def init(program_name: str) -> config.LoggingContext:
    """Create the logging context for a program.

    Constructing the context configures logging and logs the program's
    startup information, so logging is active as soon as this returns.

    Args:
        program_name: Name of the program, passed to ``config.LoggingContext``.

    Returns:
        The newly created ``config.LoggingContext``, usable in a ``with`` block.
    """
    logging_context = config.LoggingContext(program_name)
    return logging_context
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Application-level audit logging.
|
|
3
|
+
#
|
|
4
|
+
# See https://docs.python.org/3/library/audit_events.html
|
|
5
|
+
# https://docs.python.org/3/library/sys.html#sys.addaudithook
|
|
6
|
+
# https://www.python.org/dev/peps/pep-0578/
|
|
7
|
+
#
|
|
8
|
+
import collections
|
|
9
|
+
import logging
|
|
10
|
+
import sys
|
|
11
|
+
from collections.abc import Hashable, Sequence
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
AUDIT = 32
|
|
17
|
+
logging.addLevelName(AUDIT, "AUDIT")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def init() -> None:
    """Start logging security audit events.

    Registers ``handle_audit_event`` with ``sys.addaudithook`` so that it is
    called for the interpreter's audit events.
    """
    audit_hook = handle_audit_event
    sys.addaudithook(audit_hook)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def handle_audit_event(event_name: str, args: tuple[Any, ...]) -> None:
    """Audit hook: log the event if it is one of the events we track.

    Events that are not listed in ``EVENTS_TO_LOG`` are ignored. For tracked
    events, the event's positional arguments are paired with the names listed
    below and handed to ``log_audit_event``; the name "_" marks an argument
    that must not be logged.

    Args:
        event_name: Name of the audit event.
        args: Positional arguments supplied with the audit event.
    """
    # Events to log and the names of the arguments to log for each event.
    # For more information about these events and what they mean, see https://peps.python.org/pep-0578/#suggested-audit-hook-locations
    # For the full list of auditable events, see https://docs.python.org/3/library/audit_events.html
    # This mapping is intentionally local so it can't be modified by other modules.
    EVENTS_TO_LOG = {
        # Detect dynamic execution of code objects. This only occurs for explicit
        # calls, and is not raised for normal function invocation.
        "exec": ("code_object",),
        # Detect when a file is about to be opened. path and mode are the usual
        # parameters to open if available, while flags is provided instead of
        # mode in some cases.
        "open": ("path", "mode", "flags"),
        # Detect when a signal is sent to a process.
        "os.kill": ("pid", "sig"),
        # Detect when a file is renamed.
        "os.rename": ("src", "dst", "src_dir_fd", "dst_dir_fd"),
        # Detect when a subprocess is started.
        "subprocess.Popen": ("executable", "args", "cwd", "_"),
        # Detect access to network resources. The address is unmodified from the original call.
        "socket.connect": ("socket", "address"),
        "socket.getaddrinfo": ("host", "port", "family", "type", "protocol"),
        # Detect when new audit hooks are being added.
        "sys.addaudithook": (),
        # Detects URL requests.
        # Don't log data or headers because they may contain sensitive information.
        "urllib.Request": ("url", "_", "_", "method"),
    }

    tracked_arg_names = EVENTS_TO_LOG.get(event_name)
    if tracked_arg_names is None:
        # Not an event we care about
        return

    log_audit_event(event_name, args, tracked_arg_names)


# Mark the audit hook as traceable so that the coverage module can track calls to it.
# The coverage module relies on Python's trace hooks
# (See https://coverage.readthedocs.io/en/7.1.0/howitworks.html#execution)
# According to the docs for sys.addaudithook, the audit hook is only traced if the callable
# has a __cantrace__ member that is set to a true value.
# (See https://docs.python.org/3/library/sys.html#sys.addaudithook)
handle_audit_event.__cantrace__ = True  # type: ignore
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def log_audit_event(event_name: str, args: Sequence[Any], arg_names: Sequence[str]) -> None:
    """Log an audit event at the AUDIT level, thinning out repeated events.

    Occurrences are counted per (event name, ``repr(args)``) in
    ``audit_message_count``. The first 10 occurrences are each logged, then
    every 10th occurrence up to 100, then every 100th occurrence.

    Args:
        event_name: Name of the audit event; used as the log message.
        args: Positional arguments of the audit event.
        arg_names: Name for each entry of ``args``, position by position.
            Arguments named "_" are left out of the log record.

    Raises:
        ValueError: If ``args`` and ``arg_names`` differ in length
            (``zip`` is called with ``strict=True``).
    """
    extra: dict[str, Any] = {}
    for arg_name, arg in zip(arg_names, args, strict=True):
        if arg_name == "_":
            continue
        extra[f"audit.args.{arg_name}"] = arg

    key = (event_name, repr(args))
    count = audit_message_count[key] + 1 if key in audit_message_count else 1
    audit_message_count[key] = count

    # Pick how often this message is still logged based on how many times it has been seen
    if count > 100:
        log_every = 100
    elif count > 10:
        log_every = 10
    else:
        log_every = 1

    if count % log_every != 0:
        return

    extra["count"] = count

    logger.log(AUDIT, event_name, extra=extra)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class LeastRecentlyUsedDict(collections.OrderedDict):
    """An ordered dict capped at ``maxsize`` entries.

    Reading a key that is not present returns 0 instead of raising, and does
    not add the key.

    Writing a key makes it the most recently used entry; if the dict then
    holds more than ``maxsize`` entries, the oldest entry is evicted.

    Handy for keeping counts of items when memory must stay bounded even
    though the set of possible items is not.

    Based on the example at
    https://docs.python.org/3/library/collections.html#ordereddict-examples-and-recipes
    """

    def __init__(self, maxsize: int = 128, *args: Any, **kwargs: Any) -> None:
        # maxsize must be set before the parent constructor runs, since any
        # initial items are subject to the size limit too.
        self.maxsize = maxsize
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: Hashable) -> int:
        try:
            return super().__getitem__(key)
        except KeyError:
            # Missing keys read as a count of zero
            return 0

    def __setitem__(self, key: Hashable, value: int) -> None:
        was_present = key in self
        super().__setitem__(key, value)
        if was_present:
            # Overwriting keeps the old position, so refresh it explicitly
            self.move_to_end(key)
        if len(self) > self.maxsize:
            # Drop the entry at the front, i.e. the least recently written one
            self.popitem(last=False)


# Module-wide repeat counter used to throttle repeated audit log messages
audit_message_count = LeastRecentlyUsedDict()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import pwd
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any, cast
|
|
8
|
+
|
|
9
|
+
from pydantic_settings import SettingsConfigDict
|
|
10
|
+
|
|
11
|
+
import grants_shared.logs.audit
|
|
12
|
+
import grants_shared.logs.formatters as formatters
|
|
13
|
+
import grants_shared.logs.pii as pii
|
|
14
|
+
from grants_shared.util.env_config import PydanticBaseEnvConfig
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_original_argv = tuple(sys.argv)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class HumanReadableFormatterConfig(PydanticBaseEnvConfig):
    # Settings for the human-readable log formatter.

    # Passed to HumanReadableFormatter as its message_width argument;
    # defaults to the constant defined in the formatters module.
    message_width: int = formatters.HUMAN_READABLE_FORMATTER_DEFAULT_MESSAGE_WIDTH
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LoggingConfig(PydanticBaseEnvConfig):
    # Logging settings, configured with the "log_" env prefix and "__" as
    # the delimiter for nested settings.
    model_config = SettingsConfigDict(env_prefix="log_", env_nested_delimiter="__")

    # "human-readable" selects the human-readable formatter; any other value
    # results in the JSON formatter.
    format: str = "json"
    # Level applied to the root logger
    level: str = "INFO"
    # When true, the security audit hook is installed during logging setup
    enable_audit: bool = False
    human_readable_formatter: HumanReadableFormatterConfig = HumanReadableFormatterConfig()

    # Per-logger level overrides formatted as "<logger>=<level>" pairs separated
    # by commas, like "newrelic=INFO,something.else=ERROR"
    level_overrides: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LoggingContext(contextlib.AbstractContextManager[None]):
    """
    A context manager for handling setting up the logging stream.

    To help facilitate being able to test logging, we need to be able
    to easily create temporary output streams and then tear them down.

    The stream handler is attached to the root logger when the context
    is constructed. When this context manager is torn down, the stream
    handler created with it will be removed.

    For example:
    ```py
    import logging

    logger = logging.getLogger(__name__)

    with LoggingContext("example_program_name"):
        # This log message will go to stdout
        logger.info("example log message")

    # This log message won't go to stdout as the
    # handler will have been removed
    logger.info("example log message")
    ```
    Note that any other handlers added to the root logger won't be affected
    and calling this multiple times before exit would result in duplicate logs.
    """

    def __init__(self, program_name: str) -> None:
        """Configure logging, then log startup information for `program_name`."""
        self._configure_logging()
        log_program_info(program_name)

    def __enter__(self) -> None:
        # Nothing to do: logging was already configured in __init__
        pass

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Remove the console handler to stop logs from being sent to stdout
        # This is useful in the test suite, since multiple tests may initialize
        # separate duplicate handlers. This allows for easier cleanup for each
        # of those tests.
        logging.root.removeHandler(self.console_handler)

    def _configure_logging(self) -> None:
        """Configure logging for the application.

        Configures the root module logger to log to stdout.
        Adds a PII mask filter to the console handler.
        Also configures log levels for third party packages.

        Raises:
            ValueError: If a level override entry is malformed or names an
                unknown level.
        """
        config = LoggingConfig()

        # Loggers can be configured using config functions defined
        # in logging.config or by directly making calls to the main API
        # of the logging module (see https://docs.python.org/3/library/logging.config.html)
        # We opt to use the main API using functions like `addHandler` which is
        # non-destructive, i.e. it does not overwrite any existing handlers.
        # In contrast, logging.config.dictConfig() would overwrite any existing loggers.
        # This is important during testing, since fixtures like `caplog` add handlers that would
        # get overwritten if we call logging.config.dictConfig() during the scope of the test.
        self.console_handler = logging.StreamHandler(sys.stdout)
        formatter = get_formatter(config)
        self.console_handler.setFormatter(formatter)
        self.console_handler.addFilter(pii.mask_pii)
        logging.root.addHandler(self.console_handler)
        logging.root.setLevel(config.level)

        if config.enable_audit:
            grants_shared.logs.audit.init()

        # Configure loggers for third party packages
        logging.getLogger("alembic").setLevel(logging.INFO)
        logging.getLogger("werkzeug").setLevel(logging.WARNING)
        logging.getLogger("sqlalchemy.pool").setLevel(logging.INFO)
        logging.getLogger("sqlalchemy.dialects.postgresql").setLevel(logging.INFO)

        # Allow an env var to override logging config, mostly for development purposes
        if config.level_overrides is not None:
            self._apply_level_overrides(config.level_overrides)

    @staticmethod
    def _apply_level_overrides(level_overrides: str) -> None:
        """Apply per-logger levels from a string like "logger1=INFO,logger2=ERROR".

        Whitespace around entries, logger names and levels is ignored, and
        empty entries (for example from a trailing comma) are skipped.

        Raises:
            ValueError: If an entry has no "=" separator, or if the level is
                not one the logging module recognizes.
        """
        for raw_entry in level_overrides.split(","):
            entry = raw_entry.strip()
            if not entry:
                continue

            logger_name, separator, level_name = entry.partition("=")
            if not separator:
                raise ValueError(
                    f"Invalid log level override {entry!r}, expected '<logger>=<level>'"
                )

            logging.getLogger(logger_name.strip()).setLevel(level_name.strip())
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def get_formatter(config: LoggingConfig) -> logging.Formatter:
    """Pick the log formatter described by the logging config.

    A ``format`` of "human-readable" yields the human-readable formatter,
    built from ``config.human_readable_formatter``. Every other value,
    including the default, yields the JSON formatter.

    Args:
        config: The logging configuration to read the format from.

    Returns:
        The formatter to attach to the log handler.
    """
    if config.format != "human-readable":
        return formatters.JsonFormatter()
    return get_human_readable_formatter(config.human_readable_formatter)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def log_program_info(program_name: str) -> None:
    """Log startup details about the running program and its environment.

    Emits one message with the Python implementation and version, operating
    system, hostname, process id and user, and a second message with the
    command line the process was originally invoked with.

    Args:
        program_name: Name of the program, included in the startup message.
    """
    uid = os.getuid()
    try:
        user_name = pwd.getpwuid(uid).pw_name
    except KeyError:
        # pwd.getpwuid raises KeyError when the uid has no passwd entry, which
        # happens when a container is run under an arbitrary uid. Missing user
        # information should not prevent logging from being set up.
        user_name = "unknown"

    hostname = platform.node()

    # If mypy is run on a mac, it will throw a module has no attribute error, even though
    # we never actually access it with the conditional.
    #
    # However, we can't just silence this error, because on linux (e.g. CI/CD) that will
    # throw an unused "type: ignore" comment error. Casting to Any instead ensures this
    # passes regardless of where mypy is being run
    cpu_usable: int | str
    if hasattr(os, "sched_getaffinity"):
        cpu_usable = len(cast(Any, os).sched_getaffinity(0))
    else:
        cpu_usable = "unknown"

    logger.info(
        "start %s: %s %s %s, hostname %s, pid %i, user %i(%s)",
        program_name,
        platform.python_implementation(),
        platform.python_version(),
        platform.system(),
        hostname,
        os.getpid(),
        uid,
        user_name,
        extra={
            "hostname": hostname,
            "cpu_count": os.cpu_count(),
            "cpu_usable": cpu_usable,
        },
    )
    logger.info("invoked as: %s", " ".join(_original_argv))
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_human_readable_formatter(
    config: HumanReadableFormatterConfig,
) -> formatters.HumanReadableFormatter:
    """Build the human-readable formatter from its config.

    Args:
        config: Supplies the ``message_width`` given to the formatter.

    Returns:
        A ``formatters.HumanReadableFormatter`` using the configured width.
    """
    width = config.message_width
    return formatters.HumanReadableFormatter(message_width=width)
|