objectstore-client 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ # Functional Source License, Version 1.1, Apache 2.0 Future License
2
+
3
+ ## Abbreviation
4
+
5
+ FSL-1.1-Apache-2.0
6
+
7
+ ## Notice
8
+
9
+ Copyright 2018-2024 Functional Software, Inc. dba Sentry
10
+
11
+ ## Terms and Conditions
12
+
13
+ ### Licensor ("We")
14
+
15
+ The party offering the Software under these Terms and Conditions.
16
+
17
+ ### The Software
18
+
19
+ The "Software" is each version of the software that we make available under
20
+ these Terms and Conditions, as indicated by our inclusion of these Terms and
21
+ Conditions with the Software.
22
+
23
+ ### License Grant
24
+
25
+ Subject to your compliance with this License Grant and the Patents,
26
+ Redistribution and Trademark clauses below, we hereby grant you the right to
27
+ use, copy, modify, create derivative works, publicly perform, publicly display
28
+ and redistribute the Software for any Permitted Purpose identified below.
29
+
30
+ ### Permitted Purpose
31
+
32
+ A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
33
+ means making the Software available to others in a commercial product or
34
+ service that:
35
+
36
+ 1. substitutes for the Software;
37
+
38
+ 2. substitutes for any other product or service we offer using the Software
39
+ that exists as of the date we make the Software available; or
40
+
41
+ 3. offers the same or substantially similar functionality as the Software.
42
+
43
+ Permitted Purposes specifically include using the Software:
44
+
45
+ 1. for your internal use and access;
46
+
47
+ 2. for non-commercial education;
48
+
49
+ 3. for non-commercial research; and
50
+
51
+ 4. in connection with professional services that you provide to a licensee
52
+ using the Software in accordance with these Terms and Conditions.
53
+
54
+ ### Patents
55
+
56
+ To the extent your use for a Permitted Purpose would necessarily infringe our
57
+ patents, the license grant above includes a license under our patents. If you
58
+ make a claim against any party that the Software infringes or contributes to
59
+ the infringement of any patent, then your patent license to the Software ends
60
+ immediately.
61
+
62
+ ### Redistribution
63
+
64
+ The Terms and Conditions apply to all copies, modifications and derivatives of
65
+ the Software.
66
+
67
+ If you redistribute any copies, modifications or derivatives of the Software,
68
+ you must include a copy of or a link to these Terms and Conditions and not
69
+ remove any copyright notices provided in or with the Software.
70
+
71
+ ### Disclaimer
72
+
73
+ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
74
+ IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
75
+ PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
76
+
77
+ IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
78
+ SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
79
+ EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
80
+
81
+ ### Trademarks
82
+
83
+ Except for displaying the License Details and identifying us as the origin of
84
+ the Software, you have no right under these Terms and Conditions to use our
85
+ trademarks, trade names, service marks or product names.
86
+
87
+ ## Grant of Future License
88
+
89
+ We hereby irrevocably grant you an additional license to use the Software under
90
+ the Apache License, Version 2.0 that is effective on the second anniversary of
91
+ the date we make the Software available. On or after that date, you may use the
92
+ Software under the Apache License, Version 2.0, in which case the following
93
+ will apply:
94
+
95
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use
96
+ this file except in compliance with the License.
97
+
98
+ You may obtain a copy of the License at
99
+
100
+ http://www.apache.org/licenses/LICENSE-2.0
101
+
102
+ Unless required by applicable law or agreed to in writing, software distributed
103
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
104
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
105
+ specific language governing permissions and limitations under the License.
@@ -0,0 +1,168 @@
1
+ Metadata-Version: 2.3
2
+ Name: objectstore-client
3
+ Version: 0.0.2
4
+ Summary: Client SDK for Objectstore, the Sentry object storage platform
5
+ Author: Sentry
6
+ Author-email: Sentry <oss@sentry.io>
7
+ License: # Functional Source License, Version 1.1, Apache 2.0 Future License
8
+
9
+ ## Abbreviation
10
+
11
+ FSL-1.1-Apache-2.0
12
+
13
+ ## Notice
14
+
15
+ Copyright 2018-2024 Functional Software, Inc. dba Sentry
16
+
17
+ ## Terms and Conditions
18
+
19
+ ### Licensor ("We")
20
+
21
+ The party offering the Software under these Terms and Conditions.
22
+
23
+ ### The Software
24
+
25
+ The "Software" is each version of the software that we make available under
26
+ these Terms and Conditions, as indicated by our inclusion of these Terms and
27
+ Conditions with the Software.
28
+
29
+ ### License Grant
30
+
31
+ Subject to your compliance with this License Grant and the Patents,
32
+ Redistribution and Trademark clauses below, we hereby grant you the right to
33
+ use, copy, modify, create derivative works, publicly perform, publicly display
34
+ and redistribute the Software for any Permitted Purpose identified below.
35
+
36
+ ### Permitted Purpose
37
+
38
+ A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
39
+ means making the Software available to others in a commercial product or
40
+ service that:
41
+
42
+ 1. substitutes for the Software;
43
+
44
+ 2. substitutes for any other product or service we offer using the Software
45
+ that exists as of the date we make the Software available; or
46
+
47
+ 3. offers the same or substantially similar functionality as the Software.
48
+
49
+ Permitted Purposes specifically include using the Software:
50
+
51
+ 1. for your internal use and access;
52
+
53
+ 2. for non-commercial education;
54
+
55
+ 3. for non-commercial research; and
56
+
57
+ 4. in connection with professional services that you provide to a licensee
58
+ using the Software in accordance with these Terms and Conditions.
59
+
60
+ ### Patents
61
+
62
+ To the extent your use for a Permitted Purpose would necessarily infringe our
63
+ patents, the license grant above includes a license under our patents. If you
64
+ make a claim against any party that the Software infringes or contributes to
65
+ the infringement of any patent, then your patent license to the Software ends
66
+ immediately.
67
+
68
+ ### Redistribution
69
+
70
+ The Terms and Conditions apply to all copies, modifications and derivatives of
71
+ the Software.
72
+
73
+ If you redistribute any copies, modifications or derivatives of the Software,
74
+ you must include a copy of or a link to these Terms and Conditions and not
75
+ remove any copyright notices provided in or with the Software.
76
+
77
+ ### Disclaimer
78
+
79
+ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
80
+ IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
81
+ PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
82
+
83
+ IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
84
+ SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
85
+ EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
86
+
87
+ ### Trademarks
88
+
89
+ Except for displaying the License Details and identifying us as the origin of
90
+ the Software, you have no right under these Terms and Conditions to use our
91
+ trademarks, trade names, service marks or product names.
92
+
93
+ ## Grant of Future License
94
+
95
+ We hereby irrevocably grant you an additional license to use the Software under
96
+ the Apache License, Version 2.0 that is effective on the second anniversary of
97
+ the date we make the Software available. On or after that date, you may use the
98
+ Software under the Apache License, Version 2.0, in which case the following
99
+ will apply:
100
+
101
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use
102
+ this file except in compliance with the License.
103
+
104
+ You may obtain a copy of the License at
105
+
106
+ http://www.apache.org/licenses/LICENSE-2.0
107
+
108
+ Unless required by applicable law or agreed to in writing, software distributed
109
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
110
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
111
+ specific language governing permissions and limitations under the License.
112
+ Requires-Dist: sentry-sdk>=2.42.1
113
+ Requires-Dist: urllib3>=2.5.0
114
+ Requires-Dist: zstandard>=0.18.0
115
+ Requires-Python: >=3.13
116
+ Description-Content-Type: text/markdown
117
+
118
+ # Objectstore Client
119
+
120
+ The client is used to interface with the objectstore backend. It handles
121
+ responsibilities like transparent compression, and making sure that uploads and
122
+ downloads are done as efficiently as possible.
123
+
124
+ ## Usage
125
+
126
+ ```python
127
+ import datetime
128
+
129
+ from objectstore_client import ClientBuilder, NoOpMetricsBackend, TimeToLive
130
+
131
+ client_builder = ClientBuilder(
132
+ "http://localhost:8888",
133
+ "my_usecase",
134
+ metrics_backend=NoOpMetricsBackend(), # optionally, provide your own MetricsBackend implementation
135
+ )
136
+ client = client_builder.for_project(42, 424242)
137
+
138
+ object_id = client.put(
139
+ b"Hello, world!",
140
+ metadata={"key": "value"},
141
+ expiration_policy=TimeToLive(datetime.timedelta(days=1)),
142
+ )
143
+
144
+ result = client.get(object_id)
145
+
146
+ content = result.payload.read()
147
+ assert content == b"Hello, world!"
148
+ assert result.metadata.custom["key"] == "value"
149
+
150
+ client.delete(object_id)
151
+ ```
152
+
153
+ ## Development
154
+
155
+ ### Environment Setup
156
+
157
+ The considerations for setting up the development environment that can be found in the main [README](../README.md) apply for this package as well.
158
+
159
+ ### Pre-commit hook
160
+
161
+ A configuration to set up a git pre-commit hook using [pre-commit](https://github.com/pre-commit/pre-commit) is available at the root of the repository.
162
+
163
+ To install it, run
164
+ ```sh
165
+ pre-commit install
166
+ ```
167
+
168
+ The hook will automatically run some checks before every commit, including the linters and formatters we run in CI.
@@ -0,0 +1,51 @@
1
+ # Objectstore Client
2
+
3
+ The client is used to interface with the objectstore backend. It handles
4
+ responsibilities like transparent compression, and making sure that uploads and
5
+ downloads are done as efficiently as possible.
6
+
7
+ ## Usage
8
+
9
+ ```python
10
+ import datetime
11
+
12
+ from objectstore_client import ClientBuilder, NoOpMetricsBackend, TimeToLive
13
+
14
+ client_builder = ClientBuilder(
15
+ "http://localhost:8888",
16
+ "my_usecase",
17
+ metrics_backend=NoOpMetricsBackend(), # optionally, provide your own MetricsBackend implementation
18
+ )
19
+ client = client_builder.for_project(42, 424242)
20
+
21
+ object_id = client.put(
22
+ b"Hello, world!",
23
+ metadata={"key": "value"},
24
+ expiration_policy=TimeToLive(datetime.timedelta(days=1)),
25
+ )
26
+
27
+ result = client.get(object_id)
28
+
29
+ content = result.payload.read()
30
+ assert content == b"Hello, world!"
31
+ assert result.metadata.custom["key"] == "value"
32
+
33
+ client.delete(object_id)
34
+ ```
35
+
36
+ ## Development
37
+
38
+ ### Environment Setup
39
+
40
+ The considerations for setting up the development environment that can be found in the main [README](../README.md) apply for this package as well.
41
+
42
+ ### Pre-commit hook
43
+
44
+ A configuration to set up a git pre-commit hook using [pre-commit](https://github.com/pre-commit/pre-commit) is available at the root of the repository.
45
+
46
+ To install it, run
47
+ ```sh
48
+ pre-commit install
49
+ ```
50
+
51
+ The hook will automatically run some checks before every commit, including the linters and formatters we run in CI.
@@ -0,0 +1,21 @@
1
[project]
name = "objectstore-client"
version = "0.0.2"
description = "Client SDK for Objectstore, the Sentry object storage platform"
readme = "README.md"
authors = [
    {name = "Sentry", email = "oss@sentry.io"},
]
license = { file = "LICENSE.md" }
requires-python = ">=3.13"
dependencies = [
    "sentry-sdk>=2.42.1",
    "urllib3>=2.5.0",
    "zstandard>=0.18.0",
]

# PEP 621 does not allow arbitrary keys (e.g. `homepage`, `repository`) at the
# top level of [project]; project URLs belong in the [project.urls] table.
[project.urls]
homepage = "https://getsentry.github.io/objectstore/"
repository = "https://github.com/getsentry/objectstore"

[build-system]
requires = ["uv_build"]
build-backend = "uv_build"
@@ -0,0 +1,23 @@
1
"""Public API of the Objectstore client SDK.

The names re-exported here (see ``__all__``) form the package's public
surface; import from ``objectstore_client`` directly rather than from the
submodules.
"""

from objectstore_client.client import Client, ClientBuilder, ClientError, GetResult
from objectstore_client.metadata import (
    Compression,
    ExpirationPolicy,
    Metadata,
    TimeToIdle,
    TimeToLive,
)
from objectstore_client.metrics import MetricsBackend, NoOpMetricsBackend

__all__ = [
    "Client",
    "ClientBuilder",
    "ClientError",
    "GetResult",
    "Compression",
    "ExpirationPolicy",
    "Metadata",
    "TimeToIdle",
    "TimeToLive",
    "MetricsBackend",
    "NoOpMetricsBackend",
]
@@ -0,0 +1,269 @@
1
+ from __future__ import annotations
2
+
3
+ from io import BytesIO
4
+ from typing import IO, Literal, NamedTuple, NotRequired, Self, TypedDict, cast
5
+ from urllib.parse import urlencode
6
+
7
+ import sentry_sdk
8
+ import urllib3
9
+ import zstandard
10
+ from urllib3.connectionpool import HTTPConnectionPool
11
+
12
+ from objectstore_client.metadata import (
13
+ HEADER_EXPIRATION,
14
+ HEADER_META_PREFIX,
15
+ Compression,
16
+ ExpirationPolicy,
17
+ Metadata,
18
+ format_expiration,
19
+ )
20
+ from objectstore_client.metrics import (
21
+ MetricsBackend,
22
+ NoOpMetricsBackend,
23
+ measure_storage_operation,
24
+ )
25
+
26
+ Permission = Literal["read", "write"]
27
+
28
+
29
class Scope(TypedDict):
    """A storage scope: an organization, optionally narrowed to one project.

    NOTE(review): the clients built by `ClientBuilder` encode the scope as a
    string (`org.<id>[/proj.<id>]`); this TypedDict is the structured
    equivalent.
    """

    # Organization id the objects belong to.
    organization: int
    # Optional project id narrowing the scope within the organization.
    project: NotRequired[int]
32
+
33
+
34
class GetResult(NamedTuple):
    """Result of `Client.get`: parsed object metadata plus the payload stream."""

    # Metadata parsed from the response headers (content type, compression,
    # expiration policy, custom key/value pairs).
    metadata: Metadata
    # Readable byte stream with the object contents; call `.read()` to consume.
    payload: IO[bytes]
37
+
38
+
39
class ClientBuilder:
    """Holds connection settings and defaults, and builds scoped `Client`s.

    Create one builder per usecase, then call `for_organization` or
    `for_project` to obtain a `Client` bound to a concrete storage scope.
    """

    def __init__(
        self,
        objectstore_base_url: str,
        usecase: str,
        metrics_backend: MetricsBackend | None = None,
        propagate_traces: bool = False,
        default_expiration_policy: ExpirationPolicy | None = None,
        retries: urllib3.Retry | None = None,
        timeout: urllib3.Timeout | None = None,
    ):
        self._base_url = objectstore_base_url
        self._usecase = usecase

        # Only connection problems are retried by default: the zstd
        # compression stream cannot be rewound, so a partially-sent request
        # body is not safely re-sendable.
        self._retries = retries or urllib3.Retry(connect=3, redirect=5)

        # `read` bounds the gap between consecutive read operations (i.e. one
        # chunk of a large response), not the whole response. 500ms is still
        # conservative for an in-network backend expected to answer in <100ms.
        self._timeout = timeout or urllib3.Timeout(connect=0.5, read=0.5)

        self._default_compression: Compression = "zstd"
        # Pre-format the default policy once so `Client.put` can send it as-is.
        if default_expiration_policy:
            self._default_expiration_policy = format_expiration(
                default_expiration_policy
            )
        else:
            self._default_expiration_policy = None

        self._propagate_traces = propagate_traces
        self._metrics_backend = metrics_backend or NoOpMetricsBackend()

    def _make_client(self, scope: str) -> Client:
        # Each client gets its own connection pool for the configured base URL.
        pool = urllib3.connectionpool.connection_from_url(
            self._base_url, retries=self._retries, timeout=self._timeout
        )
        return Client(
            pool,
            self._default_compression,
            self._default_expiration_policy,
            self._usecase,
            scope,
            self._propagate_traces,
            self._metrics_backend,
        )

    def default_compression(self, default_compression: Compression) -> Self:
        """Set the compression applied to uploads that don't request one."""
        self._default_compression = default_compression
        return self

    def for_organization(self, organization_id: int) -> Client:
        """Build a client scoped to the whole organization."""
        return self._make_client(f"org.{organization_id}")

    def for_project(self, organization_id: int, project_id: int) -> Client:
        """Build a client scoped to a single project of an organization."""
        return self._make_client(f"org.{organization_id}/proj.{project_id}")
95
+
96
+
97
class Client:
    """Scoped objectstore client.

    Instances are built via `ClientBuilder.for_organization` /
    `ClientBuilder.for_project`; every request carries the configured
    usecase and scope as query parameters.
    """

    _default_compression: Compression

    def __init__(
        self,
        pool: HTTPConnectionPool,
        default_compression: Compression,
        default_expiration_policy: str | None,
        usecase: str,
        scope: str,
        propagate_traces: bool,
        metrics_backend: MetricsBackend,
    ):
        self._pool = pool
        self._default_compression = default_compression
        # Already wire-formatted by the builder (see `format_expiration`).
        self._default_expiration_policy = default_expiration_policy
        self._usecase = usecase
        self._scope = scope
        self._propagate_traces = propagate_traces
        self._metrics_backend = metrics_backend

    def _make_headers(self) -> dict[str, str]:
        # Attach Sentry trace-propagation headers when enabled, so backend
        # spans join the caller's trace.
        if self._propagate_traces:
            return dict(sentry_sdk.get_current_scope().iter_trace_propagation_headers())
        return {}

    def _make_url(self, id: str | None, full: bool = False) -> str:
        # `full=True` yields an absolute URL (used by `object_url`);
        # otherwise a pool-relative path. Usecase and scope always travel
        # in the query string.
        base_path = f"/v1/{id}" if id else "/v1/"
        qs = urlencode({"usecase": self._usecase, "scope": self._scope})
        if full:
            return f"http://{self._pool.host}:{self._pool.port}{base_path}?{qs}"
        else:
            return f"{base_path}?{qs}"

    def put(
        self,
        contents: bytes | IO[bytes],
        id: str | None = None,
        compression: Compression | Literal["none"] | None = None,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
        expiration_policy: ExpirationPolicy | None = None,
    ) -> str:
        """
        Uploads the given `contents` to blob storage and returns the object id.

        If no `id` is provided, one will be automatically generated and returned
        from this function.

        The client will select the configured `default_compression` if none is given
        explicitly.
        This can be overridden by explicitly giving a `compression` argument.
        Providing `"none"` as the argument will instruct the client to not apply
        any compression to this upload, which is useful for uncompressible formats.

        Raises `ClientError` if the backend responds with an error status.
        """
        headers = self._make_headers()
        body = BytesIO(contents) if isinstance(contents, bytes) else contents
        original_body: IO[bytes] = body

        compression = compression or self._default_compression
        if compression == "zstd":
            # Compress on the fly while the request body is being streamed.
            cctx = zstandard.ZstdCompressor()
            body = cctx.stream_reader(original_body)
            headers["Content-Encoding"] = "zstd"

        if content_type:
            headers["Content-Type"] = content_type

        if expiration_policy:
            headers[HEADER_EXPIRATION] = format_expiration(expiration_policy)
        elif self._default_expiration_policy:
            headers[HEADER_EXPIRATION] = self._default_expiration_policy

        if metadata:
            # Custom metadata is transported as prefixed headers.
            for k, v in metadata.items():
                headers[f"{HEADER_META_PREFIX}{k}"] = v

        with measure_storage_operation(
            self._metrics_backend, "put", self._usecase
        ) as metric_emitter:
            response = self._pool.request(
                "PUT",
                self._make_url(id),
                body=body,
                headers=headers,
                preload_content=True,
                decode_content=True,
            )
            raise_for_status(response)
            res = response.json()

            # Must do this after streaming `body` as that's what is responsible
            # for advancing the seek position in both streams
            metric_emitter.record_uncompressed_size(original_body.tell())
            if compression and compression != "none":
                metric_emitter.record_compressed_size(body.tell(), compression)
            return res["key"]

    def get(self, id: str, decompress: bool = True) -> GetResult:
        """
        This fetches the blob with the given `id`, returning an `IO` stream that
        can be read.

        By default, content that was uploaded compressed will be automatically
        decompressed, unless `decompress=False` is passed.

        Raises `ClientError` if the backend responds with an error status.
        """

        headers = self._make_headers()
        with measure_storage_operation(self._metrics_backend, "get", self._usecase):
            response = self._pool.request(
                "GET",
                self._make_url(id),
                # Keep the body unread and undecoded: it is handed to the
                # caller as a stream, with decompression handled below.
                preload_content=False,
                decode_content=False,
                headers=headers,
            )
            raise_for_status(response)
            # NOTE(review): the urllib3 response object is used directly as a
            # readable stream here; `response.stream()` would be an alternative.
            stream = cast(IO[bytes], response)
            metadata = Metadata.from_headers(response.headers)

            if metadata.compression and decompress:
                if metadata.compression != "zstd":
                    raise NotImplementedError(
                        "Transparent decoding of anything but `zstd` is not implemented yet"
                    )

                # The caller receives decompressed data, so the reported
                # metadata must not claim a compression anymore.
                metadata.compression = None
                dctx = zstandard.ZstdDecompressor()
                stream = dctx.stream_reader(stream, read_across_frames=True)

            return GetResult(metadata, stream)

    def object_url(self, id: str) -> str:
        """
        Generates a GET url to the object with the given `id`.

        This can then be used by downstream services to fetch the given object.
        NOTE however that the service does not strictly follow HTTP semantics,
        in particular in relation to `Accept-Encoding`.
        """
        return self._make_url(id, full=True)

    def delete(self, id: str) -> None:
        """
        Deletes the blob with the given `id`.

        Raises `ClientError` if the backend responds with an error status.
        """

        headers = self._make_headers()
        with measure_storage_operation(self._metrics_backend, "delete", self._usecase):
            response = self._pool.request(
                "DELETE",
                self._make_url(id),
                headers=headers,
            )
            raise_for_status(response)
253
+
254
+
255
class ClientError(Exception):
    """Raised when an objectstore request fails with an HTTP error status.

    Attributes:
        status: HTTP status code of the failed request.
        response: Raw response body, kept for debugging.
    """

    def __init__(self, message: str, status: int, response: str):
        super().__init__(message)
        self.status, self.response = status, response
260
+
261
+
262
def raise_for_status(response: urllib3.BaseHTTPResponse) -> None:
    """Raise `ClientError` if `response` carries an HTTP error status (>= 400).

    The response body is attached to the error for debugging. Bytes bodies are
    decoded as UTF-8 (with replacement characters on invalid sequences) —
    previously `str(bytes)` was used, which embedded the `b'...'` repr in the
    error text.
    """
    if response.status >= 400:
        body = response.data or response.read()
        if isinstance(body, bytes):
            res = body.decode("utf-8", errors="replace")
        else:
            res = str(body)
        raise ClientError(
            f"Objectstore request failed with status {response.status}",
            response.status,
            res,
        )
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import re
5
+ from collections.abc import Mapping
6
+ from dataclasses import dataclass
7
+ from datetime import timedelta
8
+ from typing import Literal, cast
9
+
10
+ Compression = Literal["zstd"]
11
+
12
+ HEADER_EXPIRATION = "x-sn-expiration"
13
+ HEADER_META_PREFIX = "x-snme-"
14
+
15
+
16
@dataclass
class TimeToIdle:
    """Expiration policy serialized as `tti:<delta>` (see `format_expiration`).

    NOTE(review): "time-to-idle" presumably means the object expires `delta`
    after its last access — confirm against the backend's documentation.
    """

    # Idle duration after which the object expires.
    delta: timedelta
19
+
20
+
21
@dataclass
class TimeToLive:
    """Expiration policy serialized as `ttl:<delta>` (see `format_expiration`).

    NOTE(review): "time-to-live" presumably means the object expires `delta`
    after creation, regardless of access — confirm against the backend's
    documentation.
    """

    # Lifetime after which the object expires.
    delta: timedelta
24
+
25
+
26
+ ExpirationPolicy = TimeToIdle | TimeToLive
27
+
28
+
29
@dataclass
class Metadata:
    """Object metadata parsed from (HTTP) response headers."""

    # Content type of the object; defaults to "application/octet-stream".
    content_type: str | None
    # Compression the payload is stored with, or None.
    compression: Compression | None
    # Parsed expiration policy, or None if absent/unrecognized.
    expiration_policy: ExpirationPolicy | None
    # Custom key/value metadata, stripped of the header prefix.
    custom: dict[str, str]

    @classmethod
    def from_headers(cls, headers: Mapping[str, str]) -> Metadata:
        """Build `Metadata` from response headers.

        Header names are matched case-insensitively: HTTP header casing is not
        guaranteed (urllib3 preserves the casing the server sent), so a
        `Content-Type` header must match just like `content-type`.
        """
        content_type = "application/octet-stream"
        compression = None
        expiration_policy = None
        custom_metadata = {}

        for key, v in headers.items():
            k = key.lower()
            if k == "content-type":
                content_type = v
            elif k == "content-encoding":
                compression = cast(Compression | None, v)
            elif k == HEADER_EXPIRATION:
                expiration_policy = parse_expiration(v)
            elif k.startswith(HEADER_META_PREFIX):
                custom_metadata[k[len(HEADER_META_PREFIX) :]] = v

        return Metadata(
            content_type=content_type,
            compression=compression,
            expiration_policy=expiration_policy,
            custom=custom_metadata,
        )
59
+
60
+
61
def format_expiration(expiration_policy: ExpirationPolicy) -> str:
    """Serialize an expiration policy to its wire format (`tti:`/`ttl:` prefix).

    Raises TypeError for objects outside the `ExpirationPolicy` union —
    previously such values silently produced an implicit `None` return,
    violating the `-> str` contract.
    """
    if isinstance(expiration_policy, TimeToIdle):
        prefix = "tti"
    elif isinstance(expiration_policy, TimeToLive):
        prefix = "ttl"
    else:
        raise TypeError(f"unsupported expiration policy: {expiration_policy!r}")
    return f"{prefix}:{format_timedelta(expiration_policy.delta)}"
66
+
67
+
68
def parse_expiration(value: str) -> ExpirationPolicy | None:
    """Parse the wire format produced by `format_expiration`.

    Returns None when the value carries no recognized `tti:`/`ttl:` prefix.
    """
    prefix, sep, rest = value.partition(":")
    if sep:
        if prefix == "tti":
            return TimeToIdle(parse_timedelta(rest))
        if prefix == "ttl":
            return TimeToLive(parse_timedelta(rest))

    return None
75
+
76
+
77
def format_timedelta(delta: timedelta) -> str:
    """Render a timedelta as e.g. "3 days 7200 seconds".

    Either part is omitted when zero; a zero delta yields the empty string.
    """
    parts = []
    if delta.days:
        parts.append(f"{delta.days} days")
    if delta.seconds:
        parts.append(f"{delta.seconds} seconds")
    return " ".join(parts)
86
+
87
+
88
+ TIME_SPLIT = re.compile(r"[^\W\d_]+|\d+")
89
+
90
+
91
+ def parse_timedelta(delta: str) -> timedelta:
92
+ words = TIME_SPLIT.findall(delta)
93
+ seconds = 0
94
+
95
+ for num, unit in itertools.batched(words, n=2, strict=True):
96
+ num = int(num)
97
+ multiplier = 0
98
+
99
+ if unit.startswith("w"):
100
+ multiplier = 86400 * 7
101
+ elif unit.startswith("d"):
102
+ multiplier = 86400
103
+ elif unit.startswith("h"):
104
+ multiplier = 3600
105
+ elif unit.startswith("m") and not unit.startswith("ms"):
106
+ multiplier = 60
107
+ elif unit.startswith("s"):
108
+ multiplier = 1
109
+
110
+ seconds += num * multiplier
111
+
112
+ return timedelta(seconds=seconds)
@@ -0,0 +1,182 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from abc import abstractmethod
5
+ from collections.abc import Generator, Mapping
6
+ from contextlib import contextmanager
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ Tags = Mapping[str, str]
10
+
11
+
12
@runtime_checkable
class MetricsBackend(Protocol):
    """
    An abstract class that defines the interface for metrics backends.

    Implementations receive metric names, numeric values, and optional
    tag mappings; see `NoOpMetricsBackend` for the default no-op
    implementation.
    """

    @abstractmethod
    def increment(
        self,
        name: str,
        value: int | float = 1,
        tags: Tags | None = None,
    ) -> None:
        """
        Increments a counter metric by a given value.
        """
        raise NotImplementedError

    @abstractmethod
    def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None:
        """
        Sets a gauge metric to the given value.
        """
        raise NotImplementedError

    @abstractmethod
    def distribution(
        self,
        name: str,
        value: int | float,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """
        Records a distribution metric.

        `unit` is an optional unit name for the recorded value (e.g. "byte").
        """
        raise NotImplementedError
49
+
50
+
51
class NoOpMetricsBackend(MetricsBackend):
    """A `MetricsBackend` that silently discards every metric.

    Used as the default when no backend is configured.
    """

    def increment(
        self,
        name: str,
        value: int | float = 1,
        tags: Tags | None = None,
    ) -> None:
        """Discard the counter increment."""
        return None

    def gauge(self, name: str, value: int | float, tags: Tags | None = None) -> None:
        """Discard the gauge value."""
        return None

    def distribution(
        self,
        name: str,
        value: int | float,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """Discard the distribution sample."""
        return None
75
+
76
+
77
class StorageMetricEmitter:
    """Collects and emits metrics for a single storage operation.

    One emitter is created per operation by `measure_storage_operation`.
    Sizes may be recorded while the operation is in flight; once latency
    and sizes are known, derived metrics (compression ratio, throughput)
    can be emitted via the `maybe_record_*` methods.
    """

    def __init__(self, backend: MetricsBackend, operation: str, usecase: str):
        self.backend = backend
        self.operation = operation
        self.usecase = usecase

        # Filled in during or after the enclosed operation.
        # NOTE(review): `start` is never assigned by this class; kept as-is
        # for compatibility with existing consumers.
        self.start: int | None = None
        self.elapsed: float | None = None
        self.uncompressed_size: int | None = None
        self.compressed_size: int | None = None
        self.compression: str = "unknown"

    def record_latency(self, elapsed: float) -> None:
        """Emit the operation's latency and remember it for throughput math."""
        self.backend.distribution(
            f"storage.{self.operation}.latency",
            elapsed,
            tags={"usecase": self.usecase},
        )
        self.elapsed = elapsed

    def record_uncompressed_size(self, value: int) -> None:
        """Emit the object's raw size, tagged `compression=none`."""
        self.backend.distribution(
            f"storage.{self.operation}.size",
            value,
            tags={"usecase": self.usecase, "compression": "none"},
            unit="byte",
        )
        self.uncompressed_size = value

    def record_compressed_size(self, value: int, compression: str = "unknown") -> None:
        """Emit the object's on-the-wire size, tagged with its compression."""
        self.backend.distribution(
            f"storage.{self.operation}.size",
            value,
            tags={"usecase": self.usecase, "compression": compression},
            unit="byte",
        )
        self.compressed_size = value
        self.compression = compression

    def maybe_record_compression_ratio(self) -> None:
        """Emit compressed/uncompressed ratio if both sizes were recorded."""
        if not self.uncompressed_size or not self.compressed_size:
            return None

        self.backend.distribution(
            f"storage.{self.operation}.compression_ratio",
            self.compressed_size / self.uncompressed_size,
            tags={"usecase": self.usecase, "compression": self.compression},
        )

    def maybe_record_throughputs(self) -> None:
        """Emit bytes/second and seconds/byte for every recorded size."""
        if not self.elapsed or self.elapsed <= 0:
            return None

        recorded = []
        if self.uncompressed_size:
            recorded.append((self.uncompressed_size, "none"))
        if self.compressed_size:
            recorded.append((self.compressed_size, self.compression))

        for size, compression in recorded:
            tags = {"usecase": self.usecase, "compression": compression}
            self.backend.distribution(
                f"storage.{self.operation}.throughput", size / self.elapsed, tags=tags
            )
            self.backend.distribution(
                f"storage.{self.operation}.inverse_throughput",
                self.elapsed / size,
                tags=tags,
            )
143
+
144
+
145
@contextmanager
def measure_storage_operation(
    backend: MetricsBackend,
    operation: str,
    usecase: str,
    uncompressed_size: int | None = None,
    compressed_size: int | None = None,
    compression: str = "unknown",
) -> Generator[StorageMetricEmitter]:
    """
    Measure a storage operation, always emitting its latency.

    Sizes may be passed up-front, or recorded later on the yielded
    `StorageMetricEmitter` from inside the block — for some operations (GET)
    the size is only known once the response arrives. Whatever sizes are
    known by the end are used to derive compression-ratio and throughput
    metrics.
    """
    emitter = StorageMetricEmitter(backend, operation, usecase)

    if uncompressed_size:
        emitter.record_uncompressed_size(uncompressed_size)
    if compressed_size:
        emitter.record_compressed_size(compressed_size, compression)

    started_at = time.monotonic()

    try:
        yield emitter

    finally:
        # Latency is emitted even when the enclosed block raises.
        emitter.record_latency(time.monotonic() - started_at)

        # Derived metrics; no-ops unless the necessary sizes were recorded.
        emitter.maybe_record_compression_ratio()
        emitter.maybe_record_throughputs()