pymongoftdc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ checks:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Check out repository
16
+ uses: actions/checkout@v6
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v6
20
+ with:
21
+ python-version: "3.10"
22
+ cache: pip
23
+ cache-dependency-path: pyproject.toml
24
+
25
+ - name: Lint
26
+ run: make lint
27
+
28
+ - name: Test
29
+ run: make test
@@ -0,0 +1,81 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types:
6
+ - published
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ build:
13
+ name: Build distributions
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: Check out release
17
+ uses: actions/checkout@v6
18
+ with:
19
+ ref: ${{ github.event.release.tag_name }}
20
+
21
+ - name: Set up Python
22
+ uses: actions/setup-python@v6
23
+ with:
24
+ python-version: "3.12"
25
+ cache: pip
26
+ cache-dependency-path: pyproject.toml
27
+
28
+ - name: Install build tools
29
+ run: python -m pip install --upgrade build twine
30
+
31
+ - name: Verify release version
32
+ env:
33
+ RELEASE_TAG: ${{ github.event.release.tag_name }}
34
+ run: |
35
+ python - <<'PY'
36
+ import os
37
+ import tomllib
38
+
39
+ with open("pyproject.toml", "rb") as stream:
40
+ package_version = tomllib.load(stream)["project"]["version"]
41
+
42
+ release_version = os.environ["RELEASE_TAG"].removeprefix("v")
43
+ if release_version != package_version:
44
+ raise SystemExit(
45
+ f"release tag version {release_version!r} does not match "
46
+ f"pyproject.toml version {package_version!r}"
47
+ )
48
+ PY
49
+
50
+ - name: Build distributions
51
+ run: python -m build
52
+
53
+ - name: Check distributions
54
+ run: python -m twine check dist/*
55
+
56
+ - name: Upload distributions
57
+ uses: actions/upload-artifact@v6
58
+ with:
59
+ name: python-package-distributions
60
+ path: dist/
61
+ if-no-files-found: error
62
+ retention-days: 1
63
+
64
+ publish:
65
+ name: Publish distributions
66
+ needs: build
67
+ runs-on: ubuntu-latest
68
+ environment:
69
+ name: pypi
70
+ url: https://pypi.org/p/pymongoftdc
71
+ permissions:
72
+ id-token: write
73
+ steps:
74
+ - name: Download distributions
75
+ uses: actions/download-artifact@v6
76
+ with:
77
+ name: python-package-distributions
78
+ path: dist/
79
+
80
+ - name: Publish distributions to PyPI
81
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,221 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # Qwen Code
210
+ .qwen/
211
+
212
+ # PyPI configuration file
213
+ .pypirc
214
+
215
+ # Marimo
216
+ marimo/_static/
217
+ marimo/_lsp/
218
+ __marimo__/
219
+
220
+ # Streamlit
221
+ .streamlit/secrets.toml
@@ -0,0 +1,15 @@
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "name": "Run All Unit Tests",
6
+ "type": "debugpy",
7
+ "request": "launch",
8
+ "module": "pytest",
9
+ "python": "${workspaceFolder}/.venv/bin/python",
10
+ "cwd": "${workspaceFolder}",
11
+ "console": "integratedTerminal",
12
+ "justMyCode": false
13
+ }
14
+ ]
15
+ }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yaoxing
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,19 @@
1
+ PYTHON ?= python3
2
+ VENV := .venv
3
+ VENV_PYTHON := $(VENV)/bin/python
4
+
5
+ .PHONY: venv deps lint test
6
+
7
+ venv:
8
+ @test -x "$(VENV_PYTHON)" || "$(PYTHON)" -m venv "$(VENV)"
9
+
10
+ deps: venv
11
+ "$(VENV_PYTHON)" -m pip install -e '.[test]'
12
+
13
+ lint: deps
14
+ "$(VENV_PYTHON)" -m ruff check src tests
15
+ "$(VENV_PYTHON)" -m pylint src tests
16
+ "$(VENV_PYTHON)" -m pyright
17
+
18
+ test: deps
19
+ "$(VENV_PYTHON)" -m pytest
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: pymongoftdc
3
+ Version: 0.1.0
4
+ Summary: A typed reader for MongoDB FTDC metric archives
5
+ License: MIT License
6
+
7
+ Copyright (c) 2026 Yaoxing
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+ License-File: LICENSE
27
+ Requires-Python: >=3.10
28
+ Requires-Dist: pymongo<5,>=4.6
29
+ Provides-Extra: test
30
+ Requires-Dist: pylint>=3; extra == 'test'
31
+ Requires-Dist: pyright>=1.1.400; extra == 'test'
32
+ Requires-Dist: pytest-cov>=5; extra == 'test'
33
+ Requires-Dist: pytest>=8; extra == 'test'
34
+ Requires-Dist: ruff>=0.11; extra == 'test'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # pymongoftdc
38
+
39
+ [![CI](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml)
40
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
41
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
42
+
43
+ `pymongoftdc` reads numeric time-series metrics directly from MongoDB Full-Time
44
+ Diagnostic Data Capture (FTDC) archive files.
45
+
46
+ ## Install
47
+
48
+ ```bash
49
+ python -m pip install -e .
50
+ ```
51
+
52
+ For development:
53
+
54
+ ```bash
55
+ python -m pip install -e '.[test]'
56
+ pytest
57
+ ```
58
+
59
+ ## Use
60
+
61
+ ```python
62
+ from datetime import datetime, timezone
63
+ from pyftdc import FTDCReader
64
+
65
+ reader = FTDCReader("/var/lib/mongo/diagnostic.data")
66
+ metrics = reader.get_metric(
67
+ {"serverStatus.connections.current"},
68
+ start=datetime(2026, 1, 1, tzinfo=timezone.utc),
69
+ end=datetime(2026, 1, 1, 1, tzinfo=timezone.utc),
70
+ sample_rate=0.1,
71
+ )
72
+ points = metrics["serverStatus.connections.current"]
73
+ ```
74
+
75
+ The source may be one `metrics.*` file or a `diagnostic.data` directory.
76
+ Timespan endpoints are inclusive and must be timezone-aware. Omit `start` or
77
+ `end` to use the earliest or latest timestamp in the source. The result maps each
78
+ requested name to points ordered by UTC timestamp. Pass an empty set to read every
79
+ metric. `sample_rate` must be greater than 0 and at most 1;
80
+ for example, `0.1` returns approximately 10% of points. Its default is `1.0`.
81
+ `query()` is an alias for `get_metric()`.
82
+
83
+ Use `reader.list_metrics()` to discover dotted metric paths. A missing requested
84
+ metric raises `MetricNotFoundError`; an invalid archive raises `FTDCDecodeError`.
85
+
86
+ ## Project layout
87
+
88
+ ```text
89
+ src/pyftdc/
90
+ _codec.py BSON framing and FTDC decompression
91
+ reader.py public query API
92
+ models.py returned value objects
93
+ exceptions.py library-specific errors
94
+ tests/ pytest tests and fixture builders
95
+ ```
96
+
97
+ The reader supports BSON-framed type-1 metric chunks using MongoDB's
98
+ delta/RLE/varint/zlib encoding. Metadata documents are safely skipped.
@@ -0,0 +1,62 @@
1
+ # pymongoftdc
2
+
3
+ [![CI](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml)
4
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
6
+
7
+ `pymongoftdc` reads numeric time-series metrics directly from MongoDB Full-Time
8
+ Diagnostic Data Capture (FTDC) archive files.
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ python -m pip install -e .
14
+ ```
15
+
16
+ For development:
17
+
18
+ ```bash
19
+ python -m pip install -e '.[test]'
20
+ pytest
21
+ ```
22
+
23
+ ## Use
24
+
25
+ ```python
26
+ from datetime import datetime, timezone
27
+ from pyftdc import FTDCReader
28
+
29
+ reader = FTDCReader("/var/lib/mongo/diagnostic.data")
30
+ metrics = reader.get_metric(
31
+ {"serverStatus.connections.current"},
32
+ start=datetime(2026, 1, 1, tzinfo=timezone.utc),
33
+ end=datetime(2026, 1, 1, 1, tzinfo=timezone.utc),
34
+ sample_rate=0.1,
35
+ )
36
+ points = metrics["serverStatus.connections.current"]
37
+ ```
38
+
39
+ The source may be one `metrics.*` file or a `diagnostic.data` directory.
40
+ Timespan endpoints are inclusive and must be timezone-aware. Omit `start` or
41
+ `end` to use the earliest or latest timestamp in the source. The result maps each
42
+ requested name to points ordered by UTC timestamp. Pass an empty set to read every
43
+ metric. `sample_rate` must be greater than 0 and at most 1;
44
+ for example, `0.1` returns approximately 10% of points. Its default is `1.0`.
45
+ `query()` is an alias for `get_metric()`.
46
+
47
+ Use `reader.list_metrics()` to discover dotted metric paths. A missing requested
48
+ metric raises `MetricNotFoundError`; an invalid archive raises `FTDCDecodeError`.
49
+
50
+ ## Project layout
51
+
52
+ ```text
53
+ src/pyftdc/
54
+ _codec.py BSON framing and FTDC decompression
55
+ reader.py public query API
56
+ models.py returned value objects
57
+ exceptions.py library-specific errors
58
+ tests/ pytest tests and fixture builders
59
+ ```
60
+
61
+ The reader supports BSON-framed type-1 metric chunks using MongoDB's
62
+ delta/RLE/varint/zlib encoding. Metadata documents are safely skipped.
@@ -0,0 +1,53 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.25"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pymongoftdc"
7
+ version = "0.1.0"
8
+ description = "A typed reader for MongoDB FTDC metric archives"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ dependencies = ["pymongo>=4.6,<5"]
13
+
14
+ [project.optional-dependencies]
15
+ test = [
16
+ "pylint>=3",
17
+ "pyright>=1.1.400",
18
+ "pytest>=8",
19
+ "pytest-cov>=5",
20
+ "ruff>=0.11",
21
+ ]
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["src/pyftdc"]
25
+
26
+ [tool.pytest.ini_options]
27
+ addopts = "-ra --strict-markers --strict-config"
28
+ testpaths = ["tests"]
29
+
30
+ [tool.coverage.run]
31
+ branch = true
32
+ source = ["pyftdc"]
33
+
34
+ [tool.coverage.report]
35
+ show_missing = true
36
+ fail_under = 70
37
+
38
+ [tool.ruff]
39
+ line-length = 100
40
+ target-version = "py310"
41
+
42
+ [tool.ruff.lint]
43
+ select = ["E", "F", "I", "UP", "B", "SIM"]
44
+
45
+ [tool.pyright]
46
+ include = ["src", "tests"]
47
+ pythonVersion = "3.10"
48
+ typeCheckingMode = "strict"
49
+ venv = ".venv"
50
+ venvPath = "."
51
+
52
+ [tool.pylint.messages_control]
53
+ disable = ["R0914"]
@@ -0,0 +1,13 @@
1
+ """Read time-series metrics from MongoDB FTDC archives."""
2
+
3
+ from pyftdc.exceptions import FTDCDecodeError, FTDCError, MetricNotFoundError
4
+ from pyftdc.models import DataPoint
5
+ from pyftdc.reader import FTDCReader
6
+
7
+ __all__ = [
8
+ "DataPoint",
9
+ "FTDCDecodeError",
10
+ "FTDCError",
11
+ "FTDCReader",
12
+ "MetricNotFoundError",
13
+ ]
@@ -0,0 +1,214 @@
1
+ """Low-level BSON framing and FTDC metric chunk decoding."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import struct
7
+ import zlib
8
+ from collections.abc import Iterator, Mapping, Sequence
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any, BinaryIO, cast
13
+
14
+ from bson import BSON
15
+ from bson.codec_options import CodecOptions
16
+ from bson.decimal128 import Decimal128
17
+ from bson.timestamp import Timestamp
18
+
19
+ from pyftdc.exceptions import FTDCDecodeError
20
+ from pyftdc.models import MetricValue
21
+
22
+ _UINT64_MASK = (1 << 64) - 1
23
+ _MIN_BSON_SIZE = 5
24
+ _CODEC_OPTIONS: CodecOptions[Any] = CodecOptions(tz_aware=True, tzinfo=timezone.utc)
25
+
26
+
27
@dataclass(frozen=True, slots=True)
class MetricSlot:
    """A compressed numeric field and its location in a reference document.

    One slot corresponds to one integer column of a metric chunk. Reference
    values that are stored as two integers (BSON timestamps and Decimal128
    values) produce two slots that share ``path`` and differ in ``part``.
    """

    # Dotted path of the field inside the reference document.
    path: str
    # Value taken from the reference document, held as an unsigned 64-bit integer.
    initial: int
    # Type tag of the reference value: "bool", "int", "float", "datetime",
    # "timestamp" or "decimal128".
    kind: str
    # Position of this slot among the slots emitted for one field (0 or 1).
    part: int = 0
35
+
36
+
37
@dataclass(frozen=True, slots=True)
class DecodedChunk:
    """A reference document and its decoded metric rows."""

    # Decoded BSON reference document found at the start of the chunk.
    reference: Mapping[str, Any]
    # One slot per numeric column, in reference-document traversal order.
    slots: tuple[MetricSlot, ...]
    # Sample rows aligned with ``slots``. Row 0 holds the reference values and
    # every later row has one more set of deltas applied; values are unsigned
    # 64-bit integers.
    rows: tuple[tuple[int, ...], ...]
44
+
45
+
46
def iter_bson_documents(stream: BinaryIO, source: Path) -> Iterator[Mapping[str, Any]]:
    """Yield the concatenated BSON documents in an FTDC file.

    Args:
        stream: Binary stream positioned at the start of a BSON document.
        source: Path of the file being read; used only in error messages.

    Yields:
        Each decoded document. Datetimes are decoded as timezone-aware UTC
        values.

    Raises:
        FTDCDecodeError: If a length prefix or a document body is truncated,
            a declared length is below the minimum BSON size, or a document
            cannot be decoded.
    """

    while prefix := stream.read(4):
        if len(prefix) != 4:
            raise FTDCDecodeError(f"{source}: truncated BSON length")
        (length,) = struct.unpack("<I", prefix)
        if length == 0:  # Zero bytes terminate an interim file.
            return
        if length < _MIN_BSON_SIZE:
            raise FTDCDecodeError(f"{source}: invalid BSON length {length}")
        # The declared length includes the four prefix bytes already consumed.
        remainder = stream.read(length - 4)
        if len(remainder) != length - 4:
            raise FTDCDecodeError(f"{source}: truncated BSON document")
        try:
            document = BSON(prefix + remainder).decode(codec_options=_CODEC_OPTIONS)
        except Exception as exc:
            raise FTDCDecodeError(f"{source}: invalid BSON document") from exc
        # Yield outside the ``try`` so that an exception thrown into the
        # generator by its consumer is not misreported as an invalid document.
        yield document
64
+
65
+
66
def decode_metric_document(document: Mapping[str, Any]) -> DecodedChunk:
    """Decode one outer FTDC document whose type is 1.

    The decompressed payload holds a BSON reference document, two
    little-endian uint32 counters (metric count, delta count) and the
    varint-encoded deltas stored metric by metric. The returned rows start
    with the reference values and add one row per delta sample, each value
    wrapped to 64 bits.

    Raises:
        FTDCDecodeError: If the payload is too short, the reference document
            is invalid, or the slot count disagrees with the chunk header.
    """

    payload = _decompress_payload(document)
    if len(payload) < _MIN_BSON_SIZE:
        raise FTDCDecodeError("metric chunk has no reference document")

    reference_size = struct.unpack_from("<I", payload)[0]
    header_end = reference_size + 8  # reference document + two uint32 counters
    if reference_size < _MIN_BSON_SIZE or header_end > len(payload):
        raise FTDCDecodeError("invalid reference document size")
    try:
        reference = BSON(payload[:reference_size]).decode(codec_options=_CODEC_OPTIONS)
    except Exception as exc:
        raise FTDCDecodeError("invalid metric reference document") from exc

    metric_count, delta_count = struct.unpack_from("<II", payload, reference_size)
    slots = tuple(_extract_slots(reference))
    if len(slots) != metric_count:
        raise FTDCDecodeError(
            f"reference contains {len(slots)} numeric slots, chunk declares {metric_count}"
        )

    deltas = _decode_deltas(memoryview(payload)[header_end:], metric_count * delta_count)
    running = [slot.initial for slot in slots]
    rows: list[tuple[int, ...]] = [tuple(running)]
    for sample_index in range(delta_count):
        # Deltas are laid out metric-major: all samples of metric 0, then metric 1, ...
        running = [
            (previous + deltas[metric_index * delta_count + sample_index]) & _UINT64_MASK
            for metric_index, previous in enumerate(running)
        ]
        rows.append(tuple(running))

    return DecodedChunk(reference, slots, tuple(rows))
99
+
100
+
101
+ def _decompress_payload(document: Mapping[str, Any]) -> bytes:
102
+ """Validate and decompress the binary payload of a metric document."""
103
+
104
+ payload = document.get("data", document.get("doc"))
105
+ if not isinstance(payload, (bytes, bytearray, memoryview)):
106
+ raise FTDCDecodeError("metric document has no binary 'data' or 'doc' field")
107
+ data = payload.tobytes() if isinstance(payload, memoryview) else bytes(payload)
108
+ if len(data) < 5:
109
+ raise FTDCDecodeError("compressed metric chunk is too short")
110
+
111
+ (expected_size,) = struct.unpack_from("<I", data)
112
+ try:
113
+ raw = zlib.decompress(data[4:])
114
+ except zlib.error as exc:
115
+ raise FTDCDecodeError("invalid zlib metric payload") from exc
116
+ if len(raw) != expected_size:
117
+ raise FTDCDecodeError(
118
+ f"metric chunk size mismatch: expected {expected_size}, got {len(raw)}"
119
+ )
120
+ return raw
121
+
122
+
123
def value_for_slot(slot: MetricSlot, raw_value: int) -> MetricValue:
    """Convert a stored unsigned integer back to the slot's Python value.

    ``bool`` slots return the truthiness of the raw value, ``float`` slots
    return the signed integer as a float, and every other kind returns the
    signed 64-bit integer.
    """

    signed_value = _as_signed(raw_value)
    kind = slot.kind
    if kind == "float":
        return float(signed_value)
    return bool(raw_value) if kind == "bool" else signed_value
132
+
133
+
134
def timestamp_for_row(chunk: DecodedChunk, row: Sequence[int]) -> datetime:
    """Return the UTC time stored in the row's top-level ``start`` column.

    The column is the first slot whose path is ``start`` and whose kind is
    ``datetime``; its value is read as signed milliseconds since the epoch.

    Raises:
        FTDCDecodeError: If the chunk has no such slot.
    """

    column = next(
        (
            position
            for position, slot in enumerate(chunk.slots)
            if slot.path == "start" and slot.kind == "datetime"
        ),
        None,
    )
    if column is None:
        raise FTDCDecodeError("metric reference document has no top-level datetime 'start'")
    milliseconds = _as_signed(row[column])
    return datetime.fromtimestamp(milliseconds / 1000, tz=timezone.utc)
141
+
142
+
143
def _extract_slots(value: object, path: str = "") -> Iterator[MetricSlot]:
    """Yield the metric slots found in *value*, depth first.

    Mappings and sequences are walked recursively; child paths are joined
    with dots, using the key or the list index as the path segment. Leaf
    values produce slots as follows:

    * ``bool``, ``int``, ``float`` and ``datetime`` yield one slot each.
    * ``Timestamp`` yields two slots (seconds, then increment).
    * ``Decimal128`` yields two slots (low, then high 64 bits).
    * any other type yields nothing.

    Args:
        value: Reference document, or a value nested inside it.
        path: Dotted path of *value*; empty for the document root.
    """

    if isinstance(value, Mapping):
        mapping = cast(Mapping[object, object], value)
        for name, child in mapping.items():
            child_path = f"{path}.{name}" if path else str(name)
            yield from _extract_slots(child, child_path)
        return
    if isinstance(value, (list, tuple)):
        sequence = cast(Sequence[object], value)
        for index, child in enumerate(sequence):
            child_path = f"{path}.{index}" if path else str(index)
            yield from _extract_slots(child, child_path)
        return

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        yield MetricSlot(path, int(value), "bool")
    elif isinstance(value, int):
        yield MetricSlot(path, value & _UINT64_MASK, "int")
    elif isinstance(value, float):
        if math.isnan(value):
            number = 0
        elif math.isinf(value):
            # int(inf) raises OverflowError, so saturate to the int64 range
            # explicitly instead of relying on the clamp below.
            number = (1 << 63) - 1 if value > 0 else -(1 << 63)
        else:
            number = max(-(1 << 63), min((1 << 63) - 1, int(value)))
        yield MetricSlot(path, number & _UINT64_MASK, "float")
    elif isinstance(value, datetime):
        moment = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
        # Use integer arithmetic: going through float seconds
        # (``timestamp() * 1000``) can land just below a whole millisecond and
        # truncate one millisecond low. BSON datetimes carry millisecond
        # precision, so the microsecond division is exact for decoded values.
        elapsed = moment - datetime(1970, 1, 1, tzinfo=timezone.utc)
        milliseconds = (
            elapsed.days * 86_400_000 + elapsed.seconds * 1000 + elapsed.microseconds // 1000
        )
        yield MetricSlot(path, milliseconds & _UINT64_MASK, "datetime")
    elif isinstance(value, Timestamp):
        yield MetricSlot(path, value.time & _UINT64_MASK, "timestamp", 0)
        yield MetricSlot(path, value.inc & _UINT64_MASK, "timestamp", 1)
    elif isinstance(value, Decimal128):
        low, high = struct.unpack("<QQ", value.bid)
        yield MetricSlot(path, low, "decimal128", 0)
        yield MetricSlot(path, high, "decimal128", 1)
178
+
179
+
180
def _decode_deltas(data: memoryview, expected_count: int) -> list[int]:
    """Expand the varint/zero-run encoded stream into *expected_count* deltas.

    A non-zero varint is one delta. A zero varint is followed by another
    varint holding the number of additional zeroes in the run.

    Raises:
        FTDCDecodeError: If a zero run would exceed *expected_count*, if
            bytes remain after the last delta, or if a varint is malformed.
    """
    decoded: list[int] = []
    cursor = 0
    while len(decoded) < expected_count:
        delta, cursor = _read_varint(data, cursor)
        if delta != 0:
            decoded.append(delta)
        else:
            extra_zeroes, cursor = _read_varint(data, cursor)
            zero_total = extra_zeroes + 1
            if zero_total > expected_count - len(decoded):
                raise FTDCDecodeError("zero run exceeds declared metric data")
            decoded += [0] * zero_total
    if cursor != len(data):
        raise FTDCDecodeError("unexpected bytes after compressed metric data")
    return decoded
196
+
197
+
198
def _read_varint(data: memoryview, position: int) -> tuple[int, int]:
    """Read one little-endian base-128 varint starting at *position*.

    Returns:
        The decoded value and the position just past its last byte.

    Raises:
        FTDCDecodeError: If the data ends inside the varint, the varint
            uses more than ten bytes, or the value does not fit in 64 bits.
    """
    end = len(data)
    result = 0
    shift = 0
    while shift < 70:  # at most ten 7-bit groups
        if position >= end:
            raise FTDCDecodeError("truncated varint metric data")
        octet = data[position]
        position += 1
        result |= (octet & 0x7F) << shift
        if octet < 0x80:  # continuation bit clear: last byte of the varint
            if result > _UINT64_MASK:
                raise FTDCDecodeError("varint exceeds uint64")
            return result, position
        shift += 7
    raise FTDCDecodeError("varint exceeds uint64")
211
+
212
+
213
+ def _as_signed(value: int) -> int:
214
+ return value if value < (1 << 63) else value - (1 << 64)
@@ -0,0 +1,13 @@
1
+ """Exceptions raised by pyftdc."""
2
+
3
+
4
class FTDCError(Exception):
    """Base class for pyftdc errors.

    ``FTDCDecodeError`` and ``MetricNotFoundError`` both derive from this
    class, so catching ``FTDCError`` handles every library-specific failure.
    """
6
+
7
+
8
class FTDCDecodeError(FTDCError):
    """An FTDC file or compressed metric chunk is invalid.

    The exception message describes which part of the input failed validation.
    """
10
+
11
+
12
class MetricNotFoundError(FTDCError, KeyError):
    """The requested metric does not occur in the source.

    Inherits from both ``FTDCError`` and ``KeyError``, so handlers written
    for either exception type catch it.
    """
@@ -0,0 +1,15 @@
1
+ """Public value objects."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from typing import TypeAlias
6
+
7
+ MetricValue: TypeAlias = int | float | bool
8
+
9
+
10
@dataclass(frozen=True, slots=True)
class DataPoint:
    """One metric observation."""

    # Moment the sample was taken.
    timestamp: datetime
    # Observed value of the metric (int, float or bool).
    value: MetricValue
@@ -0,0 +1,163 @@
1
+ """High-level FTDC metric query API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from collections.abc import Iterator
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+ from pyftdc._codec import (
11
+ DecodedChunk,
12
+ decode_metric_document,
13
+ iter_bson_documents,
14
+ timestamp_for_row,
15
+ value_for_slot,
16
+ )
17
+ from pyftdc.exceptions import FTDCError, MetricNotFoundError
18
+ from pyftdc.models import DataPoint
19
+
20
+
21
class FTDCReader:
    """Read metrics from one FTDC file or a ``diagnostic.data`` directory."""

    def __init__(self, source: str | Path) -> None:
        """Store *source* after checking that it can be read from.

        Raises:
            FileNotFoundError: If *source* does not exist.
            FTDCError: If *source* is neither a regular file nor a directory.
        """
        self.source = Path(source)
        if not self.source.exists():
            raise FileNotFoundError(self.source)
        if not self.source.is_file() and not self.source.is_dir():
            raise FTDCError(f"FTDC source is not a regular file or directory: {self.source}")

    def get_metric(
        self,
        name: set[str] | str,
        start: datetime | None = None,
        end: datetime | None = None,
        sample_rate: float = 1.0,
    ) -> dict[str, list[DataPoint]]:
        """Return sampled observations by metric name in the inclusive UTC timespan.

        Args:
            name: Dotted metric names to read. A single string is treated as
                one name. An empty collection selects every metric.
            start: Inclusive lower bound; must be timezone-aware. ``None``
                leaves the range open.
            end: Inclusive upper bound; must be timezone-aware. ``None``
                leaves the range open.
            sample_rate: Fraction of in-range points to keep per metric,
                greater than 0 and at most 1.

        Returns:
            A dict ordered by metric name whose values are points ordered by
            timestamp. When several samples of one metric share a timestamp,
            the last one read is kept.

        Raises:
            ValueError: If a name is empty, a bound is naive, ``start`` is
                after ``end``, or ``sample_rate`` is out of range.
            MetricNotFoundError: If a requested name occurs in no chunk that
                was read; carries the alphabetically first missing name.
        """

        # A bare string is one metric name; ``set("a.b")`` would silently
        # split it into single characters.
        requested_names = {name} if isinstance(name, str) else set(name)
        if "" in requested_names:
            raise ValueError("metric names must not be empty")
        start_utc = _as_utc(start, "start") if start is not None else None
        end_utc = _as_utc(end, "end") if end is not None else None
        if start_utc is not None and end_utc is not None and start_utc > end_utc:
            raise ValueError("start must be before or equal to end")
        if not math.isfinite(sample_rate) or not 0 < sample_rate <= 1:
            raise ValueError("sample_rate must be greater than 0 and at most 1")

        found_names: set[str] = set()
        # Count of in-range points seen so far per metric; drives sampling.
        point_numbers: dict[str, int] = {}
        points_by_name: dict[str, dict[datetime, DataPoint]] = {}
        for chunk in self._metric_chunks(start_utc, end_utc):
            # Only the first slot of a multi-part value is exposed as a metric.
            matching_slots = {
                slot.path: (index, slot)
                for index, slot in enumerate(chunk.slots)
                if slot.part == 0 and (not requested_names or slot.path in requested_names)
            }
            if not matching_slots:
                continue
            found_names.update(matching_slots)
            for metric_name in matching_slots:
                point_numbers.setdefault(metric_name, 0)
                points_by_name.setdefault(metric_name, {})
            for row in chunk.rows:
                timestamp = timestamp_for_row(chunk, row)
                if (start_utc is None or start_utc <= timestamp) and (
                    end_utc is None or timestamp <= end_utc
                ):
                    for metric_name, (metric_index, slot) in matching_slots.items():
                        point_number = point_numbers[metric_name] + 1
                        point_numbers[metric_name] = point_number
                        # Keep a point only when the scaled counter crosses an
                        # integer boundary, which spreads kept points evenly.
                        if int(point_number * sample_rate) == int((point_number - 1) * sample_rate):
                            continue
                        points_by_name[metric_name][timestamp] = DataPoint(
                            timestamp=timestamp,
                            value=value_for_slot(slot, row[metric_index]),
                        )

        missing_names = requested_names - found_names
        if missing_names:
            raise MetricNotFoundError(sorted(missing_names)[0])
        return {
            metric_name: [points[timestamp] for timestamp in sorted(points)]
            for metric_name, points in sorted(points_by_name.items())
        }

    query = get_metric

    def list_metrics(self) -> list[str]:
        """Return sorted dotted names for numeric fields in the source."""

        names: set[str] = set()
        for chunk in self._metric_chunks():
            names.update(slot.path for slot in chunk.slots)
        return sorted(names)

    def _metric_chunks(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> Iterator[DecodedChunk]:
        """Yield every decoded type-1 chunk from the files selected for the range.

        Documents whose ``type`` is not 1 are skipped. Rows are not filtered
        here; *start* and *end* only narrow the set of files that are opened.
        """
        for path in self._paths(start, end):
            with path.open("rb") as stream:
                for document in iter_bson_documents(stream, path):
                    if document.get("type") == 1:
                        yield decode_metric_document(document)

    def _paths(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> list[Path]:
        """Return the files to read, sorted by path, for the given range.

        A file source is returned as is. For a directory, ``metrics.*`` files
        not ending in ``.tmp`` are candidates. Files whose name carries no
        parseable time are always kept. Timestamped files are kept from the
        latest one starting at or before *start* through the last one
        starting at or before *end*; the earliest file is always eligible.
        """
        if self.source.is_file():
            return [self.source]
        paths = sorted(
            path
            for path in self.source.glob("metrics.*")
            if path.is_file() and not path.name.endswith(".tmp")
        )
        times = {path: _time_from_filename(path) for path in paths}
        timestamped = [file_time for file_time in times.values() if file_time is not None]
        if not timestamped:
            return paths

        first_time = min(timestamped)
        lower_file_time: datetime | None = None
        if start is not None:
            # The file that starts at or before ``start`` may still contain it.
            preceding = [file_time for file_time in timestamped if file_time <= start]
            lower_file_time = max(preceding, default=first_time)

        upper_file_time = end
        if end is not None and end < first_time:
            upper_file_time = first_time

        return [
            path
            for path in paths
            if (file_time := times[path]) is None
            or (
                (lower_file_time is None or lower_file_time <= file_time)
                and (upper_file_time is None or file_time <= upper_file_time)
            )
        ]
145
+
146
+
147
+ def _as_utc(value: datetime, label: str) -> datetime:
148
+ if value.tzinfo is None or value.utcoffset() is None:
149
+ raise ValueError(f"{label} must be timezone-aware")
150
+ return value.astimezone(timezone.utc)
151
+
152
+
153
+ def _time_from_filename(path: Path) -> datetime | None:
154
+ name = path.name.removeprefix("metrics.")
155
+ timestamp, separator, sequence = name.rpartition("-")
156
+ if not separator or not sequence.isdigit():
157
+ return None
158
+ try:
159
+ return datetime.strptime(timestamp, "%Y-%m-%dT%H-%M-%SZ").replace(
160
+ tzinfo=timezone.utc
161
+ )
162
+ except ValueError:
163
+ return None
@@ -0,0 +1 @@
1
+ """Unit tests for pyftdc."""
@@ -0,0 +1,56 @@
1
+ """Small valid FTDC fixture builders for unit tests."""
2
+
3
+ import struct
4
+ import zlib
5
+ from collections.abc import Mapping, Sequence
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from bson import BSON, Binary
10
+
11
+
12
def write_ftdc(
    path: Path,
    reference: Mapping[str, Any],
    columns: Sequence[Sequence[int]],
) -> Path:
    """Write one type-1 chunk. Columns contain deltas after the reference.

    The uncompressed chunk is the BSON-encoded ``reference`` document, two
    little-endian uint32 values (number of columns, deltas per column), and
    the deltas flattened column by column, zero-run-compacted and
    varint-encoded. It is zlib-compressed, prefixed with its uncompressed
    length, and stored as the ``data`` binary of a ``{"type": 1}`` document.

    Args:
        path: Destination file; overwritten with the single BSON document.
        reference: Reference document for the chunk.
        columns: One sequence of deltas per metric column, all equally long.

    Returns:
        ``path``, for convenient chaining in tests.

    Raises:
        ValueError: if the columns do not all have the same length.
    """

    delta_count = len(columns[0]) if columns else 0
    # Raise explicitly: a bare ``assert`` is stripped under ``python -O`` and
    # ragged columns would then silently produce a corrupt fixture.
    if any(len(column) != delta_count for column in columns):
        raise ValueError("all columns must contain the same number of deltas")
    reference_bson = BSON.encode(dict(reference))
    deltas = [value for column in columns for value in column]
    compacted = _rle_zeroes(deltas)
    raw = (
        reference_bson
        + struct.pack("<II", len(columns), delta_count)
        + b"".join(_varint(value) for value in compacted)
    )
    payload = struct.pack("<I", len(raw)) + zlib.compress(raw)
    path.write_bytes(BSON.encode({"type": 1, "data": Binary(payload)}))
    return path
32
+
33
+
34
def _rle_zeroes(values: Sequence[int]) -> list[int]:
    """Collapse each run of zeroes into the pair ``(0, run_length - 1)``.

    Non-zero values are passed through masked to 64 bits, so negative deltas
    come out as their unsigned two's-complement form.
    """
    mask = (1 << 64) - 1
    encoded: list[int] = []
    run = 0
    for item in values:
        if item == 0:
            run += 1
            continue
        if run:
            # Flush the pending zero run before the non-zero value.
            encoded += [0, run - 1]
            run = 0
        encoded.append(item & mask)
    if run:
        encoded += [0, run - 1]
    return encoded
48
+
49
+
50
def _varint(value: int) -> bytes:
    """Encode ``value`` as a little-endian base-128 varint.

    Each byte carries seven payload bits; the high bit is set on every byte
    except the last.
    """
    encoded = bytearray()
    remaining = value
    while remaining >= 0x80:
        encoded.append(0x80 | (remaining & 0x7F))
        remaining >>= 7
    encoded.append(remaining)
    return bytes(encoded)
@@ -0,0 +1,200 @@
1
+ """Tests for the public FTDC reader API."""
2
+
3
+ from datetime import datetime, timedelta, timezone
4
+ from pathlib import Path
5
+
6
+ import pytest
7
+
8
+ from pyftdc import FTDCReader, MetricNotFoundError
9
+ from tests.conftest import write_ftdc
10
+
11
+
12
def test_get_metric_filters_timespan_and_decodes_deltas(tmp_path: Path) -> None:
    """Deltas are decoded and both ends of the requested timespan are kept."""

    metric = "serverStatus.connections.current"
    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reference = {"start": origin, "serverStatus": {"connections": {"current": 10}}}
    # (1 << 64) - 5 is -5 modulo 2**64.
    deltas = [[1000, 1000, 1000], [2, 0, (1 << 64) - 5]]
    write_ftdc(tmp_path / "metrics.2026-01-01T00-00-00Z-00000", reference, deltas)

    window_start = origin + timedelta(seconds=1)
    window_end = origin + timedelta(seconds=2)
    result = FTDCReader(tmp_path).get_metric({metric}, window_start, window_end)
    points = result[metric]

    timestamps = [point.timestamp for point in points]
    assert timestamps == [window_start, window_end]
    values = [point.value for point in points]
    assert values == [12, 12]
34
+
35
+
36
def test_get_metric_returns_multiple_metrics(tmp_path: Path) -> None:
    """Each requested metric gets its own point list, keyed in sorted order."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reference = {"start": origin, "value": 1, "other": 10}
    archive = write_ftdc(
        tmp_path / "metrics.interim",
        reference,
        [[1000, 1000], [1, 1], [2, 2]],
    )

    reader = FTDCReader(archive)
    result = reader.get_metric({"value", "other"}, origin, origin + timedelta(seconds=2))

    def values_of(name: str) -> list:
        return [point.value for point in result[name]]

    assert list(result) == ["other", "value"]
    assert values_of("value") == [1, 2, 3]
    assert values_of("other") == [10, 12, 14]
51
+
52
+
53
def test_omitted_timespan_reads_earliest_through_latest_in_folder(tmp_path: Path) -> None:
    """With no bounds given, samples from every file in the folder are returned."""

    first_start = datetime(2026, 1, 1, tzinfo=timezone.utc)
    second_start = first_start + timedelta(hours=1)
    write_ftdc(tmp_path / "metrics.1", {"start": first_start, "value": 1}, [[], []])
    write_ftdc(tmp_path / "metrics.2", {"start": second_start, "value": 2}, [[], []])

    points = FTDCReader(tmp_path).get_metric({"value"})["value"]

    assert [point.timestamp for point in points] == [first_start, second_start]
    assert [point.value for point in points] == [1, 2]
71
+
72
+
73
def test_each_timespan_bound_can_be_omitted(tmp_path: Path) -> None:
    """Leaving out one bound keeps every sample on that side of the other bound."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    midpoint = origin + timedelta(seconds=1)
    archive = write_ftdc(
        tmp_path / "metrics.interim",
        {"start": origin, "value": 0},
        [[1000, 1000], [1, 1]],
    )
    reader = FTDCReader(archive)

    up_to_midpoint = reader.get_metric({"value"}, end=midpoint)
    midpoint_onwards = reader.get_metric({"value"}, start=midpoint)

    assert [point.value for point in up_to_midpoint["value"]] == [0, 1]
    assert [point.value for point in midpoint_onwards["value"]] == [1, 2]
89
+
90
+
91
def test_start_bound_skips_older_timestamped_files(tmp_path: Path) -> None:
    """An unparseable file stamped before the start candidate does not break the query."""

    origin = datetime(2026, 1, 2, tzinfo=timezone.utc)
    stale = tmp_path / "metrics.2026-01-01T00-00-00Z-00000"
    # Invalid content: the query below only succeeds if this file is never parsed.
    stale.write_bytes(b"not BSON")
    current = tmp_path / "metrics.2026-01-02T00-00-00Z-00000"
    write_ftdc(current, {"start": origin, "value": 1}, [[], []])

    points = FTDCReader(tmp_path).get_metric({"value"}, start=origin)["value"]

    assert [point.value for point in points] == [1]
105
+
106
+
107
def test_end_bound_skips_newer_timestamped_files(tmp_path: Path) -> None:
    """An unparseable file stamped after the end bound does not break the query."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    current = tmp_path / "metrics.2026-01-01T00-00-00Z-00000"
    write_ftdc(current, {"start": origin, "value": 1}, [[], []])
    future = tmp_path / "metrics.2026-01-02T00-00-00Z-00000"
    # Invalid content: the query below only succeeds if this file is never parsed.
    future.write_bytes(b"not BSON")

    points = FTDCReader(tmp_path).get_metric({"value"}, end=origin)["value"]

    assert [point.value for point in points] == [1]
121
+
122
+
123
def test_empty_names_returns_all_metrics(tmp_path: Path) -> None:
    """Passing an empty set of names yields every metric found in the archive."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reference = {"start": origin, "value": 1, "other": 10}
    archive = write_ftdc(tmp_path / "metrics.interim", reference, [[1000], [1], [2]])

    reader = FTDCReader(archive)
    result = reader.get_metric(set(), origin, origin + timedelta(seconds=1))

    assert set(result) == {"start", "value", "other"}
    # Reference sample plus one delta gives two points per metric.
    assert all(len(points) == 2 for points in result.values())
137
+
138
+
139
def test_missing_metric_raises(tmp_path: Path) -> None:
    """Requesting a metric the archive does not contain raises MetricNotFoundError."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reference = {"start": origin, "value": 1}
    archive = write_ftdc(tmp_path / "metrics.interim", reference, [[], []])
    reader = FTDCReader(archive)

    with pytest.raises(MetricNotFoundError):
        reader.get_metric({"value", "other"}, origin, origin)
147
+
148
+
149
def test_rejects_empty_metric_name(tmp_path: Path) -> None:
    """The empty string is rejected as a metric name with a ValueError."""

    moment = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reader = FTDCReader(tmp_path)

    with pytest.raises(ValueError, match="metric names"):
        reader.get_metric({""}, moment, moment)
156
+
157
+
158
def test_rejects_naive_timespan(tmp_path: Path) -> None:
    """Naive datetimes are rejected as timespan bounds with a ValueError."""

    naive_start = datetime(2026, 1, 1)
    naive_end = datetime(2026, 1, 2)
    reader = FTDCReader(tmp_path)

    with pytest.raises(ValueError, match="timezone-aware"):
        reader.get_metric({"value"}, naive_start, naive_end)
165
+
166
+
167
def test_get_metric_samples_points(tmp_path: Path) -> None:
    """With sample_rate=0.1, ten points per metric reduce to the same single point."""

    origin = datetime(2026, 1, 1, tzinfo=timezone.utc)
    last = origin + timedelta(seconds=9)
    # Nine deltas per column: 1000 for "start", 1 for "value", 2 for "other".
    columns = [[step] * 9 for step in (1000, 1, 2)]
    archive = write_ftdc(
        tmp_path / "metrics.interim",
        {"start": origin, "value": 0, "other": 10},
        columns,
    )

    result = FTDCReader(archive).get_metric(
        {"value", "other"},
        origin,
        last,
        sample_rate=0.1,
    )

    def pairs(name: str) -> list:
        return [(point.timestamp, point.value) for point in result[name]]

    assert pairs("value") == [(last, 9)]
    assert pairs("other") == [(last, 28)]
190
+
191
+
192
@pytest.mark.parametrize("sample_rate", [0, -0.1, 1.1, float("nan"), float("inf")])
def test_rejects_invalid_sample_rate(tmp_path: Path, sample_rate: float) -> None:
    """Zero, negative, above-one, NaN and infinite sample rates raise ValueError."""

    moment = datetime(2026, 1, 1, tzinfo=timezone.utc)
    reader = FTDCReader(tmp_path)

    with pytest.raises(ValueError, match="sample_rate"):
        reader.get_metric({"value"}, moment, moment, sample_rate=sample_rate)