PyPI - pymongoftdc - Versions diffs - 0.1.0__tar.gz - Mend

pymongoftdc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

pymongoftdc-0.1.0/.github/workflows/ci.yml +29 -0
pymongoftdc-0.1.0/.github/workflows/publish.yml +81 -0
pymongoftdc-0.1.0/.gitignore +221 -0
pymongoftdc-0.1.0/.vscode/launch.json +15 -0
pymongoftdc-0.1.0/LICENSE +21 -0
pymongoftdc-0.1.0/Makefile +19 -0
pymongoftdc-0.1.0/PKG-INFO +98 -0
pymongoftdc-0.1.0/README.md +62 -0
pymongoftdc-0.1.0/pyproject.toml +53 -0
pymongoftdc-0.1.0/src/pyftdc/__init__.py +13 -0
pymongoftdc-0.1.0/src/pyftdc/_codec.py +214 -0
pymongoftdc-0.1.0/src/pyftdc/exceptions.py +13 -0
pymongoftdc-0.1.0/src/pyftdc/models.py +15 -0
pymongoftdc-0.1.0/src/pyftdc/reader.py +163 -0
pymongoftdc-0.1.0/tests/__init__.py +1 -0
pymongoftdc-0.1.0/tests/conftest.py +56 -0
pymongoftdc-0.1.0/tests/test_reader.py +200 -0

pymongoftdc-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,29 @@
+name: CI
+on:
+  push:
+    branches:
+      - main
+permissions:
+  contents: read
+jobs:
+  checks:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v6
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.10"
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Lint
+        run: make lint
+      - name: Test
+        run: make test

pymongoftdc-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,81 @@
+name: Publish to PyPI
+on:
+  release:
+    types:
+      - published
+permissions:
+  contents: read
+jobs:
+  build:
+    name: Build distributions
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out release
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ github.event.release.tag_name }}
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Install build tools
+        run: python -m pip install --upgrade build twine
+      - name: Verify release version
+        env:
+          RELEASE_TAG: ${{ github.event.release.tag_name }}
+        run: |
+          python - <<'PY'
+          import os
+          import tomllib
+          with open("pyproject.toml", "rb") as stream:
+              package_version = tomllib.load(stream)["project"]["version"]
+          release_version = os.environ["RELEASE_TAG"].removeprefix("v")
+          if release_version != package_version:
+              raise SystemExit(
+                  f"release tag version {release_version!r} does not match "
+                  f"pyproject.toml version {package_version!r}"
+              )
+          PY
+      - name: Build distributions
+        run: python -m build
+      - name: Check distributions
+        run: python -m twine check dist/*
+      - name: Upload distributions
+        uses: actions/upload-artifact@v6
+        with:
+          name: python-package-distributions
+          path: dist/
+          if-no-files-found: error
+          retention-days: 1
+  publish:
+    name: Publish distributions
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/pymongoftdc
+    permissions:
+      id-token: write
+    steps:
+      - name: Download distributions
+        uses: actions/download-artifact@v6
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

pymongoftdc-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,221 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#   Usually these files are written by a python script from a template
+#   before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+# Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+# uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# Redis
+*.rdb
+*.aof
+*.pid
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+# ActiveMQ
+activemq-data/
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#   JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#   be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#   and can be added to the global gitignore or merged into this file.  For a more nuclear
+#   option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+# Abstra
+#   Abstra is an AI-powered process automation framework.
+#   Ignore directories containing user credentials, local state, and settings.
+#   Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#   Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#   that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#   and can be added to the global gitignore or merged into this file. However, if you prefer,
+#   you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Temporary file for partial code execution
+tempCodeRunnerFile.py
+# Ruff stuff:
+.ruff_cache/
+# Qwen Code
+.qwen/
+# PyPI configuration file
+.pypirc
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml

pymongoftdc-0.1.0/.vscode/launch.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Run All Unit Tests",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "pytest",
+      "python": "${workspaceFolder}/.venv/bin/python",
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "justMyCode": false
+    }
+  ]
+}

pymongoftdc-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Yaoxing
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pymongoftdc-0.1.0/Makefile ADDED Viewed

@@ -0,0 +1,19 @@
+PYTHON ?= python3
+VENV := .venv
+VENV_PYTHON := $(VENV)/bin/python
+.PHONY: venv deps lint test
+venv:
+	@test -x "$(VENV_PYTHON)" || "$(PYTHON)" -m venv "$(VENV)"
+deps: venv
+	"$(VENV_PYTHON)" -m pip install -e '.[test]'
+lint: deps
+	"$(VENV_PYTHON)" -m ruff check src tests
+	"$(VENV_PYTHON)" -m pylint src tests
+	"$(VENV_PYTHON)" -m pyright
+test: deps
+	"$(VENV_PYTHON)" -m pytest

pymongoftdc-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,98 @@
+Metadata-Version: 2.4
+Name: pymongoftdc
+Version: 0.1.0
+Summary: A typed reader for MongoDB FTDC metric archives
+License: MIT License
+        Copyright (c) 2026 Yaoxing
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Requires-Python: >=3.10
+Requires-Dist: pymongo<5,>=4.6
+Provides-Extra: test
+Requires-Dist: pylint>=3; extra == 'test'
+Requires-Dist: pyright>=1.1.400; extra == 'test'
+Requires-Dist: pytest-cov>=5; extra == 'test'
+Requires-Dist: pytest>=8; extra == 'test'
+Requires-Dist: ruff>=0.11; extra == 'test'
+Description-Content-Type: text/markdown
+# pymongoftdc
+[![CI](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+`pymongoftdc` reads numeric time-series metrics directly from MongoDB Full-Time
+Diagnostic Data Capture (FTDC) archive files.
+## Install
+```bash
+python -m pip install -e .
+```
+For development:
+```bash
+python -m pip install -e '.[test]'
+pytest
+```
+## Use
+```python
+from datetime import datetime, timezone
+from pyftdc import FTDCReader
+reader = FTDCReader("/var/lib/mongo/diagnostic.data")
+metrics = reader.get_metric(
+    {"serverStatus.connections.current"},
+    start=datetime(2026, 1, 1, tzinfo=timezone.utc),
+    end=datetime(2026, 1, 1, 1, tzinfo=timezone.utc),
+    sample_rate=0.1,
+)
+points = metrics["serverStatus.connections.current"]
+```
+The source may be one `metrics.*` file or a `diagnostic.data` directory.
+Timespan endpoints are inclusive and must be timezone-aware. Omit `start` or
+`end` to use the earliest or latest timestamp in the source. The result maps each
+requested name to points ordered by UTC timestamp. Pass an empty set to read every
+metric. `sample_rate` must be greater than 0 and at most 1;
+for example, `0.1` returns approximately 10% of points. Its default is `1.0`.
+`query()` is an alias for `get_metric()`.
+Use `reader.list_metrics()` to discover dotted metric paths. A missing requested
+metric raises `MetricNotFoundError`; an invalid archive raises `FTDCDecodeError`.
+## Project layout
+```text
+src/pyftdc/
+  _codec.py       BSON framing and FTDC decompression
+  reader.py       public query API
+  models.py       returned value objects
+  exceptions.py   library-specific errors
+tests/             pytest tests and fixture builders
+```
+The reader supports BSON-framed type-1 metric chunks using MongoDB's
+delta/RLE/varint/zlib encoding. Metadata documents are safely skipped.

pymongoftdc-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,62 @@
+# pymongoftdc
+[![CI](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+`pymongoftdc` reads numeric time-series metrics directly from MongoDB Full-Time
+Diagnostic Data Capture (FTDC) archive files.
+## Install
+```bash
+python -m pip install -e .
+```
+For development:
+```bash
+python -m pip install -e '.[test]'
+pytest
+```
+## Use
+```python
+from datetime import datetime, timezone
+from pyftdc import FTDCReader
+reader = FTDCReader("/var/lib/mongo/diagnostic.data")
+metrics = reader.get_metric(
+    {"serverStatus.connections.current"},
+    start=datetime(2026, 1, 1, tzinfo=timezone.utc),
+    end=datetime(2026, 1, 1, 1, tzinfo=timezone.utc),
+    sample_rate=0.1,
+)
+points = metrics["serverStatus.connections.current"]
+```
+The source may be one `metrics.*` file or a `diagnostic.data` directory.
+Timespan endpoints are inclusive and must be timezone-aware. Omit `start` or
+`end` to use the earliest or latest timestamp in the source. The result maps each
+requested name to points ordered by UTC timestamp. Pass an empty set to read every
+metric. `sample_rate` must be greater than 0 and at most 1;
+for example, `0.1` returns approximately 10% of points. Its default is `1.0`.
+`query()` is an alias for `get_metric()`.
+Use `reader.list_metrics()` to discover dotted metric paths. A missing requested
+metric raises `MetricNotFoundError`; an invalid archive raises `FTDCDecodeError`.
+## Project layout
+```text
+src/pyftdc/
+  _codec.py       BSON framing and FTDC decompression
+  reader.py       public query API
+  models.py       returned value objects
+  exceptions.py   library-specific errors
+tests/             pytest tests and fixture builders
+```
+The reader supports BSON-framed type-1 metric chunks using MongoDB's
+delta/RLE/varint/zlib encoding. Metadata documents are safely skipped.

pymongoftdc-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,53 @@
+[build-system]
+requires = ["hatchling>=1.25"]
+build-backend = "hatchling.build"
+[project]
+name = "pymongoftdc"
+version = "0.1.0"
+description = "A typed reader for MongoDB FTDC metric archives"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+dependencies = ["pymongo>=4.6,<5"]
+[project.optional-dependencies]
+test = [
+  "pylint>=3",
+  "pyright>=1.1.400",
+  "pytest>=8",
+  "pytest-cov>=5",
+  "ruff>=0.11",
+]
+[tool.hatch.build.targets.wheel]
+packages = ["src/pyftdc"]
+[tool.pytest.ini_options]
+addopts = "-ra --strict-markers --strict-config"
+testpaths = ["tests"]
+[tool.coverage.run]
+branch = true
+source = ["pyftdc"]
+[tool.coverage.report]
+show_missing = true
+fail_under = 70
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.pyright]
+include = ["src", "tests"]
+pythonVersion = "3.10"
+typeCheckingMode = "strict"
+venv = ".venv"
+venvPath = "."
+[tool.pylint.messages_control]
+disable = ["R0914"]

pymongoftdc-0.1.0/src/pyftdc/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Read time-series metrics from MongoDB FTDC archives."""
+from pyftdc.exceptions import FTDCDecodeError, FTDCError, MetricNotFoundError
+from pyftdc.models import DataPoint
+from pyftdc.reader import FTDCReader
+__all__ = [
+    "DataPoint",
+    "FTDCDecodeError",
+    "FTDCError",
+    "FTDCReader",
+    "MetricNotFoundError",
+]

pymongoftdc-0.1.0/src/pyftdc/_codec.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""Low-level BSON framing and FTDC metric chunk decoding."""
+from __future__ import annotations
+import math
+import struct
+import zlib
+from collections.abc import Iterator, Mapping, Sequence
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, BinaryIO, cast
+from bson import BSON
+from bson.codec_options import CodecOptions
+from bson.decimal128 import Decimal128
+from bson.timestamp import Timestamp
+from pyftdc.exceptions import FTDCDecodeError
+from pyftdc.models import MetricValue
+_UINT64_MASK = (1 << 64) - 1
+_MIN_BSON_SIZE = 5
+_CODEC_OPTIONS: CodecOptions[Any] = CodecOptions(tz_aware=True, tzinfo=timezone.utc)
+@dataclass(frozen=True, slots=True)
+class MetricSlot:
+    """A compressed numeric field and its location in a reference document."""
+    path: str
+    initial: int
+    kind: str
+    part: int = 0
+@dataclass(frozen=True, slots=True)
+class DecodedChunk:
+    """A reference document and its decoded metric rows."""
+    reference: Mapping[str, Any]
+    slots: tuple[MetricSlot, ...]
+    rows: tuple[tuple[int, ...], ...]
+def iter_bson_documents(stream: BinaryIO, source: Path) -> Iterator[Mapping[str, Any]]:
+    """Yield the concatenated BSON documents in an FTDC file."""
+    while prefix := stream.read(4):
+        if len(prefix) != 4:
+            raise FTDCDecodeError(f"{source}: truncated BSON length")
+        (length,) = struct.unpack("<I", prefix)
+        if length == 0:  # Zero bytes terminate an interim file.
+            return
+        if length < _MIN_BSON_SIZE:
+            raise FTDCDecodeError(f"{source}: invalid BSON length {length}")
+        remainder = stream.read(length - 4)
+        if len(remainder) != length - 4:
+            raise FTDCDecodeError(f"{source}: truncated BSON document")
+        try:
+            yield BSON(prefix + remainder).decode(codec_options=_CODEC_OPTIONS)
+        except Exception as exc:
+            raise FTDCDecodeError(f"{source}: invalid BSON document") from exc
+def decode_metric_document(document: Mapping[str, Any]) -> DecodedChunk:
+    """Decode one outer FTDC document whose type is 1."""
+    raw = _decompress_payload(document)
+    if len(raw) < _MIN_BSON_SIZE:
+        raise FTDCDecodeError("metric chunk has no reference document")
+    (reference_size,) = struct.unpack_from("<I", raw)
+    if reference_size < _MIN_BSON_SIZE or reference_size + 8 > len(raw):
+        raise FTDCDecodeError("invalid reference document size")
+    try:
+        reference = BSON(raw[:reference_size]).decode(codec_options=_CODEC_OPTIONS)
+    except Exception as exc:
+        raise FTDCDecodeError("invalid metric reference document") from exc
+    metric_count, delta_count = struct.unpack_from("<II", raw, reference_size)
+    slots = tuple(_extract_slots(reference))
+    if len(slots) != metric_count:
+        raise FTDCDecodeError(
+            f"reference contains {len(slots)} numeric slots, chunk declares {metric_count}"
+        )
+    encoded = memoryview(raw)[reference_size + 8 :]
+    flat_deltas = _decode_deltas(encoded, metric_count * delta_count)
+    current = [slot.initial for slot in slots]
+    rows: list[tuple[int, ...]] = [tuple(current)]
+    for sample_index in range(delta_count):
+        for metric_index in range(metric_count):
+            offset = metric_index * delta_count + sample_index
+            current[metric_index] = (current[metric_index] + flat_deltas[offset]) & _UINT64_MASK
+        rows.append(tuple(current))
+    return DecodedChunk(reference, slots, tuple(rows))
+def _decompress_payload(document: Mapping[str, Any]) -> bytes:
+    """Validate and decompress the binary payload of a metric document."""
+    payload = document.get("data", document.get("doc"))
+    if not isinstance(payload, (bytes, bytearray, memoryview)):
+        raise FTDCDecodeError("metric document has no binary 'data' or 'doc' field")
+    data = payload.tobytes() if isinstance(payload, memoryview) else bytes(payload)
+    if len(data) < 5:
+        raise FTDCDecodeError("compressed metric chunk is too short")
+    (expected_size,) = struct.unpack_from("<I", data)
+    try:
+        raw = zlib.decompress(data[4:])
+    except zlib.error as exc:
+        raise FTDCDecodeError("invalid zlib metric payload") from exc
+    if len(raw) != expected_size:
+        raise FTDCDecodeError(
+            f"metric chunk size mismatch: expected {expected_size}, got {len(raw)}"
+        )
+    return raw
+def value_for_slot(slot: MetricSlot, raw_value: int) -> MetricValue:
+    """Restore a compressed integer to the reference field's useful Python type."""
+    signed = _as_signed(raw_value)
+    if slot.kind == "bool":
+        return bool(raw_value)
+    if slot.kind == "float":
+        return float(signed)
+    return signed
+def timestamp_for_row(chunk: DecodedChunk, row: Sequence[int]) -> datetime:
+    """Return the top-level collection start time for a decoded sample."""
+    for index, slot in enumerate(chunk.slots):
+        if slot.path == "start" and slot.kind == "datetime":
+            return datetime.fromtimestamp(_as_signed(row[index]) / 1000, tz=timezone.utc)
+    raise FTDCDecodeError("metric reference document has no top-level datetime 'start'")
+def _extract_slots(value: object, path: str = "") -> Iterator[MetricSlot]:
+    if isinstance(value, Mapping):
+        mapping = cast(Mapping[object, object], value)
+        for name, child in mapping.items():
+            child_path = f"{path}.{name}" if path else str(name)
+            yield from _extract_slots(child, child_path)
+        return
+    if isinstance(value, (list, tuple)):
+        sequence = cast(Sequence[object], value)
+        for index, child in enumerate(sequence):
+            child_path = f"{path}.{index}" if path else str(index)
+            yield from _extract_slots(child, child_path)
+        return
+    if isinstance(value, bool):
+        yield MetricSlot(path, int(value), "bool")
+    elif isinstance(value, int):
+        yield MetricSlot(path, value & _UINT64_MASK, "int")
+    elif isinstance(value, float):
+        number = (
+            0
+            if math.isnan(value)
+            else max(-(1 << 63), min((1 << 63) - 1, int(value)))
+        )
+        yield MetricSlot(path, number & _UINT64_MASK, "float")
+    elif isinstance(value, datetime):
+        moment = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
+        yield MetricSlot(path, int(moment.timestamp() * 1000) & _UINT64_MASK, "datetime")
+    elif isinstance(value, Timestamp):
+        yield MetricSlot(path, value.time & _UINT64_MASK, "timestamp", 0)
+        yield MetricSlot(path, value.inc & _UINT64_MASK, "timestamp", 1)
+    elif isinstance(value, Decimal128):
+        low, high = struct.unpack("<QQ", value.bid)
+        yield MetricSlot(path, low, "decimal128", 0)
+        yield MetricSlot(path, high, "decimal128", 1)
+def _decode_deltas(data: memoryview, expected_count: int) -> list[int]:
+    values: list[int] = []
+    position = 0
+    while len(values) < expected_count:
+        value, position = _read_varint(data, position)
+        if value:
+            values.append(value)
+            continue
+        run_minus_one, position = _read_varint(data, position)
+        run_length = run_minus_one + 1
+        if len(values) + run_length > expected_count:
+            raise FTDCDecodeError("zero run exceeds declared metric data")
+        values.extend([0] * run_length)
+    if position != len(data):
+        raise FTDCDecodeError("unexpected bytes after compressed metric data")
+    return values
+def _read_varint(data: memoryview, position: int) -> tuple[int, int]:
+    value = 0
+    for shift in range(0, 70, 7):
+        if position >= len(data):
+            raise FTDCDecodeError("truncated varint metric data")
+        byte = data[position]
+        position += 1
+        value |= (byte & 0x7F) << shift
+        if not byte & 0x80:
+            if value > _UINT64_MASK:
+                raise FTDCDecodeError("varint exceeds uint64")
+            return value, position
+    raise FTDCDecodeError("varint exceeds uint64")
+def _as_signed(value: int) -> int:
+    return value if value < (1 << 63) else value - (1 << 64)

pymongoftdc-0.1.0/src/pyftdc/exceptions.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Exceptions raised by pyftdc."""
+class FTDCError(Exception):
+    """Base class for pyftdc errors."""
+class FTDCDecodeError(FTDCError):
+    """An FTDC file or compressed metric chunk is invalid."""
+class MetricNotFoundError(FTDCError, KeyError):
+    """The requested metric does not occur in the source."""

pymongoftdc-0.1.0/src/pyftdc/models.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Public value objects."""
+from dataclasses import dataclass
+from datetime import datetime
+from typing import TypeAlias
+MetricValue: TypeAlias = int | float | bool
+@dataclass(frozen=True, slots=True)
+class DataPoint:
+    """One metric observation."""
+    timestamp: datetime
+    value: MetricValue

pymongoftdc-0.1.0/src/pyftdc/reader.py ADDED Viewed

@@ -0,0 +1,163 @@
+"""High-level FTDC metric query API."""
+from __future__ import annotations
+import math
+from collections.abc import Iterator
+from datetime import datetime, timezone
+from pathlib import Path
+from pyftdc._codec import (
+    DecodedChunk,
+    decode_metric_document,
+    iter_bson_documents,
+    timestamp_for_row,
+    value_for_slot,
+)
+from pyftdc.exceptions import FTDCError, MetricNotFoundError
+from pyftdc.models import DataPoint
+class FTDCReader:
+    """Read metrics from one FTDC file or a ``diagnostic.data`` directory."""
+    def __init__(self, source: str | Path) -> None:
+        self.source = Path(source)
+        if not self.source.exists():
+            raise FileNotFoundError(self.source)
+        if not self.source.is_file() and not self.source.is_dir():
+            raise FTDCError(f"FTDC source is not a regular file or directory: {self.source}")
+    def get_metric(
+        self,
+        name: set[str],
+        start: datetime | None = None,
+        end: datetime | None = None,
+        sample_rate: float = 1.0,
+    ) -> dict[str, list[DataPoint]]:
+        """Return sampled observations by metric name in the inclusive UTC timespan."""
+        requested_names = set(name)
+        if "" in requested_names:
+            raise ValueError("metric names must not be empty")
+        start_utc = _as_utc(start, "start") if start is not None else None
+        end_utc = _as_utc(end, "end") if end is not None else None
+        if start_utc is not None and end_utc is not None and start_utc > end_utc:
+            raise ValueError("start must be before or equal to end")
+        if not math.isfinite(sample_rate) or not 0 < sample_rate <= 1:
+            raise ValueError("sample_rate must be greater than 0 and at most 1")
+        found_names: set[str] = set()
+        point_numbers: dict[str, int] = {}
+        points_by_name: dict[str, dict[datetime, DataPoint]] = {}
+        for chunk in self._metric_chunks(start_utc, end_utc):
+            matching_slots = {
+                slot.path: (index, slot)
+                for index, slot in enumerate(chunk.slots)
+                if slot.part == 0 and (not requested_names or slot.path in requested_names)
+            }
+            if not matching_slots:
+                continue
+            found_names.update(matching_slots)
+            for metric_name in matching_slots:
+                point_numbers.setdefault(metric_name, 0)
+                points_by_name.setdefault(metric_name, {})
+            for row in chunk.rows:
+                timestamp = timestamp_for_row(chunk, row)
+                if (start_utc is None or start_utc <= timestamp) and (
+                    end_utc is None or timestamp <= end_utc
+                ):
+                    for metric_name, (metric_index, slot) in matching_slots.items():
+                        point_number = point_numbers[metric_name] + 1
+                        point_numbers[metric_name] = point_number
+                        if int(point_number * sample_rate) == int((point_number - 1) * sample_rate):
+                            continue
+                        points_by_name[metric_name][timestamp] = DataPoint(
+                            timestamp=timestamp,
+                            value=value_for_slot(slot, row[metric_index]),
+                        )
+        missing_names = requested_names - found_names
+        if missing_names:
+            raise MetricNotFoundError(sorted(missing_names)[0])
+        return {
+            metric_name: [points[timestamp] for timestamp in sorted(points)]
+            for metric_name, points in sorted(points_by_name.items())
+        }
+    query = get_metric
+    def list_metrics(self) -> list[str]:
+        """Return sorted dotted names for numeric fields in the source."""
+        names: set[str] = set()
+        for chunk in self._metric_chunks():
+            names.update(slot.path for slot in chunk.slots)
+        return sorted(names)
+    def _metric_chunks(
+        self,
+        start: datetime | None = None,
+        end: datetime | None = None,
+    ) -> Iterator[DecodedChunk]:
+        for path in self._paths(start, end):
+            with path.open("rb") as stream:
+                for document in iter_bson_documents(stream, path):
+                    if document.get("type") == 1:
+                        yield decode_metric_document(document)
+    def _paths(
+        self,
+        start: datetime | None = None,
+        end: datetime | None = None,
+    ) -> list[Path]:
+        if self.source.is_file():
+            return [self.source]
+        paths = sorted(
+            path
+            for path in self.source.glob("metrics.*")
+            if path.is_file() and not path.name.endswith(".tmp")
+        )
+        times = {path: _time_from_filename(path) for path in paths}
+        timestamped = [file_time for file_time in times.values() if file_time is not None]
+        if not timestamped:
+            return paths
+        first_time = min(timestamped)
+        lower_file_time: datetime | None = None
+        if start is not None:
+            preceding = [file_time for file_time in timestamped if file_time <= start]
+            lower_file_time = max(preceding, default=first_time)
+        upper_file_time = end
+        if end is not None and end < first_time:
+            upper_file_time = first_time
+        return [
+            path
+            for path in paths
+            if (file_time := times[path]) is None
+            or (
+                (lower_file_time is None or lower_file_time <= file_time)
+                and (upper_file_time is None or file_time <= upper_file_time)
+            )
+        ]
+def _as_utc(value: datetime, label: str) -> datetime:
+    if value.tzinfo is None or value.utcoffset() is None:
+        raise ValueError(f"{label} must be timezone-aware")
+    return value.astimezone(timezone.utc)
+def _time_from_filename(path: Path) -> datetime | None:
+    name = path.name.removeprefix("metrics.")
+    timestamp, separator, sequence = name.rpartition("-")
+    if not separator or not sequence.isdigit():
+        return None
+    try:
+        return datetime.strptime(timestamp, "%Y-%m-%dT%H-%M-%SZ").replace(
+            tzinfo=timezone.utc
+        )
+    except ValueError:
+        return None

pymongoftdc-0.1.0/tests/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Unit tests for pyftdc."""

pymongoftdc-0.1.0/tests/conftest.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Small valid FTDC fixture builders for unit tests."""
+import struct
+import zlib
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+from typing import Any
+from bson import BSON, Binary
+def write_ftdc(
+    path: Path,
+    reference: Mapping[str, Any],
+    columns: Sequence[Sequence[int]],
+) -> Path:
+    """Write one type-1 chunk. Columns contain deltas after the reference."""
+    delta_count = len(columns[0]) if columns else 0
+    assert all(len(column) == delta_count for column in columns)
+    reference_bson = BSON.encode(dict(reference))
+    deltas = [value for column in columns for value in column]
+    compacted = _rle_zeroes(deltas)
+    raw = (
+        reference_bson
+        + struct.pack("<II", len(columns), delta_count)
+        + b"".join(_varint(value) for value in compacted)
+    )
+    payload = struct.pack("<I", len(raw)) + zlib.compress(raw)
+    path.write_bytes(BSON.encode({"type": 1, "data": Binary(payload)}))
+    return path
+def _rle_zeroes(values: Sequence[int]) -> list[int]:
+    output: list[int] = []
+    zeroes = 0
+    for value in values:
+        if value == 0:
+            zeroes += 1
+        else:
+            if zeroes:
+                output.extend((0, zeroes - 1))
+                zeroes = 0
+            output.append(value & ((1 << 64) - 1))
+    if zeroes:
+        output.extend((0, zeroes - 1))
+    return output
+def _varint(value: int) -> bytes:
+    output = bytearray()
+    while value > 0x7F:
+        output.append((value & 0x7F) | 0x80)
+        value >>= 7
+    output.append(value)
+    return bytes(output)

pymongoftdc-0.1.0/tests/test_reader.py ADDED Viewed

@@ -0,0 +1,200 @@
+"""Tests for the public FTDC reader API."""
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+import pytest
+from pyftdc import FTDCReader, MetricNotFoundError
+from tests.conftest import write_ftdc
+def test_get_metric_filters_timespan_and_decodes_deltas(tmp_path: Path) -> None:
+    """Metric queries decode deltas and apply inclusive time filtering."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    write_ftdc(
+        tmp_path / "metrics.2026-01-01T00-00-00Z-00000",
+        {"start": start, "serverStatus": {"connections": {"current": 10}}},
+        [[1000, 1000, 1000], [2, 0, (1 << 64) - 5]],
+    )
+    result = FTDCReader(tmp_path).get_metric(
+        {"serverStatus.connections.current"},
+        start + timedelta(seconds=1),
+        start + timedelta(seconds=2),
+    )
+    points = result["serverStatus.connections.current"]
+    assert [point.timestamp for point in points] == [
+        start + timedelta(seconds=1),
+        start + timedelta(seconds=2),
+    ]
+    assert [point.value for point in points] == [12, 12]
+def test_get_metric_returns_multiple_metrics(tmp_path: Path) -> None:
+    """A query returns a separate ordered point list for each requested metric."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    path = write_ftdc(
+        tmp_path / "metrics.interim",
+        {"start": start, "value": 1, "other": 10},
+        [[1000, 1000], [1, 1], [2, 2]],
+    )
+    result = FTDCReader(path).get_metric({"value", "other"}, start, start + timedelta(seconds=2))
+    assert list(result) == ["other", "value"]
+    assert [point.value for point in result["value"]] == [1, 2, 3]
+    assert [point.value for point in result["other"]] == [10, 12, 14]
+def test_omitted_timespan_reads_earliest_through_latest_in_folder(tmp_path: Path) -> None:
+    """Omitted bounds include every sample across all files in the folder."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    write_ftdc(tmp_path / "metrics.1", {"start": start, "value": 1}, [[], []])
+    write_ftdc(
+        tmp_path / "metrics.2",
+        {"start": start + timedelta(hours=1), "value": 2},
+        [[], []],
+    )
+    result = FTDCReader(tmp_path).get_metric({"value"})
+    assert [point.timestamp for point in result["value"]] == [
+        start,
+        start + timedelta(hours=1),
+    ]
+    assert [point.value for point in result["value"]] == [1, 2]
+def test_each_timespan_bound_can_be_omitted(tmp_path: Path) -> None:
+    """Either omitted bound expands to the corresponding archive endpoint."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    path = write_ftdc(
+        tmp_path / "metrics.interim",
+        {"start": start, "value": 0},
+        [[1000, 1000], [1, 1]],
+    )
+    reader = FTDCReader(path)
+    through_middle = reader.get_metric({"value"}, end=start + timedelta(seconds=1))
+    from_middle = reader.get_metric({"value"}, start=start + timedelta(seconds=1))
+    assert [point.value for point in through_middle["value"]] == [0, 1]
+    assert [point.value for point in from_middle["value"]] == [1, 2]
+def test_start_bound_skips_older_timestamped_files(tmp_path: Path) -> None:
+    """Files before the start candidate are not opened."""
+    start = datetime(2026, 1, 2, tzinfo=timezone.utc)
+    (tmp_path / "metrics.2026-01-01T00-00-00Z-00000").write_bytes(b"not BSON")
+    write_ftdc(
+        tmp_path / "metrics.2026-01-02T00-00-00Z-00000",
+        {"start": start, "value": 1},
+        [[], []],
+    )
+    result = FTDCReader(tmp_path).get_metric({"value"}, start=start)
+    assert [point.value for point in result["value"]] == [1]
+def test_end_bound_skips_newer_timestamped_files(tmp_path: Path) -> None:
+    """Files starting after the end bound are not opened."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    write_ftdc(
+        tmp_path / "metrics.2026-01-01T00-00-00Z-00000",
+        {"start": start, "value": 1},
+        [[], []],
+    )
+    (tmp_path / "metrics.2026-01-02T00-00-00Z-00000").write_bytes(b"not BSON")
+    result = FTDCReader(tmp_path).get_metric({"value"}, end=start)
+    assert [point.value for point in result["value"]] == [1]
+def test_empty_names_returns_all_metrics(tmp_path: Path) -> None:
+    """An empty name set selects every metric in the archive."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    path = write_ftdc(
+        tmp_path / "metrics.interim",
+        {"start": start, "value": 1, "other": 10},
+        [[1000], [1], [2]],
+    )
+    result = FTDCReader(path).get_metric(set(), start, start + timedelta(seconds=1))
+    assert set(result) == {"start", "value", "other"}
+    assert all(len(points) == 2 for points in result.values())
+def test_missing_metric_raises(tmp_path: Path) -> None:
+    """A requested metric absent from every chunk raises a specific error."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    path = write_ftdc(tmp_path / "metrics.interim", {"start": start, "value": 1}, [[], []])
+    with pytest.raises(MetricNotFoundError):
+        FTDCReader(path).get_metric({"value", "other"}, start, start)
+def test_rejects_empty_metric_name(tmp_path: Path) -> None:
+    """An empty string is not a valid requested metric name."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    with pytest.raises(ValueError, match="metric names"):
+        FTDCReader(tmp_path).get_metric({""}, start, start)
+def test_rejects_naive_timespan(tmp_path: Path) -> None:
+    """Timespan bounds must include timezone information."""
+    reader = FTDCReader(tmp_path)
+    with pytest.raises(ValueError, match="timezone-aware"):
+        reader.get_metric({"value"}, datetime(2026, 1, 1), datetime(2026, 1, 2))
+def test_get_metric_samples_points(tmp_path: Path) -> None:
+    """A sample rate uniformly skips points for every requested metric."""
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    path = write_ftdc(
+        tmp_path / "metrics.interim",
+        {"start": start, "value": 0, "other": 10},
+        [[1000] * 9, [1] * 9, [2] * 9],
+    )
+    result = FTDCReader(path).get_metric(
+        {"value", "other"},
+        start,
+        start + timedelta(seconds=9),
+        sample_rate=0.1,
+    )
+    assert [(point.timestamp, point.value) for point in result["value"]] == [
+        (start + timedelta(seconds=9), 9)
+    ]
+    assert [(point.timestamp, point.value) for point in result["other"]] == [
+        (start + timedelta(seconds=9), 28)
+    ]
+@pytest.mark.parametrize("sample_rate", [0, -0.1, 1.1, float("nan"), float("inf")])
+def test_rejects_invalid_sample_rate(tmp_path: Path, sample_rate: float) -> None:
+    """A sample rate must be finite and in the interval (0, 1]."""
+    reader = FTDCReader(tmp_path)
+    start = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    with pytest.raises(ValueError, match="sample_rate"):
+        reader.get_metric({"value"}, start, start, sample_rate=sample_rate)