pymongoftdc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyftdc/__init__.py +13 -0
- pyftdc/_codec.py +214 -0
- pyftdc/exceptions.py +13 -0
- pyftdc/models.py +15 -0
- pyftdc/reader.py +163 -0
- pymongoftdc-0.1.0.dist-info/METADATA +98 -0
- pymongoftdc-0.1.0.dist-info/RECORD +9 -0
- pymongoftdc-0.1.0.dist-info/WHEEL +4 -0
- pymongoftdc-0.1.0.dist-info/licenses/LICENSE +21 -0
pyftdc/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Read time-series metrics from MongoDB FTDC archives."""
|
|
2
|
+
|
|
3
|
+
from pyftdc.exceptions import FTDCDecodeError, FTDCError, MetricNotFoundError
|
|
4
|
+
from pyftdc.models import DataPoint
|
|
5
|
+
from pyftdc.reader import FTDCReader
|
|
6
|
+
|
|
7
|
+
# Names exported by ``from pyftdc import *``; each one is imported above from a
# submodule of this package.
__all__ = [
    "DataPoint",
    "FTDCDecodeError",
    "FTDCError",
    "FTDCReader",
    "MetricNotFoundError",
]
|
pyftdc/_codec.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Low-level BSON framing and FTDC metric chunk decoding."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
import struct
|
|
7
|
+
import zlib
|
|
8
|
+
from collections.abc import Iterator, Mapping, Sequence
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, BinaryIO, cast
|
|
13
|
+
|
|
14
|
+
from bson import BSON
|
|
15
|
+
from bson.codec_options import CodecOptions
|
|
16
|
+
from bson.decimal128 import Decimal128
|
|
17
|
+
from bson.timestamp import Timestamp
|
|
18
|
+
|
|
19
|
+
from pyftdc.exceptions import FTDCDecodeError
|
|
20
|
+
from pyftdc.models import MetricValue
|
|
21
|
+
|
|
22
|
+
# Mask used to keep slot values and running sums within unsigned 64 bits.
_UINT64_MASK = (1 << 64) - 1
# Smallest byte length this module accepts for a BSON document.
_MIN_BSON_SIZE = 5
# Codec options passed to every BSON decode: datetimes come back timezone-aware in UTC.
_CODEC_OPTIONS: CodecOptions[Any] = CodecOptions(tz_aware=True, tzinfo=timezone.utc)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True, slots=True)
class MetricSlot:
    """A compressed numeric field and its location in a reference document.

    Each slot corresponds to one column of a decoded chunk. A field that is
    stored as two integers (BSON timestamp, Decimal128) produces two slots that
    share ``path`` and differ in ``part``.
    """

    # Dotted path of the field inside the reference document.
    path: str
    # Value taken from the reference document, held as an unsigned 64-bit integer.
    initial: int
    # Type tag of the source field; this module emits "bool", "int", "float",
    # "datetime", "timestamp" and "decimal128".
    kind: str
    # Position of this slot among the slots emitted for one field (0 or 1).
    part: int = 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True, slots=True)
class DecodedChunk:
    """A reference document and its decoded metric rows."""

    # The decoded BSON reference document of the chunk.
    reference: Mapping[str, Any]
    # One slot per numeric column, in the order they were extracted from ``reference``.
    slots: tuple[MetricSlot, ...]
    # Sample rows; each row holds one unsigned 64-bit value per slot. The first
    # row carries the slots' initial values, later rows the accumulated deltas.
    rows: tuple[tuple[int, ...], ...]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def iter_bson_documents(stream: BinaryIO, source: Path) -> Iterator[Mapping[str, Any]]:
    """Yield the concatenated BSON documents in an FTDC file.

    Args:
        stream: Binary stream positioned at the start of a document.
        source: Path of the file being read; used only to label error messages.

    Yields:
        Each document, decoded with the module's UTC-aware codec options.

    Raises:
        FTDCDecodeError: If a length prefix or document body is truncated, the
            declared length is non-zero but below the minimum BSON size, or
            the bytes cannot be decoded as BSON.
    """

    while prefix := stream.read(4):
        if len(prefix) != 4:
            raise FTDCDecodeError(f"{source}: truncated BSON length")
        (length,) = struct.unpack("<I", prefix)
        if length == 0:  # Zero bytes terminate an interim file.
            return
        if length < _MIN_BSON_SIZE:
            raise FTDCDecodeError(f"{source}: invalid BSON length {length}")
        remainder = stream.read(length - 4)
        if len(remainder) != length - 4:
            raise FTDCDecodeError(f"{source}: truncated BSON document")
        try:
            document = BSON(prefix + remainder).decode(codec_options=_CODEC_OPTIONS)
        except Exception as exc:
            raise FTDCDecodeError(f"{source}: invalid BSON document") from exc
        # Yield outside the try block: an exception thrown into the generator by
        # its consumer must not be reported as a BSON decoding failure.
        yield document
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def decode_metric_document(document: Mapping[str, Any]) -> DecodedChunk:
    """Expand one type-1 FTDC document into its reference document and sample rows.

    The decompressed payload is read as a BSON reference document, then two
    little-endian uint32 counters (metric count, delta count), then the
    varint-encoded deltas stored metric by metric.

    Raises:
        FTDCDecodeError: If the payload is too short, the reference document is
            malformed, or the slot count disagrees with the declared metric count.
    """

    payload = _decompress_payload(document)
    if len(payload) < _MIN_BSON_SIZE:
        raise FTDCDecodeError("metric chunk has no reference document")

    reference_size = struct.unpack_from("<I", payload)[0]
    # The two uint32 counters follow the reference document directly.
    header_end = reference_size + 8
    if reference_size < _MIN_BSON_SIZE or header_end > len(payload):
        raise FTDCDecodeError("invalid reference document size")
    try:
        reference = BSON(payload[:reference_size]).decode(codec_options=_CODEC_OPTIONS)
    except Exception as exc:
        raise FTDCDecodeError("invalid metric reference document") from exc

    metric_count, delta_count = struct.unpack_from("<II", payload, reference_size)
    slots = tuple(_extract_slots(reference))
    slot_total = len(slots)
    if slot_total != metric_count:
        raise FTDCDecodeError(
            f"reference contains {slot_total} numeric slots, chunk declares {metric_count}"
        )

    deltas = _decode_deltas(memoryview(payload)[header_end:], metric_count * delta_count)

    # Row 0 is the reference values; each later row adds one delta per column,
    # wrapping at 64 bits.
    running = tuple(slot.initial for slot in slots)
    rows = [running]
    for sample in range(delta_count):
        running = tuple(
            (previous + deltas[column * delta_count + sample]) & _UINT64_MASK
            for column, previous in enumerate(running)
        )
        rows.append(running)

    return DecodedChunk(reference, slots, tuple(rows))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _decompress_payload(document: Mapping[str, Any]) -> bytes:
    """Validate and decompress the binary payload of a metric document.

    The payload is a little-endian uint32 holding the decompressed size,
    followed by a zlib stream. Inflation is capped just above the declared
    size so a corrupt or hostile chunk cannot expand without bound.

    Args:
        document: Outer FTDC document; the payload is read from ``"data"``,
            falling back to ``"doc"``.

    Returns:
        The decompressed bytes, whose length equals the declared size.

    Raises:
        FTDCDecodeError: If the payload is missing, not binary, shorter than
            five bytes, not a complete zlib stream, or inflates to a length
            other than the declared one.
    """

    payload = document.get("data", document.get("doc"))
    if not isinstance(payload, (bytes, bytearray, memoryview)):
        raise FTDCDecodeError("metric document has no binary 'data' or 'doc' field")
    data = payload.tobytes() if isinstance(payload, memoryview) else bytes(payload)
    if len(data) < 5:
        raise FTDCDecodeError("compressed metric chunk is too short")

    (expected_size,) = struct.unpack_from("<I", data)
    inflater = zlib.decompressobj()
    try:
        # One byte of headroom is enough to detect an oversized stream.
        raw = inflater.decompress(memoryview(data)[4:], expected_size + 1)
    except zlib.error as exc:
        raise FTDCDecodeError("invalid zlib metric payload") from exc
    if len(raw) > expected_size:
        raise FTDCDecodeError(
            f"metric chunk size mismatch: expected {expected_size}, "
            f"got more than {expected_size}"
        )
    if not inflater.eof:
        # A decompression object does not raise on a truncated stream, so the
        # end-of-stream marker has to be checked explicitly.
        raise FTDCDecodeError("invalid zlib metric payload")
    if len(raw) != expected_size:
        raise FTDCDecodeError(
            f"metric chunk size mismatch: expected {expected_size}, got {len(raw)}"
        )
    return raw
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def value_for_slot(slot: MetricSlot, raw_value: int) -> MetricValue:
|
|
124
|
+
"""Restore a compressed integer to the reference field's useful Python type."""
|
|
125
|
+
|
|
126
|
+
signed = _as_signed(raw_value)
|
|
127
|
+
if slot.kind == "bool":
|
|
128
|
+
return bool(raw_value)
|
|
129
|
+
if slot.kind == "float":
|
|
130
|
+
return float(signed)
|
|
131
|
+
return signed
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def timestamp_for_row(chunk: DecodedChunk, row: Sequence[int]) -> datetime:
    """Return the UTC time held in the row's top-level ``start`` datetime column.

    The column value is read as signed milliseconds since the Unix epoch.

    Raises:
        FTDCDecodeError: If the chunk has no datetime slot whose path is ``start``.
    """

    start_column = next(
        (
            column
            for column, slot in enumerate(chunk.slots)
            if slot.path == "start" and slot.kind == "datetime"
        ),
        None,
    )
    if start_column is None:
        raise FTDCDecodeError("metric reference document has no top-level datetime 'start'")
    millis = _as_signed(row[start_column])
    return datetime.fromtimestamp(millis / 1000, tz=timezone.utc)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _extract_slots(value: object, path: str = "") -> Iterator[MetricSlot]:
    """Yield the metric slots found in *value*, walking it depth first.

    Mappings and lists/tuples are descended into, extending the dotted *path*
    with the key or the element index. Booleans, integers, floats, datetimes,
    BSON timestamps and Decimal128 values each yield one or two slots; any
    other value yields nothing.

    Args:
        value: A reference document or one of its nested values.
        path: Dotted path of *value* within the reference document.
    """

    if isinstance(value, Mapping):
        mapping = cast(Mapping[object, object], value)
        for name, child in mapping.items():
            child_path = f"{path}.{name}" if path else str(name)
            yield from _extract_slots(child, child_path)
        return
    if isinstance(value, (list, tuple)):
        sequence = cast(Sequence[object], value)
        for index, child in enumerate(sequence):
            child_path = f"{path}.{index}" if path else str(index)
            yield from _extract_slots(child, child_path)
        return

    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        yield MetricSlot(path, int(value), "bool")
    elif isinstance(value, int):
        yield MetricSlot(path, value & _UINT64_MASK, "int")
    elif isinstance(value, float):
        # NaN is stored as 0; other values are truncated and clamped to int64.
        number = (
            0
            if math.isnan(value)
            else max(-(1 << 63), min((1 << 63) - 1, int(value)))
        )
        yield MetricSlot(path, number & _UINT64_MASK, "float")
    elif isinstance(value, datetime):
        moment = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
        # Exact integer milliseconds since the epoch. Going through
        # ``timestamp() * 1000`` uses floats and can truncate to the previous
        # millisecond for some values (epoch + 1.001 s gives 1000, not 1001).
        elapsed = moment - datetime(1970, 1, 1, tzinfo=timezone.utc)
        millis = (elapsed.days * 86_400 + elapsed.seconds) * 1000 + elapsed.microseconds // 1000
        yield MetricSlot(path, millis & _UINT64_MASK, "datetime")
    elif isinstance(value, Timestamp):
        # Two columns: seconds, then the increment counter.
        yield MetricSlot(path, value.time & _UINT64_MASK, "timestamp", 0)
        yield MetricSlot(path, value.inc & _UINT64_MASK, "timestamp", 1)
    elif isinstance(value, Decimal128):
        # Two columns: the 16-byte value split into two little-endian uint64 words.
        low, high = struct.unpack("<QQ", value.bid)
        yield MetricSlot(path, low, "decimal128", 0)
        yield MetricSlot(path, high, "decimal128", 1)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _decode_deltas(data: memoryview, expected_count: int) -> list[int]:
    """Decode exactly *expected_count* varint deltas, expanding zero runs.

    A zero varint is followed by a second varint holding the run length minus
    one; the whole run is emitted as zeros.

    Raises:
        FTDCDecodeError: If a zero run overshoots *expected_count* or bytes
            remain once all deltas have been read.
    """

    decoded: list[int] = []
    cursor = 0
    while len(decoded) < expected_count:
        delta, cursor = _read_varint(data, cursor)
        if delta != 0:
            decoded.append(delta)
        else:
            extra_zeros, cursor = _read_varint(data, cursor)
            zero_count = extra_zeros + 1
            if zero_count > expected_count - len(decoded):
                raise FTDCDecodeError("zero run exceeds declared metric data")
            decoded += [0] * zero_count
    if cursor != len(data):
        raise FTDCDecodeError("unexpected bytes after compressed metric data")
    return decoded
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _read_varint(data: memoryview, position: int) -> tuple[int, int]:
    """Read one little-endian base-128 varint starting at *position*.

    Returns:
        The decoded value and the position just past its last byte.

    Raises:
        FTDCDecodeError: If the data ends mid-varint, or the value needs more
            than ten bytes or exceeds 64 bits.
    """

    result = 0
    shift = 0
    while shift < 70:  # at most ten 7-bit groups
        if position >= len(data):
            raise FTDCDecodeError("truncated varint metric data")
        current = data[position]
        position += 1
        result |= (current & 0x7F) << shift
        if (current & 0x80) == 0:
            # Final byte: the tenth group may still carry bits above bit 63.
            if result > _UINT64_MASK:
                raise FTDCDecodeError("varint exceeds uint64")
            return result, position
        shift += 7
    raise FTDCDecodeError("varint exceeds uint64")
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _as_signed(value: int) -> int:
    """Reinterpret an unsigned 64-bit integer as a two's-complement signed one."""
    if value >= 1 << 63:
        return value - (1 << 64)
    return value
|
pyftdc/exceptions.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Exceptions raised by pyftdc."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class FTDCError(Exception):
    """Base class for pyftdc errors.

    Catch this type to handle any exception defined by the package.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FTDCDecodeError(FTDCError):
    """An FTDC file or compressed metric chunk is invalid.

    Raised for malformed BSON framing, bad zlib payloads and inconsistent
    metric chunk contents.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MetricNotFoundError(FTDCError, KeyError):
    """The requested metric does not occur in the source.

    Also derives from ``KeyError``, so ``except KeyError`` handlers catch it.
    """
|
pyftdc/models.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Public value objects."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import TypeAlias
|
|
6
|
+
|
|
7
|
+
# Python types a decoded metric observation may take.
MetricValue: TypeAlias = int | float | bool
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
class DataPoint:
    """One metric observation."""

    # Time at which the observation was collected.
    timestamp: datetime
    # Observed value of the metric at ``timestamp``.
    value: MetricValue
|
pyftdc/reader.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""High-level FTDC metric query API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
from collections.abc import Iterator
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from pyftdc._codec import (
|
|
11
|
+
DecodedChunk,
|
|
12
|
+
decode_metric_document,
|
|
13
|
+
iter_bson_documents,
|
|
14
|
+
timestamp_for_row,
|
|
15
|
+
value_for_slot,
|
|
16
|
+
)
|
|
17
|
+
from pyftdc.exceptions import FTDCError, MetricNotFoundError
|
|
18
|
+
from pyftdc.models import DataPoint
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FTDCReader:
    """Read metrics from one FTDC file or a ``diagnostic.data`` directory."""

    def __init__(self, source: str | Path) -> None:
        """Remember *source* after checking that it can be read from.

        Raises:
            FileNotFoundError: If *source* does not exist.
            FTDCError: If *source* is neither a regular file nor a directory.
        """
        self.source = Path(source)
        if not self.source.exists():
            raise FileNotFoundError(self.source)
        if not self.source.is_file() and not self.source.is_dir():
            raise FTDCError(f"FTDC source is not a regular file or directory: {self.source}")

    def get_metric(
        self,
        name: set[str] | str,
        start: datetime | None = None,
        end: datetime | None = None,
        sample_rate: float = 1.0,
    ) -> dict[str, list[DataPoint]]:
        """Return sampled observations by metric name in the inclusive UTC timespan.

        Args:
            name: Dotted metric paths to read. A single string is treated as
                one metric name; an empty collection selects every metric.
            start: Earliest timestamp to include; must be timezone-aware.
            end: Latest timestamp to include; must be timezone-aware.
            sample_rate: Fraction of in-range points to keep, in ``(0, 1]``.

        Returns:
            A mapping sorted by metric name whose values are the kept points
            in ascending timestamp order.

        Raises:
            ValueError: If a name is empty, a bound is naive, ``start`` is
                after ``end``, or ``sample_rate`` is out of range.
            MetricNotFoundError: If a requested name occurs in no chunk read.
        """

        # A bare string would otherwise be split into a set of its characters.
        requested_names = {name} if isinstance(name, str) else set(name)
        if "" in requested_names:
            raise ValueError("metric names must not be empty")
        start_utc = _as_utc(start, "start") if start is not None else None
        end_utc = _as_utc(end, "end") if end is not None else None
        if start_utc is not None and end_utc is not None and start_utc > end_utc:
            raise ValueError("start must be before or equal to end")
        if not math.isfinite(sample_rate) or not 0 < sample_rate <= 1:
            raise ValueError("sample_rate must be greater than 0 and at most 1")

        found_names: set[str] = set()
        # Number of in-range points seen so far per metric; drives the sampling.
        point_numbers: dict[str, int] = {}
        # Keyed by timestamp, so a later point with the same timestamp replaces
        # an earlier one.
        points_by_name: dict[str, dict[datetime, DataPoint]] = {}
        for chunk in self._metric_chunks(start_utc, end_utc):
            # Only the first slot of a multi-slot field is exposed as a metric.
            matching_slots = {
                slot.path: (index, slot)
                for index, slot in enumerate(chunk.slots)
                if slot.part == 0 and (not requested_names or slot.path in requested_names)
            }
            if not matching_slots:
                continue
            found_names.update(matching_slots)
            for metric_name in matching_slots:
                point_numbers.setdefault(metric_name, 0)
                points_by_name.setdefault(metric_name, {})
            for row in chunk.rows:
                timestamp = timestamp_for_row(chunk, row)
                if (start_utc is None or start_utc <= timestamp) and (
                    end_utc is None or timestamp <= end_utc
                ):
                    for metric_name, (metric_index, slot) in matching_slots.items():
                        point_number = point_numbers[metric_name] + 1
                        point_numbers[metric_name] = point_number
                        # Keep a point only when it advances the integer part
                        # of ``point_number * sample_rate``.
                        if int(point_number * sample_rate) == int((point_number - 1) * sample_rate):
                            continue
                        points_by_name[metric_name][timestamp] = DataPoint(
                            timestamp=timestamp,
                            value=value_for_slot(slot, row[metric_index]),
                        )

        missing_names = requested_names - found_names
        if missing_names:
            raise MetricNotFoundError(sorted(missing_names)[0])
        return {
            metric_name: [points[timestamp] for timestamp in sorted(points)]
            for metric_name, points in sorted(points_by_name.items())
        }

    # Alternative name for ``get_metric``.
    query = get_metric

    def list_metrics(self) -> list[str]:
        """Return sorted dotted names for numeric fields in the source."""

        names: set[str] = set()
        for chunk in self._metric_chunks():
            names.update(slot.path for slot in chunk.slots)
        return sorted(names)

    def _metric_chunks(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> Iterator[DecodedChunk]:
        """Yield every decoded type-1 chunk from the files selected by ``_paths``."""
        for path in self._paths(start, end):
            with path.open("rb") as stream:
                for document in iter_bson_documents(stream, path):
                    # Documents of any other type are skipped.
                    if document.get("type") == 1:
                        yield decode_metric_document(document)

    def _paths(
        self,
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> list[Path]:
        """Return the files to read, narrowed by the times in their names.

        A file source is returned as is. For a directory, ``metrics.*`` files
        not ending in ``.tmp`` are considered in sorted order. Files whose
        name carries no parseable time are always kept.
        """
        if self.source.is_file():
            return [self.source]
        paths = sorted(
            path
            for path in self.source.glob("metrics.*")
            if path.is_file() and not path.name.endswith(".tmp")
        )
        times = {path: _time_from_filename(path) for path in paths}
        timestamped = [file_time for file_time in times.values() if file_time is not None]
        if not timestamped:
            return paths

        first_time = min(timestamped)
        lower_file_time: datetime | None = None
        if start is not None:
            # Begin with the latest file named at or before ``start``; fall
            # back to the first file when none precedes it.
            preceding = [file_time for file_time in timestamped if file_time <= start]
            lower_file_time = max(preceding, default=first_time)

        upper_file_time = end
        if end is not None and end < first_time:
            # Keep at least the first file even when ``end`` precedes its name.
            upper_file_time = first_time

        return [
            path
            for path in paths
            if (file_time := times[path]) is None
            or (
                (lower_file_time is None or lower_file_time <= file_time)
                and (upper_file_time is None or file_time <= upper_file_time)
            )
        ]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _as_utc(value: datetime, label: str) -> datetime:
    """Convert an aware datetime to UTC, rejecting naive values.

    Args:
        value: Datetime to convert.
        label: Argument name used in the error message.

    Raises:
        ValueError: If *value* has no tzinfo or its UTC offset is ``None``.
    """
    is_aware = value.tzinfo is not None and value.utcoffset() is not None
    if is_aware:
        return value.astimezone(timezone.utc)
    raise ValueError(f"{label} must be timezone-aware")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _time_from_filename(path: Path) -> datetime | None:
    """Parse the UTC time embedded in a ``metrics.<time>-<sequence>`` file name.

    Returns:
        The parsed time as an aware UTC datetime, or ``None`` when the name
        has no numeric sequence suffix or its time part does not match
        ``%Y-%m-%dT%H-%M-%SZ``.
    """
    stem = path.name.removeprefix("metrics.")
    stamp, dash, counter = stem.rpartition("-")
    if dash and counter.isdigit():
        try:
            parsed = datetime.strptime(stamp, "%Y-%m-%dT%H-%M-%SZ")
        except ValueError:
            return None
        return parsed.replace(tzinfo=timezone.utc)
    return None
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pymongoftdc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A typed reader for MongoDB FTDC metric archives
|
|
5
|
+
License: MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2026 Yaoxing
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Python: >=3.10
|
|
28
|
+
Requires-Dist: pymongo<5,>=4.6
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pylint>=3; extra == 'test'
|
|
31
|
+
Requires-Dist: pyright>=1.1.400; extra == 'test'
|
|
32
|
+
Requires-Dist: pytest-cov>=5; extra == 'test'
|
|
33
|
+
Requires-Dist: pytest>=8; extra == 'test'
|
|
34
|
+
Requires-Dist: ruff>=0.11; extra == 'test'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# pymongoftdc
|
|
38
|
+
|
|
39
|
+
[CI status](https://github.com/zhangyaoxing/pyftdc/actions/workflows/ci.yml)
|
|
40
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
41
|
+
[MIT License](LICENSE)
|
|
42
|
+
|
|
43
|
+
`pymongoftdc` reads numeric time-series metrics directly from MongoDB Full-Time
|
|
44
|
+
Diagnostic Data Capture (FTDC) archive files.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
python -m pip install -e .
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
For development:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python -m pip install -e '.[test]'
|
|
56
|
+
pytest
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Use
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from datetime import datetime, timezone
|
|
63
|
+
from pyftdc import FTDCReader
|
|
64
|
+
|
|
65
|
+
reader = FTDCReader("/var/lib/mongo/diagnostic.data")
|
|
66
|
+
metrics = reader.get_metric(
|
|
67
|
+
{"serverStatus.connections.current"},
|
|
68
|
+
start=datetime(2026, 1, 1, tzinfo=timezone.utc),
|
|
69
|
+
end=datetime(2026, 1, 1, 1, tzinfo=timezone.utc),
|
|
70
|
+
sample_rate=0.1,
|
|
71
|
+
)
|
|
72
|
+
points = metrics["serverStatus.connections.current"]
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The source may be one `metrics.*` file or a `diagnostic.data` directory.
|
|
76
|
+
Timespan endpoints are inclusive and must be timezone-aware. Omit `start` or
|
|
77
|
+
`end` to use the earliest or latest timestamp in the source. The result maps each
|
|
78
|
+
requested name to points ordered by UTC timestamp. Pass an empty set to read every
|
|
79
|
+
metric. `sample_rate` must be greater than 0 and at most 1;
|
|
80
|
+
for example, `0.1` returns approximately 10% of points. Its default is `1.0`.
|
|
81
|
+
`query()` is an alias for `get_metric()`.
|
|
82
|
+
|
|
83
|
+
Use `reader.list_metrics()` to discover dotted metric paths. A missing requested
|
|
84
|
+
metric raises `MetricNotFoundError`; an invalid archive raises `FTDCDecodeError`.
|
|
85
|
+
|
|
86
|
+
## Project layout
|
|
87
|
+
|
|
88
|
+
```text
|
|
89
|
+
src/pyftdc/
|
|
90
|
+
_codec.py BSON framing and FTDC decompression
|
|
91
|
+
reader.py public query API
|
|
92
|
+
models.py returned value objects
|
|
93
|
+
exceptions.py library-specific errors
|
|
94
|
+
tests/ pytest tests and fixture builders
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The reader supports BSON-framed type-1 metric chunks using MongoDB's
|
|
98
|
+
delta/RLE/varint/zlib encoding. Metadata documents are safely skipped.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
pyftdc/__init__.py,sha256=HslKWPYP4hlF9n9dtu0gbrdY8drEJdT_qkIvxzjNKr8,328
|
|
2
|
+
pyftdc/_codec.py,sha256=-Az4Eus9LK9NhTmBB2LByFAZDQDIe4_gNIJfGLVm4X4,8163
|
|
3
|
+
pyftdc/exceptions.py,sha256=NuUABt86Y1td4nkoUv3wr0AUCzfi2v0T3zxzsct_Vlo,314
|
|
4
|
+
pyftdc/models.py,sha256=scUG4n9Sb6lvscbmgWnOB6lRqUfx4aKEAKtUOHD5Hhc,304
|
|
5
|
+
pyftdc/reader.py,sha256=Eqad2u9d-utlUUfdeU36Cig3wJHo6casxlhatOHPtQw,6216
|
|
6
|
+
pymongoftdc-0.1.0.dist-info/METADATA,sha256=Z7O2HsCffJ_COt_M3fdcy5UP6QAKrVxggV1Oe9jWj1A,3791
|
|
7
|
+
pymongoftdc-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
pymongoftdc-0.1.0.dist-info/licenses/LICENSE,sha256=Tv9yMSkndKN-OnOBs1yvmrW4fF4M7ZyvyulsTV1D2As,1064
|
|
9
|
+
pymongoftdc-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yaoxing
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|