datadog-smartd 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datadog_smartd-0.1.0/.gitignore +8 -0
- datadog_smartd-0.1.0/LICENSE +21 -0
- datadog_smartd-0.1.0/PKG-INFO +106 -0
- datadog_smartd-0.1.0/README.md +86 -0
- datadog_smartd-0.1.0/datadog_checks/smartd/__about__.py +1 -0
- datadog_smartd-0.1.0/datadog_checks/smartd/__init__.py +4 -0
- datadog_smartd-0.1.0/datadog_checks/smartd/check.py +134 -0
- datadog_smartd-0.1.0/datadog_checks/smartd/data/conf.yaml.example +33 -0
- datadog_smartd-0.1.0/hatch.toml +9 -0
- datadog_smartd-0.1.0/manifest.json +47 -0
- datadog_smartd-0.1.0/pyproject.toml +38 -0
- datadog_smartd-0.1.0/tests/__init__.py +0 -0
- datadog_smartd-0.1.0/tests/common.py +19 -0
- datadog_smartd-0.1.0/tests/conftest.py +15 -0
- datadog_smartd-0.1.0/tests/fixtures/smartd.ACME_DISK4000-SN123456789ABC.ata.state +61 -0
- datadog_smartd-0.1.0/tests/fixtures/smartd.ACME_DISK4000-SN987654321XYZ.ata.state +66 -0
- datadog_smartd-0.1.0/tests/test_smartd.py +132 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datadog-smartd
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The smartd check
|
|
5
|
+
Project-URL: Source, https://github.com/DataDog/integrations-extras
|
|
6
|
+
Author: Community
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: datadog,datadog agent,datadog check,smart,smartd
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: System Administrators
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: System :: Monitoring
|
|
16
|
+
Requires-Python: >=3.13
|
|
17
|
+
Requires-Dist: datadog-checks-base>=37.33.0
|
|
18
|
+
Provides-Extra: deps
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# smartd
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
|
|
25
|
+
This integration monitors [S.M.A.R.T.][1] disk health by reading state files written by the [smartd][2] daemon (part of [smartmontools][3]). It collects key disk health attributes such as temperature, reallocated sectors, power-on hours, and pending sector counts, and reports the overall health status of each drive as a service check.
|
|
26
|
+
|
|
27
|
+
Unlike other approaches that shell out to `smartctl` (which requires root privileges), this integration reads the state files that `smartd` already maintains, making it work without any privilege escalation.
|
|
28
|
+
|
|
29
|
+
## Setup
|
|
30
|
+
|
|
31
|
+
### Prerequisites
|
|
32
|
+
|
|
33
|
+
- The `smartd` daemon must be running and writing state files (default location: `/var/lib/smartmontools/`).
|
|
34
|
+
- The `dd-agent` user must have read access to the state files (they are typically world-readable with `644` permissions).
|
|
35
|
+
|
|
36
|
+
### Installation
|
|
37
|
+
|
|
38
|
+
For development, install the check in the Datadog Agent's Python environment:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
datadog-agent integration install -e /path/to/smartd
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
For production, copy the check file and configuration:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
cp datadog_checks/smartd/check.py /etc/datadog-agent/checks.d/smartd.py
|
|
48
|
+
mkdir -p /etc/datadog-agent/conf.d/smartd.d
|
|
49
|
+
cp datadog_checks/smartd/data/conf.yaml.example /etc/datadog-agent/conf.d/smartd.d/conf.yaml
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Configuration
|
|
53
|
+
|
|
54
|
+
Edit `/etc/datadog-agent/conf.d/smartd.d/conf.yaml` to configure the check:
|
|
55
|
+
|
|
56
|
+
```yaml
|
|
57
|
+
init_config:
|
|
58
|
+
|
|
59
|
+
instances:
|
|
60
|
+
- smartd_state_dir: /var/lib/smartmontools
|
|
61
|
+
min_collection_interval: 120
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Then [restart the Agent][4].
|
|
65
|
+
|
|
66
|
+
### Validation
|
|
67
|
+
|
|
68
|
+
Run the [Agent's status subcommand][5] and look for `smartd` under the Checks section:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
datadog-agent status
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Or run the check directly:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
datadog-agent check smartd
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Data Collected
|
|
81
|
+
|
|
82
|
+
### Metrics
|
|
83
|
+
|
|
84
|
+
See [metadata.csv][6] for a list of metrics provided by this integration.
|
|
85
|
+
|
|
86
|
+
### Service Checks
|
|
87
|
+
|
|
88
|
+
**smartd.disk_health**: Returns `OK` if the drive is healthy, `WARNING` if the reallocated sectors, pending sectors, or offline uncorrectable counts are non-zero, and `CRITICAL` if the normalized value of a monitored attribute reaches 0 or the drive's state file cannot be parsed.
|
|
89
|
+
|
|
90
|
+
**smartd.can_read**: Returns `OK` if smartd state files were found and parsed successfully, `CRITICAL` otherwise.
|
|
91
|
+
|
|
92
|
+
### Events
|
|
93
|
+
|
|
94
|
+
The smartd integration does not include any events.
|
|
95
|
+
|
|
96
|
+
## Support
|
|
97
|
+
|
|
98
|
+
For help, open an issue on the [GitHub repository][7].
|
|
99
|
+
|
|
100
|
+
[1]: https://en.wikipedia.org/wiki/Self-Monitoring,_Analysis_and_Reporting_Technology
|
|
101
|
+
[2]: https://www.smartmontools.org/wiki/Smartd
|
|
102
|
+
[3]: https://www.smartmontools.org/
|
|
103
|
+
[4]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent
|
|
104
|
+
[5]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information
|
|
105
|
+
[6]: https://github.com/DataDog/integrations-extras/blob/master/smartd/metadata.csv
|
|
106
|
+
[7]: https://github.com/DataDog/integrations-extras
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# smartd
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
This integration monitors [S.M.A.R.T.][1] disk health by reading state files written by the [smartd][2] daemon (part of [smartmontools][3]). It collects key disk health attributes such as temperature, reallocated sectors, power-on hours, and pending sector counts, and reports the overall health status of each drive as a service check.
|
|
6
|
+
|
|
7
|
+
Unlike other approaches that shell out to `smartctl` (which requires root privileges), this integration reads the state files that `smartd` already maintains, making it work without any privilege escalation.
|
|
8
|
+
|
|
9
|
+
## Setup
|
|
10
|
+
|
|
11
|
+
### Prerequisites
|
|
12
|
+
|
|
13
|
+
- The `smartd` daemon must be running and writing state files (default location: `/var/lib/smartmontools/`).
|
|
14
|
+
- The `dd-agent` user must have read access to the state files (they are typically world-readable with `644` permissions).
|
|
15
|
+
|
|
16
|
+
### Installation
|
|
17
|
+
|
|
18
|
+
For development, install the check in the Datadog Agent's Python environment:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
datadog-agent integration install -e /path/to/smartd
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
For production, copy the check file and configuration:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
cp datadog_checks/smartd/check.py /etc/datadog-agent/checks.d/smartd.py
|
|
28
|
+
mkdir -p /etc/datadog-agent/conf.d/smartd.d
|
|
29
|
+
cp datadog_checks/smartd/data/conf.yaml.example /etc/datadog-agent/conf.d/smartd.d/conf.yaml
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Configuration
|
|
33
|
+
|
|
34
|
+
Edit `/etc/datadog-agent/conf.d/smartd.d/conf.yaml` to configure the check:
|
|
35
|
+
|
|
36
|
+
```yaml
|
|
37
|
+
init_config:
|
|
38
|
+
|
|
39
|
+
instances:
|
|
40
|
+
- smartd_state_dir: /var/lib/smartmontools
|
|
41
|
+
min_collection_interval: 120
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Then [restart the Agent][4].
|
|
45
|
+
|
|
46
|
+
### Validation
|
|
47
|
+
|
|
48
|
+
Run the [Agent's status subcommand][5] and look for `smartd` under the Checks section:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
datadog-agent status
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Or run the check directly:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
datadog-agent check smartd
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Data Collected
|
|
61
|
+
|
|
62
|
+
### Metrics
|
|
63
|
+
|
|
64
|
+
See [metadata.csv][6] for a list of metrics provided by this integration.
|
|
65
|
+
|
|
66
|
+
### Service Checks
|
|
67
|
+
|
|
68
|
+
**smartd.disk_health**: Returns `OK` if the drive is healthy, `WARNING` if the reallocated sectors, pending sectors, or offline uncorrectable counts are non-zero, and `CRITICAL` if the normalized value of a monitored attribute reaches 0 or the drive's state file cannot be parsed.
|
|
69
|
+
|
|
70
|
+
**smartd.can_read**: Returns `OK` if smartd state files were found and parsed successfully, `CRITICAL` otherwise.
|
|
71
|
+
|
|
72
|
+
### Events
|
|
73
|
+
|
|
74
|
+
The smartd integration does not include any events.
|
|
75
|
+
|
|
76
|
+
## Support
|
|
77
|
+
|
|
78
|
+
For help, open an issue on the [GitHub repository][7].
|
|
79
|
+
|
|
80
|
+
[1]: https://en.wikipedia.org/wiki/Self-Monitoring,_Analysis_and_Reporting_Technology
|
|
81
|
+
[2]: https://www.smartmontools.org/wiki/Smartd
|
|
82
|
+
[3]: https://www.smartmontools.org/
|
|
83
|
+
[4]: https://docs.datadoghq.com/agent/guide/agent-commands/#start-stop-and-restart-the-agent
|
|
84
|
+
[5]: https://docs.datadoghq.com/agent/guide/agent-commands/#agent-status-and-information
|
|
85
|
+
[6]: https://github.com/DataDog/integrations-extras/blob/master/smartd/metadata.csv
|
|
86
|
+
[7]: https://github.com/DataDog/integrations-extras
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version; hatch reads it from this file via [tool.hatch.version] in pyproject.toml.
__version__ = '0.1.0'
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import glob
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from datadog_checks.base import AgentCheck
|
|
6
|
+
|
|
7
|
+
# State file names look like ``smartd.<model>-<serial>.<type>.state``.
# Group 1 is the model (greedy, so it may itself contain hyphens) and
# group 2 is the serial number (the text after the last hyphen).
FILENAME_PATTERN = re.compile(r'^smartd\.(.+)-([^-]+)\.\w+\.state$')

# One state file line per attribute field, e.g. ``ata-smart-attribute.12.raw = 42``.
# Groups: (slot index, field name, decimal value).
ATTR_LINE_PATTERN = re.compile(r'^ata-smart-attribute\.(\d+)\.(id|val|worst|raw)\s*=\s*(\d+)$')

# SMART attribute ID → metric name mapping.
# Attributes whose ID is not listed here are ignored by the check.
NAMED_ATTRIBUTES = {
    1: 'raw_read_error_rate',
    5: 'reallocated_sectors',
    9: 'power_on_hours',
    10: 'spin_retry_count',
    12: 'power_cycle_count',
    194: 'temperature',
    196: 'reallocated_event_count',
    197: 'current_pending_sectors',
    198: 'offline_uncorrectable',
    199: 'udma_crc_error_count',
}

# Attributes where non-zero raw values indicate potential problems
# (reallocated sectors, current pending sectors, offline uncorrectable).
WARNING_ATTRIBUTES = {5, 197, 198}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SmartdCheck(AgentCheck):
    """Report S.M.A.R.T. attributes read from smartd state files.

    For every state file matching ``file_pattern`` inside ``smartd_state_dir``
    the check emits one gauge per attribute listed in ``NAMED_ATTRIBUTES`` and
    a ``disk_health`` service check tagged with the device model and serial
    number taken from the file name. A single ``can_read`` service check
    summarises whether state files were found and could all be read.
    """

    __NAMESPACE__ = 'smartd'

    def __init__(self, name, init_config, instances):
        super().__init__(name, init_config, instances)
        self.state_dir = self.instance.get('smartd_state_dir', '/var/lib/smartmontools')
        self.file_pattern = self.instance.get('file_pattern', 'smartd.*.state')

    def check(self, _):
        """Process every matching state file and report ``can_read``.

        ``can_read`` is CRITICAL when no file matches the glob or when at
        least one matching file could not be read; otherwise it is OK. Files
        whose name cannot be parsed are skipped with a warning and do not
        affect ``can_read``.
        """
        # `tags:` left empty in the YAML arrives as None, hence `or []`.
        custom_tags = list(self.instance.get('tags') or [])
        pattern = os.path.join(self.state_dir, self.file_pattern)
        state_files = sorted(glob.glob(pattern))

        if not state_files:
            self.service_check(
                'can_read', AgentCheck.CRITICAL, tags=custom_tags, message='No smartd state files found'
            )
            self.log.error('No smartd state files found matching %s', pattern)
            return

        failed = 0
        for path in state_files:
            if not self._process_state_file(path):
                failed += 1

        if failed:
            self.service_check(
                'can_read',
                AgentCheck.CRITICAL,
                tags=custom_tags,
                message='Failed to read {} of {} smartd state files'.format(failed, len(state_files)),
            )
            return

        self.service_check('can_read', AgentCheck.OK, tags=custom_tags)

    def _process_state_file(self, path):
        """Emit metrics and the ``disk_health`` service check for one state file.

        Returns:
            ``False`` if the file could not be read (``disk_health`` is then
            reported CRITICAL with the error text), ``True`` otherwise. A file
            whose name does not match ``FILENAME_PATTERN`` is skipped and also
            yields ``True``.
        """
        filename = os.path.basename(path)
        match = FILENAME_PATTERN.match(filename)
        if not match:
            self.log.warning('Could not parse device info from filename: %s', filename)
            return True

        model, serial = match.groups()
        tags = ['device_model:{}'.format(model), 'serial_number:{}'.format(serial)]
        tags.extend(self.instance.get('tags') or [])

        try:
            attributes = self._parse_state_file(path)
        except Exception as e:
            # Per-file boundary: one unreadable file must not stop the other drives from being reported.
            self.log.error('Failed to parse state file %s: %s', path, e)
            self.service_check('disk_health', AgentCheck.CRITICAL, tags=tags, message=str(e))
            return False

        health = AgentCheck.OK
        health_message = None

        for attr_id, attr_data in attributes.items():
            metric_name = NAMED_ATTRIBUTES.get(attr_id)
            if metric_name is None:
                continue

            # A field missing from the state file is treated as 0.
            raw = attr_data.get('raw', 0)
            val = attr_data.get('val', 0)

            # Temperature is encoded in the lowest byte of the raw value
            value = raw & 0xFF if metric_name == 'temperature' else raw
            self.gauge(metric_name, value, tags=tags)

            # Health checks: CRITICAL is sticky, a later WARNING never downgrades it.
            if val == 0:
                health = AgentCheck.CRITICAL
                health_message = 'Attribute {} normalized value is 0'.format(attr_id)
            elif attr_id in WARNING_ATTRIBUTES and raw > 0 and health != AgentCheck.CRITICAL:
                health = AgentCheck.WARNING
                health_message = 'Attribute {} ({}): raw value {}'.format(attr_id, metric_name, raw)

        self.service_check('disk_health', health, tags=tags, message=health_message)
        return True

    def _parse_state_file(self, path):
        """Parse a smartd state file into ``{attribute_id: {field: int}}``.

        Only lines matching ``ATTR_LINE_PATTERN`` are used; every other line
        is ignored. Fields are first grouped by the slot index in the line and
        then re-keyed by the slot's ``id`` field; slots without an ``id`` line
        are dropped.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        by_index = {}

        # Explicit encoding keeps parsing independent of the host locale; undecodable
        # bytes are replaced so that they only ever produce lines that fail to match.
        with open(path, encoding='utf-8', errors='replace') as f:
            for line in f:
                match = ATTR_LINE_PATTERN.match(line.strip())
                if not match:
                    continue

                idx, field, value = match.groups()
                by_index.setdefault(int(idx), {})[field] = int(value)

        # Re-key by attribute ID instead of index
        return {attr_data['id']: attr_data for attr_data in by_index.values() if 'id' in attr_data}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
## All options defined here are available to all instances.
|
|
2
|
+
#
|
|
3
|
+
init_config:
|
|
4
|
+
|
|
5
|
+
## Every instance is scheduled independently of the others.
|
|
6
|
+
#
|
|
7
|
+
instances:
|
|
8
|
+
|
|
9
|
+
-
|
|
10
|
+
## @param smartd_state_dir - string - optional - default: /var/lib/smartmontools
|
|
11
|
+
## Path to the directory containing smartd state files.
|
|
12
|
+
#
|
|
13
|
+
# smartd_state_dir: /var/lib/smartmontools
|
|
14
|
+
|
|
15
|
+
## @param file_pattern - string - optional - default: smartd.*.state
|
|
16
|
+
## Glob pattern to match smartd state files within the state directory.
|
|
17
|
+
#
|
|
18
|
+
# file_pattern: "smartd.*.state"
|
|
19
|
+
|
|
20
|
+
## @param tags - list of strings - optional
|
|
21
|
+
## A list of tags to attach to every metric and service check emitted by this instance.
|
|
22
|
+
##
|
|
23
|
+
## Learn more about tagging at https://docs.datadoghq.com/tagging
|
|
24
|
+
#
|
|
25
|
+
# tags:
|
|
26
|
+
# - <KEY_1>:<VALUE_1>
|
|
27
|
+
# - <KEY_2>:<VALUE_2>
|
|
28
|
+
|
|
29
|
+
## @param min_collection_interval - number - optional - default: 15
|
|
30
|
+
## This changes the collection interval of the check. For more information, see:
|
|
31
|
+
## https://docs.datadoghq.com/developers/write_agent_check/#collection-interval
|
|
32
|
+
#
|
|
33
|
+
# min_collection_interval: 120
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# NOTE: [env.collectors.datadog-checks] is needed in integrations-core/extras
|
|
2
|
+
# but requires the monorepo hatch plugin. Uncomment when contributing upstream.
|
|
3
|
+
# [env.collectors.datadog-checks]
|
|
4
|
+
|
|
5
|
+
[envs.default]
|
|
6
|
+
e2e-env = false
|
|
7
|
+
|
|
8
|
+
[[envs.default.matrix]]
|
|
9
|
+
python = ["3.13"]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"manifest_version": "2.0.0",
|
|
3
|
+
"app_id": "smartd",
|
|
4
|
+
"app_uuid": "38657b7d-a5d3-4a34-b65c-0dc36e23f330",
|
|
5
|
+
"display_on_public_website": true,
|
|
6
|
+
"tile": {
|
|
7
|
+
"overview": "README.md#Overview",
|
|
8
|
+
"configuration": "README.md#Setup",
|
|
9
|
+
"support": "README.md#Support",
|
|
10
|
+
"changelog": "CHANGELOG.md",
|
|
11
|
+
"description": "Monitor S.M.A.R.T. disk health via smartd state files",
|
|
12
|
+
"title": "smartd",
|
|
13
|
+
"media": [],
|
|
14
|
+
"classifier_tags": [
|
|
15
|
+
"Supported OS::Linux",
|
|
16
|
+
"Category::OS & System",
|
|
17
|
+
"Offering::Integration"
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
"author": {
|
|
21
|
+
"support_email": "help@datadoghq.com",
|
|
22
|
+
"name": "Community",
|
|
23
|
+
"homepage": "https://github.com/DataDog/integrations-extras"
|
|
24
|
+
},
|
|
25
|
+
"assets": {
|
|
26
|
+
"integration": {
|
|
27
|
+
"source_type_name": "smartd",
|
|
28
|
+
"configuration": {
|
|
29
|
+
"spec": "assets/configuration/spec.yaml"
|
|
30
|
+
},
|
|
31
|
+
"events": {
|
|
32
|
+
"creates_events": false
|
|
33
|
+
},
|
|
34
|
+
"metrics": {
|
|
35
|
+
"prefix": "smartd.",
|
|
36
|
+
"check": "smartd.temperature",
|
|
37
|
+
"metadata_path": "metadata.csv"
|
|
38
|
+
},
|
|
39
|
+
"service_checks": {
|
|
40
|
+
"metadata_path": "assets/service_checks.json"
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"dashboards": {
|
|
44
|
+
"smartd Overview": "assets/dashboards/smartd_overview.json"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=0.11.2"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "datadog-smartd"
|
|
7
|
+
description = "The smartd check"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
license = "MIT"
|
|
10
|
+
requires-python = ">=3.13"
|
|
11
|
+
keywords = ["datadog", "datadog agent", "datadog check", "smartd", "smart"]
|
|
12
|
+
authors = [{ name = "Community" }]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: System Administrators",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3.13",
|
|
19
|
+
"Topic :: System :: Monitoring",
|
|
20
|
+
]
|
|
21
|
+
dependencies = ["datadog-checks-base>=37.33.0"]
|
|
22
|
+
dynamic = ["version"]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
deps = []
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Source = "https://github.com/DataDog/integrations-extras"
|
|
29
|
+
|
|
30
|
+
[tool.hatch.version]
|
|
31
|
+
path = "datadog_checks/smartd/__about__.py"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.sdist]
|
|
34
|
+
include = ["/datadog_checks", "/tests", "/manifest.json"]
|
|
35
|
+
|
|
36
|
+
[tool.hatch.build.targets.wheel]
|
|
37
|
+
include = ["/datadog_checks/smartd"]
|
|
38
|
+
dev-mode-dirs = ["."]
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# Directory containing this module, and the smartd state file fixtures next to it.
HERE = os.path.dirname(os.path.abspath(__file__))
FIXTURE_DIR = os.path.join(HERE, 'fixtures')

CHECK_NAME = 'smartd'

# Instance configuration pointing the check at the bundled fixtures.
INSTANCE = {
    'smartd_state_dir': FIXTURE_DIR,
    'file_pattern': 'smartd.*.state',
}


def _device_tags(model, serial):
    """Build the device tags expected for a fixture drive with this model and serial."""
    return ['device_model:{}'.format(model), 'serial_number:{}'.format(serial)]


# Drive described by fixtures/smartd.ACME_DISK4000-SN123456789ABC.ata.state
HEALTHY_MODEL = 'ACME_DISK4000'
HEALTHY_SERIAL = 'SN123456789ABC'
HEALTHY_TAGS = _device_tags(HEALTHY_MODEL, HEALTHY_SERIAL)

# Drive described by fixtures/smartd.ACME_DISK4000-SN987654321XYZ.ata.state
DEGRADED_MODEL = 'ACME_DISK4000'
DEGRADED_SERIAL = 'SN987654321XYZ'
DEGRADED_TAGS = _device_tags(DEGRADED_MODEL, DEGRADED_SERIAL)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from datadog_checks.smartd import SmartdCheck
|
|
4
|
+
|
|
5
|
+
from .common import CHECK_NAME, INSTANCE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture(scope='session')
def dd_environment():
    """Session-scoped fixture that yields the instance configuration from tests.common."""
    instance_config = INSTANCE
    yield instance_config
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
def check():
    """Return a SmartdCheck built from the shared fixture instance configuration."""
    instances = [INSTANCE]
    return SmartdCheck(CHECK_NAME, {}, instances)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# smartd state file
|
|
2
|
+
ata-smart-attribute.0.id = 1
|
|
3
|
+
ata-smart-attribute.0.val = 100
|
|
4
|
+
ata-smart-attribute.0.worst = 100
|
|
5
|
+
ata-smart-attribute.1.id = 2
|
|
6
|
+
ata-smart-attribute.1.val = 138
|
|
7
|
+
ata-smart-attribute.1.worst = 138
|
|
8
|
+
ata-smart-attribute.1.raw = 76
|
|
9
|
+
ata-smart-attribute.2.id = 3
|
|
10
|
+
ata-smart-attribute.2.val = 133
|
|
11
|
+
ata-smart-attribute.2.worst = 133
|
|
12
|
+
ata-smart-attribute.2.raw = 38683869672
|
|
13
|
+
ata-smart-attribute.3.id = 4
|
|
14
|
+
ata-smart-attribute.3.val = 100
|
|
15
|
+
ata-smart-attribute.3.worst = 100
|
|
16
|
+
ata-smart-attribute.3.raw = 29
|
|
17
|
+
ata-smart-attribute.4.id = 5
|
|
18
|
+
ata-smart-attribute.4.val = 100
|
|
19
|
+
ata-smart-attribute.4.worst = 100
|
|
20
|
+
ata-smart-attribute.5.id = 7
|
|
21
|
+
ata-smart-attribute.5.val = 100
|
|
22
|
+
ata-smart-attribute.5.worst = 100
|
|
23
|
+
ata-smart-attribute.6.id = 8
|
|
24
|
+
ata-smart-attribute.6.val = 138
|
|
25
|
+
ata-smart-attribute.6.worst = 138
|
|
26
|
+
ata-smart-attribute.6.raw = 27
|
|
27
|
+
ata-smart-attribute.7.id = 9
|
|
28
|
+
ata-smart-attribute.7.val = 87
|
|
29
|
+
ata-smart-attribute.7.worst = 87
|
|
30
|
+
ata-smart-attribute.7.raw = 91000
|
|
31
|
+
ata-smart-attribute.8.id = 10
|
|
32
|
+
ata-smart-attribute.8.val = 100
|
|
33
|
+
ata-smart-attribute.8.worst = 100
|
|
34
|
+
ata-smart-attribute.9.id = 12
|
|
35
|
+
ata-smart-attribute.9.val = 100
|
|
36
|
+
ata-smart-attribute.9.worst = 100
|
|
37
|
+
ata-smart-attribute.9.raw = 29
|
|
38
|
+
ata-smart-attribute.10.id = 192
|
|
39
|
+
ata-smart-attribute.10.val = 100
|
|
40
|
+
ata-smart-attribute.10.worst = 100
|
|
41
|
+
ata-smart-attribute.10.raw = 1168
|
|
42
|
+
ata-smart-attribute.11.id = 193
|
|
43
|
+
ata-smart-attribute.11.val = 100
|
|
44
|
+
ata-smart-attribute.11.worst = 100
|
|
45
|
+
ata-smart-attribute.11.raw = 1168
|
|
46
|
+
ata-smart-attribute.12.id = 194
|
|
47
|
+
ata-smart-attribute.12.val = 162
|
|
48
|
+
ata-smart-attribute.12.worst = 162
|
|
49
|
+
ata-smart-attribute.12.raw = 201864314917
|
|
50
|
+
ata-smart-attribute.13.id = 196
|
|
51
|
+
ata-smart-attribute.13.val = 100
|
|
52
|
+
ata-smart-attribute.13.worst = 100
|
|
53
|
+
ata-smart-attribute.14.id = 197
|
|
54
|
+
ata-smart-attribute.14.val = 100
|
|
55
|
+
ata-smart-attribute.14.worst = 100
|
|
56
|
+
ata-smart-attribute.15.id = 198
|
|
57
|
+
ata-smart-attribute.15.val = 100
|
|
58
|
+
ata-smart-attribute.15.worst = 100
|
|
59
|
+
ata-smart-attribute.16.id = 199
|
|
60
|
+
ata-smart-attribute.16.val = 200
|
|
61
|
+
ata-smart-attribute.16.worst = 200
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# smartd state file
|
|
2
|
+
ata-smart-attribute.0.id = 1
|
|
3
|
+
ata-smart-attribute.0.val = 98
|
|
4
|
+
ata-smart-attribute.0.worst = 95
|
|
5
|
+
ata-smart-attribute.0.raw = 12
|
|
6
|
+
ata-smart-attribute.1.id = 2
|
|
7
|
+
ata-smart-attribute.1.val = 130
|
|
8
|
+
ata-smart-attribute.1.worst = 130
|
|
9
|
+
ata-smart-attribute.1.raw = 80
|
|
10
|
+
ata-smart-attribute.2.id = 3
|
|
11
|
+
ata-smart-attribute.2.val = 120
|
|
12
|
+
ata-smart-attribute.2.worst = 120
|
|
13
|
+
ata-smart-attribute.2.raw = 42949672960
|
|
14
|
+
ata-smart-attribute.3.id = 4
|
|
15
|
+
ata-smart-attribute.3.val = 100
|
|
16
|
+
ata-smart-attribute.3.worst = 100
|
|
17
|
+
ata-smart-attribute.3.raw = 45
|
|
18
|
+
ata-smart-attribute.4.id = 5
|
|
19
|
+
ata-smart-attribute.4.val = 95
|
|
20
|
+
ata-smart-attribute.4.worst = 95
|
|
21
|
+
ata-smart-attribute.4.raw = 16
|
|
22
|
+
ata-smart-attribute.5.id = 7
|
|
23
|
+
ata-smart-attribute.5.val = 100
|
|
24
|
+
ata-smart-attribute.5.worst = 100
|
|
25
|
+
ata-smart-attribute.6.id = 8
|
|
26
|
+
ata-smart-attribute.6.val = 135
|
|
27
|
+
ata-smart-attribute.6.worst = 135
|
|
28
|
+
ata-smart-attribute.6.raw = 30
|
|
29
|
+
ata-smart-attribute.7.id = 9
|
|
30
|
+
ata-smart-attribute.7.val = 75
|
|
31
|
+
ata-smart-attribute.7.worst = 75
|
|
32
|
+
ata-smart-attribute.7.raw = 105000
|
|
33
|
+
ata-smart-attribute.8.id = 10
|
|
34
|
+
ata-smart-attribute.8.val = 100
|
|
35
|
+
ata-smart-attribute.8.worst = 100
|
|
36
|
+
ata-smart-attribute.9.id = 12
|
|
37
|
+
ata-smart-attribute.9.val = 100
|
|
38
|
+
ata-smart-attribute.9.worst = 100
|
|
39
|
+
ata-smart-attribute.9.raw = 45
|
|
40
|
+
ata-smart-attribute.10.id = 192
|
|
41
|
+
ata-smart-attribute.10.val = 100
|
|
42
|
+
ata-smart-attribute.10.worst = 100
|
|
43
|
+
ata-smart-attribute.10.raw = 2000
|
|
44
|
+
ata-smart-attribute.11.id = 193
|
|
45
|
+
ata-smart-attribute.11.val = 100
|
|
46
|
+
ata-smart-attribute.11.worst = 100
|
|
47
|
+
ata-smart-attribute.11.raw = 2000
|
|
48
|
+
ata-smart-attribute.12.id = 194
|
|
49
|
+
ata-smart-attribute.12.val = 150
|
|
50
|
+
ata-smart-attribute.12.worst = 140
|
|
51
|
+
ata-smart-attribute.12.raw = 201864314921
|
|
52
|
+
ata-smart-attribute.13.id = 196
|
|
53
|
+
ata-smart-attribute.13.val = 95
|
|
54
|
+
ata-smart-attribute.13.worst = 95
|
|
55
|
+
ata-smart-attribute.13.raw = 16
|
|
56
|
+
ata-smart-attribute.14.id = 197
|
|
57
|
+
ata-smart-attribute.14.val = 100
|
|
58
|
+
ata-smart-attribute.14.worst = 100
|
|
59
|
+
ata-smart-attribute.14.raw = 2
|
|
60
|
+
ata-smart-attribute.15.id = 198
|
|
61
|
+
ata-smart-attribute.15.val = 100
|
|
62
|
+
ata-smart-attribute.15.worst = 100
|
|
63
|
+
ata-smart-attribute.16.id = 199
|
|
64
|
+
ata-smart-attribute.16.val = 200
|
|
65
|
+
ata-smart-attribute.16.worst = 200
|
|
66
|
+
ata-smart-attribute.16.raw = 3
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from unittest.mock import patch
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from datadog_checks.base import AgentCheck
|
|
7
|
+
from datadog_checks.smartd import SmartdCheck
|
|
8
|
+
|
|
9
|
+
from .common import (
|
|
10
|
+
CHECK_NAME,
|
|
11
|
+
DEGRADED_TAGS,
|
|
12
|
+
FIXTURE_DIR,
|
|
13
|
+
HEALTHY_TAGS,
|
|
14
|
+
INSTANCE,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Apply the `unit` marker to every test in this module.
pytestmark = pytest.mark.unit
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_check_healthy_and_degraded(aggregator, dd_run_check):
    """Run the check on both bundled fixtures and verify metrics and service checks."""
    dd_run_check(SmartdCheck(CHECK_NAME, {}, [INSTANCE]))

    expected_metrics = [
        # Healthy drive metrics
        ('smartd.raw_read_error_rate', 0, HEALTHY_TAGS),
        ('smartd.reallocated_sectors', 0, HEALTHY_TAGS),
        ('smartd.power_on_hours', 91000, HEALTHY_TAGS),
        ('smartd.spin_retry_count', 0, HEALTHY_TAGS),
        ('smartd.power_cycle_count', 29, HEALTHY_TAGS),
        ('smartd.temperature', 37, HEALTHY_TAGS),
        ('smartd.reallocated_event_count', 0, HEALTHY_TAGS),
        ('smartd.current_pending_sectors', 0, HEALTHY_TAGS),
        ('smartd.offline_uncorrectable', 0, HEALTHY_TAGS),
        ('smartd.udma_crc_error_count', 0, HEALTHY_TAGS),
        # Degraded drive metrics
        ('smartd.reallocated_sectors', 16, DEGRADED_TAGS),
        ('smartd.current_pending_sectors', 2, DEGRADED_TAGS),
        ('smartd.udma_crc_error_count', 3, DEGRADED_TAGS),
        ('smartd.temperature', 41, DEGRADED_TAGS),
        ('smartd.power_on_hours', 105000, DEGRADED_TAGS),
    ]
    for metric, expected_value, metric_tags in expected_metrics:
        aggregator.assert_metric(metric, value=expected_value, tags=metric_tags)

    # Service checks: one disk_health per drive plus the overall can_read
    expected_health = [
        (AgentCheck.OK, HEALTHY_TAGS),
        (AgentCheck.WARNING, DEGRADED_TAGS),
    ]
    for status, drive_tags in expected_health:
        aggregator.assert_service_check('smartd.disk_health', status, tags=drive_tags)
    aggregator.assert_service_check('smartd.can_read', AgentCheck.OK)

    aggregator.assert_all_metrics_covered()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_check_no_files(aggregator, dd_run_check):
    """can_read must be CRITICAL when the state directory yields no files."""
    missing_dir_instance = {'smartd_state_dir': '/nonexistent/path'}
    dd_run_check(SmartdCheck(CHECK_NAME, {}, [missing_dir_instance]))

    aggregator.assert_service_check('smartd.can_read', AgentCheck.CRITICAL)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_check_empty_file(aggregator, dd_run_check, tmp_path):
    """An empty state file yields an OK disk_health for the drive and an OK can_read."""
    (tmp_path / 'smartd.EMPTY_DRIVE-SERIAL000.ata.state').write_text('')

    smartd_check = SmartdCheck(CHECK_NAME, {}, [{'smartd_state_dir': str(tmp_path)}])
    dd_run_check(smartd_check)

    drive_tags = ['device_model:EMPTY_DRIVE', 'serial_number:SERIAL000']
    aggregator.assert_service_check('smartd.disk_health', AgentCheck.OK, tags=drive_tags)
    aggregator.assert_service_check('smartd.can_read', AgentCheck.OK)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_check_malformed_lines(aggregator, dd_run_check, tmp_path):
    """Comment, garbage and unknown-field lines are ignored while valid lines are parsed."""
    contents = [
        '# comment line\n',
        'garbage line\n',
        'ata-smart-attribute.0.id = 194\n',
        'ata-smart-attribute.0.val = 160\n',
        'ata-smart-attribute.0.raw = 201864314917\n',
        'ata-smart-attribute.0.bad_field = 999\n',
    ]
    (tmp_path / 'smartd.TEST_DRIVE-SERIAL001.ata.state').write_text(''.join(contents))

    smartd_check = SmartdCheck(CHECK_NAME, {}, [{'smartd_state_dir': str(tmp_path)}])
    dd_run_check(smartd_check)

    drive_tags = ['device_model:TEST_DRIVE', 'serial_number:SERIAL001']
    aggregator.assert_metric('smartd.temperature', value=37, tags=drive_tags)
    aggregator.assert_service_check('smartd.disk_health', AgentCheck.OK, tags=drive_tags)
    aggregator.assert_service_check('smartd.can_read', AgentCheck.OK)
    aggregator.assert_all_metrics_covered()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_check_unparseable_filename(aggregator, dd_run_check, tmp_path):
    """A matching file whose name lacks model/serial info is skipped entirely.

    The file still counts as found, so ``can_read`` is OK, but no metric and
    no ``disk_health`` service check may be emitted for it.
    """
    state_file = tmp_path / 'smartd.bad-filename.state'
    state_file.write_text('ata-smart-attribute.0.id = 194\n')

    instance = {
        'smartd_state_dir': str(tmp_path),
        'file_pattern': 'smartd.*.state',
    }
    check = SmartdCheck(CHECK_NAME, {}, [instance])
    dd_run_check(check)

    # File is found so can_read is OK, but no metrics emitted for unparseable filename
    aggregator.assert_service_check('smartd.can_read', AgentCheck.OK)
    aggregator.assert_service_check('smartd.disk_health', count=0)
    # No metric was asserted above, so this passes only if none was submitted.
    aggregator.assert_all_metrics_covered()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_custom_tags(aggregator, dd_run_check, tmp_path):
    """Instance-level tags are appended after the device tags on emitted metrics."""
    contents = [
        'ata-smart-attribute.0.id = 194\n',
        'ata-smart-attribute.0.val = 160\n',
        'ata-smart-attribute.0.raw = 201864314917\n',
    ]
    (tmp_path / 'smartd.TAG_DRIVE-SERIAL002.ata.state').write_text(''.join(contents))

    tagged_instance = {
        'smartd_state_dir': str(tmp_path),
        'tags': ['datacenter:us-east', 'rack:42'],
    }
    dd_run_check(SmartdCheck(CHECK_NAME, {}, [tagged_instance]))

    expected_tags = ['device_model:TAG_DRIVE', 'serial_number:SERIAL002', 'datacenter:us-east', 'rack:42']
    aggregator.assert_metric('smartd.temperature', value=37, tags=expected_tags)
    aggregator.assert_all_metrics_covered()
|