cem_acpt 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/cem_acpt/config/cem_acpt_scan.rb +1 -0
- data/lib/cem_acpt/scan/daemon_client.rb +85 -21
- data/lib/cem_acpt/test_runner.rb +2 -1
- data/lib/cem_acpt/version.rb +1 -1
- data/lib/terraform/gcp/linux/scan/scan_service.rb +53 -4
- data/specifications/CEM-6799.md +294 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1e37459e007c8e05a53fddf7920cefb42223e7e490785a3601372fdf172de82f
|
|
4
|
+
data.tar.gz: 1147801fce2be7b5d0a1847d817449ad2492c8ec04707d5a82713663b2406804
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e3e28851afbfdeeb9a2d4ca0cfc0fb10f304ba121111422416f5a50757fc9ea50bdc30db20149d00b0d04dcf91d548e73c03276863601037417ce1c39a914090
|
|
7
|
+
data.tar.gz: 232de289f1ba433d6e5c25737cadffc8641dae7052075e359e663f935225547b44f3e430f8509c9f2519f8d8abf41204b83c76964eee387cf554f45bafd04b49
|
data/lib/cem_acpt/scan/daemon_client.rb
CHANGED
|
@@ -9,33 +9,42 @@ require_relative 'result'
|
|
|
9
9
|
module CemAcpt
|
|
10
10
|
module Scan
|
|
11
11
|
# HTTP client for the on-node scan daemon. Mirrors the role of
|
|
12
|
-
# {CemAcpt::Goss::Api}: build URIs,
|
|
13
|
-
# turn the
|
|
12
|
+
# {CemAcpt::Goss::Api}: build URIs, drive the async scan endpoints, parse
|
|
13
|
+
# JSON responses, turn the result into a typed {Result}.
|
|
14
14
|
#
|
|
15
15
|
# The daemon is installed by {CemAcpt::Provision::Linux#scan_provision_commands}
|
|
16
|
-
# and serves
|
|
16
|
+
# and serves four endpoints on the configurable scan port:
|
|
17
17
|
#
|
|
18
|
-
# GET
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
18
|
+
# GET /health -> 200 OK once the daemon has started and the scanner
|
|
19
|
+
# binaries it needs are present on disk.
|
|
20
|
+
# POST /scan -> 202 Accepted; kicks off the scan in a background thread.
|
|
21
|
+
# 409 Conflict if a scan is already running.
|
|
22
|
+
# GET /status -> { "state": "idle" | "running" | "done" | "error" }.
|
|
23
|
+
# GET /result -> 200 with the JSON result when state is terminal,
|
|
24
|
+
# 404 otherwise.
|
|
25
|
+
#
|
|
26
|
+
# {#scan} drives the async flow: POST /scan, then poll /status, then
|
|
27
|
+
# GET /result. Each request is short-lived to avoid the connection-drop
|
|
28
|
+
# failure mode that affected the previous synchronous /scan design.
|
|
25
29
|
class DaemonClient
|
|
26
30
|
DEFAULT_PORT = 8084
|
|
27
31
|
DEFAULT_READY_TIMEOUT = 60
|
|
28
|
-
DEFAULT_HTTP_TIMEOUT = 1800 # 30 minutes —
|
|
32
|
+
DEFAULT_HTTP_TIMEOUT = 1800 # 30 minutes — overall deadline for a scan
|
|
33
|
+
DEFAULT_POLL_INTERVAL = 10
|
|
34
|
+
DEFAULT_REQUEST_TIMEOUT = 60
|
|
29
35
|
|
|
30
36
|
# @param host [String] The IP or DNS name of the test node.
|
|
31
37
|
# @param port [Integer] The port the daemon listens on.
|
|
32
38
|
# @param ready_timeout [Integer] How long to wait for /health.
|
|
33
|
-
# @param http_timeout [Integer]
|
|
34
|
-
|
|
39
|
+
# @param http_timeout [Integer] Overall deadline for a scan (POST + polls + result fetch).
|
|
40
|
+
# @param poll_interval [Integer] Seconds between /status polls.
|
|
41
|
+
def initialize(host:, port: DEFAULT_PORT, ready_timeout: DEFAULT_READY_TIMEOUT,
|
|
42
|
+
http_timeout: DEFAULT_HTTP_TIMEOUT, poll_interval: DEFAULT_POLL_INTERVAL)
|
|
35
43
|
@host = host
|
|
36
44
|
@port = port
|
|
37
45
|
@ready_timeout = ready_timeout
|
|
38
46
|
@http_timeout = http_timeout
|
|
47
|
+
@poll_interval = poll_interval
|
|
39
48
|
end
|
|
40
49
|
|
|
41
50
|
# Polls /health until it returns 200 or the timeout elapses.
|
|
@@ -56,35 +65,90 @@ module CemAcpt
|
|
|
56
65
|
raise DaemonNotReadyError, msg
|
|
57
66
|
end
|
|
58
67
|
|
|
59
|
-
#
|
|
68
|
+
# Kicks off the scan, polls until it finishes, and fetches the result.
|
|
60
69
|
# @param test_case [String] Acceptance-test directory name.
|
|
61
70
|
# @param scanner [Symbol] :openscap or :ciscat.
|
|
62
71
|
# @param profile [String] Scanner-native profile id.
|
|
63
72
|
# @param threshold [Float] Pass threshold (0-100).
|
|
64
73
|
# @return [Result]
|
|
65
|
-
# @raise [ScannerInvocationError] on
|
|
74
|
+
# @raise [ScannerInvocationError] on protocol errors or scan deadline exceeded.
|
|
66
75
|
def scan(test_case:, scanner:, profile:, threshold:)
|
|
67
|
-
|
|
68
|
-
|
|
76
|
+
start_scan
|
|
77
|
+
wait_for_completion
|
|
78
|
+
body = fetch_result
|
|
79
|
+
Result.new(test_case: test_case, scanner: scanner, profile: profile, threshold: threshold, body: body)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
def start_scan
|
|
85
|
+
status, body = post(URI("http://#{@host}:#{@port}/scan"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
86
|
+
return if status.to_i == 202
|
|
87
|
+
|
|
88
|
+
raise ScannerInvocationError, "Scan kickoff at #{@host}:#{@port} returned status #{status}: #{body.inspect}"
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def wait_for_completion
|
|
92
|
+
deadline = Time.now + @http_timeout
|
|
93
|
+
loop do
|
|
94
|
+
raise ScannerInvocationError, "Scan did not finish within #{@http_timeout}s" if Time.now > deadline
|
|
95
|
+
|
|
96
|
+
_status, body = get(URI("http://#{@host}:#{@port}/status"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
97
|
+
case body['state']
|
|
98
|
+
when 'done', 'error' then return
|
|
99
|
+
when 'running' then sleep @poll_interval
|
|
100
|
+
else
|
|
101
|
+
raise ScannerInvocationError, "Unexpected scan state from #{@host}:#{@port}: #{body['state'].inspect}"
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def fetch_result
|
|
107
|
+
status, body = get(URI("http://#{@host}:#{@port}/result"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
69
108
|
unless status.to_i == 200
|
|
70
109
|
raise ScannerInvocationError, "Scan daemon at #{@host}:#{@port} returned status #{status}: #{body.inspect}"
|
|
71
110
|
end
|
|
72
111
|
|
|
73
|
-
|
|
112
|
+
body
|
|
74
113
|
end
|
|
75
114
|
|
|
76
|
-
private
|
|
77
|
-
|
|
78
115
|
def get(uri, timeout:)
|
|
116
|
+
request(uri, Net::HTTP::Get.new(uri.request_uri), timeout: timeout)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def post(uri, timeout:)
|
|
120
|
+
request(uri, Net::HTTP::Post.new(uri.request_uri), timeout: timeout)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def request(uri, req, timeout:)
|
|
79
124
|
http = Net::HTTP.new(uri.host, uri.port)
|
|
80
125
|
http.read_timeout = timeout
|
|
81
126
|
http.open_timeout = [timeout, 30].min
|
|
82
|
-
|
|
127
|
+
http.start
|
|
128
|
+
enable_tcp_keepalive(http)
|
|
129
|
+
response = http.request(req)
|
|
83
130
|
body = response.body.to_s
|
|
84
131
|
parsed = body.empty? ? {} : JSON.parse(body)
|
|
85
132
|
[response.code, parsed]
|
|
86
133
|
rescue JSON::ParserError => e
|
|
87
134
|
raise ScannerInvocationError, "Scan daemon at #{@host}:#{@port} returned non-JSON body: #{e.message}"
|
|
135
|
+
ensure
|
|
136
|
+
http.finish if http.started?
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def enable_tcp_keepalive(http)
|
|
140
|
+
socket = http.instance_variable_get(:@socket)&.io
|
|
141
|
+
return unless socket
|
|
142
|
+
|
|
143
|
+
socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, true)
|
|
144
|
+
# Linux uses TCP_KEEPIDLE; macOS uses TCP_KEEPALIVE for the same idle-time option.
|
|
145
|
+
idle_opt = Socket.const_defined?(:TCP_KEEPIDLE) ? Socket::TCP_KEEPIDLE : Socket::TCP_KEEPALIVE
|
|
146
|
+
socket.setsockopt(Socket::IPPROTO_TCP, idle_opt, 60)
|
|
147
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 10)
|
|
148
|
+
socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 6)
|
|
149
|
+
rescue Errno::ENOPROTOOPT
|
|
150
|
+
# Platform supports SO_KEEPALIVE but not the fine-grained probe constants;
|
|
151
|
+
# SO_KEEPALIVE alone was already set and provides partial protection.
|
|
88
152
|
end
|
|
89
153
|
end
|
|
90
154
|
end
|
data/lib/cem_acpt/test_runner.rb
CHANGED
|
@@ -315,6 +315,7 @@ module CemAcpt
|
|
|
315
315
|
logger.info('CemAcpt::TestRunner') { 'Running scan action...' }
|
|
316
316
|
port = config.get('cem_acpt_scan.daemon.port') || CemAcpt::Scan::DaemonClient::DEFAULT_PORT
|
|
317
317
|
ready_timeout = config.get('cem_acpt_scan.daemon.ready_timeout') || CemAcpt::Scan::DaemonClient::DEFAULT_READY_TIMEOUT
|
|
318
|
+
poll_interval = config.get('cem_acpt_scan.daemon.poll_interval') || CemAcpt::Scan::DaemonClient::DEFAULT_POLL_INTERVAL
|
|
318
319
|
global_threshold = (config.get('cem_acpt_scan.threshold') || 80.0).to_f
|
|
319
320
|
per_case_thresholds = config.get('cem_acpt_scan.test_thresholds') || {}
|
|
320
321
|
scan_output = config.get('cem_acpt_scan.scan_output')
|
|
@@ -326,7 +327,7 @@ module CemAcpt
|
|
|
326
327
|
scan_meta = scan_meta_for(test_name)
|
|
327
328
|
threshold = per_case_thresholds[test_name] || per_case_thresholds[test_name.to_sym] || global_threshold
|
|
328
329
|
|
|
329
|
-
client = CemAcpt::Scan::DaemonClient.new(host: host, port: port, ready_timeout: ready_timeout)
|
|
330
|
+
client = CemAcpt::Scan::DaemonClient.new(host: host, port: port, ready_timeout: ready_timeout, poll_interval: poll_interval)
|
|
330
331
|
client.wait_until_ready
|
|
331
332
|
result = client.scan(
|
|
332
333
|
test_case: test_name,
|
data/lib/terraform/gcp/linux/scan/scan_service.rb
CHANGED
|
@@ -127,6 +127,13 @@ def perform_scan
|
|
|
127
127
|
end
|
|
128
128
|
end
|
|
129
129
|
|
|
130
|
+
# Module-level scan state guarded by a Mutex. POST /scan flips state to
|
|
131
|
+
# 'running' and spawns a background thread; the thread parks the result and
|
|
132
|
+
# flips state to 'done' or 'error' on completion. GET /status and GET /result
|
|
133
|
+
# read the state under the mutex. One scan in flight per daemon.
|
|
134
|
+
SCAN_STATE = { state: 'idle', result: nil, started_at: nil }
|
|
135
|
+
SCAN_MUTEX = Mutex.new
|
|
136
|
+
|
|
130
137
|
port = ENV.fetch('SCAN_DAEMON_PORT', '8084').to_i
|
|
131
138
|
server = WEBrick::HTTPServer.new(Port: port, BindAddress: '0.0.0.0')
|
|
132
139
|
|
|
@@ -136,11 +143,53 @@ server.mount_proc('/health') do |_req, res|
|
|
|
136
143
|
res.body = JSON.generate('status' => 'ok')
|
|
137
144
|
end
|
|
138
145
|
|
|
139
|
-
server.mount_proc('/scan') do |
|
|
140
|
-
|
|
141
|
-
|
|
146
|
+
server.mount_proc('/scan') do |req, res|
|
|
147
|
+
res['Content-Type'] = 'application/json'
|
|
148
|
+
unless req.request_method == 'POST'
|
|
149
|
+
res.status = 405
|
|
150
|
+
res.body = JSON.generate('error' => 'use POST /scan')
|
|
151
|
+
next
|
|
152
|
+
end
|
|
153
|
+
SCAN_MUTEX.synchronize do
|
|
154
|
+
if SCAN_STATE[:state] == 'running'
|
|
155
|
+
res.status = 409
|
|
156
|
+
res.body = JSON.generate('error' => 'scan already running')
|
|
157
|
+
next
|
|
158
|
+
end
|
|
159
|
+
SCAN_STATE[:state] = 'running'
|
|
160
|
+
SCAN_STATE[:started_at] = Time.now
|
|
161
|
+
SCAN_STATE[:result] = nil
|
|
162
|
+
end
|
|
163
|
+
Thread.new do
|
|
164
|
+
payload = perform_scan
|
|
165
|
+
SCAN_MUTEX.synchronize do
|
|
166
|
+
SCAN_STATE[:state] = payload['error'] ? 'error' : 'done'
|
|
167
|
+
SCAN_STATE[:result] = payload
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
res.status = 202
|
|
171
|
+
res.body = JSON.generate('status' => 'started')
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
server.mount_proc('/status') do |_req, res|
|
|
175
|
+
res['Content-Type'] = 'application/json'
|
|
176
|
+
SCAN_MUTEX.synchronize do
|
|
177
|
+
res.status = 200
|
|
178
|
+
res.body = JSON.generate('state' => SCAN_STATE[:state])
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
server.mount_proc('/result') do |_req, res|
|
|
142
183
|
res['Content-Type'] = 'application/json'
|
|
143
|
-
|
|
184
|
+
SCAN_MUTEX.synchronize do
|
|
185
|
+
if %w[done error].include?(SCAN_STATE[:state])
|
|
186
|
+
res.status = 200
|
|
187
|
+
res.body = JSON.generate(SCAN_STATE[:result])
|
|
188
|
+
else
|
|
189
|
+
res.status = 404
|
|
190
|
+
res.body = JSON.generate('error' => "no result available, state: #{SCAN_STATE[:state]}")
|
|
191
|
+
end
|
|
192
|
+
end
|
|
144
193
|
end
|
|
145
194
|
|
|
146
195
|
trap('INT') { server.shutdown }
|
data/specifications/CEM-6799.md
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
# CEM-6799 — Async scan protocol and TCP keepalive on DaemonClient
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
`DaemonClient#scan` makes a single synchronous `GET /scan` and holds the TCP
|
|
6
|
+
connection open for the full scan duration (15–30 minutes for STIG/CIS
|
|
7
|
+
benchmarks). Two failure modes follow from this:
|
|
8
|
+
|
|
9
|
+
1. **Idle connection drop.** During the long wait no data flows; GCP's VPC
|
|
10
|
+
firewall, Cloud NAT, or OS-level connection tracking drops the idle session
|
|
11
|
+
and the client raises `Errno::ECONNRESET` or `Errno::ENOTCONN` at
|
|
12
|
+
`read_nonblock`. Confirmed in local runs (`ENOTCONN` after ~1007 s) and
|
|
13
|
+
GitHub Actions CI (`ECONNRESET` after ~912 s).
|
|
14
|
+
2. **Response delivery failure.** Even when the scan completes successfully on
|
|
15
|
+
the node and writes its result file to disk, the response body cannot be
|
|
16
|
+
delivered back over a connection that has been idle long enough to be torn
|
|
17
|
+
down. Confirmed on Ubuntu 24.04: `ciscat-results.json` (81 KB) and
|
|
18
|
+
`ciscat-results-ARF.xml` (8.7 MB) both written to
|
|
19
|
+
`/opt/cem_acpt/scan/reports/`, but the client raised `ENOTCONN` at 736 s
|
|
20
|
+
while reading the response.
|
|
21
|
+
|
|
22
|
+
The fix has two parts:
|
|
23
|
+
|
|
24
|
+
1. **Async scan protocol.** `/scan` becomes a kickoff endpoint that returns
|
|
25
|
+
immediately; the daemon runs the scan in a background thread. The client
|
|
26
|
+
polls a new `/status` endpoint with short-lived HTTP calls, then fetches the
|
|
27
|
+
result via a new `/result` endpoint once the daemon reports completion.
|
|
28
|
+
Long-lived connections are eliminated entirely.
|
|
29
|
+
2. **TCP keepalive on `DaemonClient#get` and `#post`.** Belt-and-suspenders: even the
|
|
30
|
+
short-lived polling calls enable `SO_KEEPALIVE` so any transient idle period
|
|
31
|
+
inside a single HTTP exchange is protected.
|
|
32
|
+
|
|
33
|
+
## Functional Behavior
|
|
34
|
+
|
|
35
|
+
### Daemon endpoints — `lib/terraform/gcp/linux/scan/scan_service.rb`
|
|
36
|
+
|
|
37
|
+
**Before:**
|
|
38
|
+
|
|
39
|
+
- `GET /health` — 200 OK when daemon is up.
|
|
40
|
+
- `GET /scan` — blocks for the scan duration; returns 200 with result JSON or
|
|
41
|
+
500 with an error.
|
|
42
|
+
|
|
43
|
+
**After:**
|
|
44
|
+
|
|
45
|
+
- `GET /health` — unchanged.
|
|
46
|
+
- `POST /scan` — kicks off the scan in a background thread; returns `202
|
|
47
|
+
Accepted` with `{"status": "started"}` immediately. Returns `409 Conflict`
|
|
48
|
+
with `{"error": "scan already running"}` if a scan is already in flight.
|
|
49
|
+
- `GET /status` — new. Returns `{"state": "idle" | "running" | "done" |
|
|
50
|
+
"error"}` so the client can poll.
|
|
51
|
+
- `GET /result` — new. Returns `200` with the result JSON when the daemon's
|
|
52
|
+
state is `done` or `error`; `404` with `{"error": "no result available, state:
|
|
53
|
+
<current>"}` otherwise.
|
|
54
|
+
|
|
55
|
+
Scan state is held in module-level variables guarded by a `Mutex`. There is no
|
|
56
|
+
new class — the daemon script stays script-shaped:
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
SCAN_STATE = { state: 'idle', result: nil, started_at: nil }
|
|
60
|
+
SCAN_MUTEX = Mutex.new
|
|
61
|
+
|
|
62
|
+
server.mount_proc('/scan') do |req, res|
|
|
63
|
+
res['Content-Type'] = 'application/json'
|
|
64
|
+
unless req.request_method == 'POST'
|
|
65
|
+
res.status = 405
|
|
66
|
+
res.body = JSON.generate('error' => 'use POST /scan')
|
|
67
|
+
next
|
|
68
|
+
end
|
|
69
|
+
SCAN_MUTEX.synchronize do
|
|
70
|
+
if SCAN_STATE[:state] == 'running'
|
|
71
|
+
res.status = 409
|
|
72
|
+
res.body = JSON.generate('error' => 'scan already running')
|
|
73
|
+
next
|
|
74
|
+
end
|
|
75
|
+
SCAN_STATE[:state] = 'running'
|
|
76
|
+
SCAN_STATE[:started_at] = Time.now
|
|
77
|
+
SCAN_STATE[:result] = nil
|
|
78
|
+
end
|
|
79
|
+
Thread.new do
|
|
80
|
+
result = perform_scan
|
|
81
|
+
SCAN_MUTEX.synchronize do
|
|
82
|
+
SCAN_STATE[:state] = result['error'] ? 'error' : 'done'
|
|
83
|
+
SCAN_STATE[:result] = result
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
res.status = 202
|
|
87
|
+
res.body = JSON.generate('status' => 'started')
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
server.mount_proc('/status') do |_req, res|
|
|
91
|
+
res['Content-Type'] = 'application/json'
|
|
92
|
+
SCAN_MUTEX.synchronize do
|
|
93
|
+
res.status = 200
|
|
94
|
+
res.body = JSON.generate('state' => SCAN_STATE[:state])
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
server.mount_proc('/result') do |_req, res|
|
|
99
|
+
res['Content-Type'] = 'application/json'
|
|
100
|
+
SCAN_MUTEX.synchronize do
|
|
101
|
+
if %w[done error].include?(SCAN_STATE[:state])
|
|
102
|
+
res.status = 200
|
|
103
|
+
res.body = JSON.generate(SCAN_STATE[:result])
|
|
104
|
+
else
|
|
105
|
+
res.status = 404
|
|
106
|
+
res.body = JSON.generate('error' => "no result available, state: #{SCAN_STATE[:state]}")
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Client — `DaemonClient#scan` (`lib/cem_acpt/scan/daemon_client.rb`)
|
|
113
|
+
|
|
114
|
+
**Before:** single `GET /scan` with a 30-minute `read_timeout`.
|
|
115
|
+
|
|
116
|
+
**After:** POST → poll → GET. Each request is a short-lived connection.
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
DEFAULT_POLL_INTERVAL = 10
|
|
120
|
+
DEFAULT_REQUEST_TIMEOUT = 60
|
|
121
|
+
|
|
122
|
+
def scan(test_case:, scanner:, profile:, threshold:)
|
|
123
|
+
start_scan
|
|
124
|
+
wait_for_completion
|
|
125
|
+
body = fetch_result
|
|
126
|
+
Result.new(test_case: test_case, scanner: scanner, profile: profile, threshold: threshold, body: body)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
private
|
|
130
|
+
|
|
131
|
+
def start_scan
|
|
132
|
+
status, body = post(URI("http://#{@host}:#{@port}/scan"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
133
|
+
return if status.to_i == 202
|
|
134
|
+
raise ScannerInvocationError, "Scan kickoff at #{@host}:#{@port} returned status #{status}: #{body.inspect}"
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def wait_for_completion
|
|
138
|
+
deadline = Time.now + @http_timeout
|
|
139
|
+
loop do
|
|
140
|
+
raise ScannerInvocationError, "Scan did not finish within #{@http_timeout}s" if Time.now > deadline
|
|
141
|
+
|
|
142
|
+
_status, body = get(URI("http://#{@host}:#{@port}/status"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
143
|
+
case body['state']
|
|
144
|
+
when 'done', 'error' then return
|
|
145
|
+
when 'running' then sleep @poll_interval
|
|
146
|
+
else
|
|
147
|
+
raise ScannerInvocationError, "Unexpected scan state: #{body['state'].inspect}"
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def fetch_result
|
|
153
|
+
status, body = get(URI("http://#{@host}:#{@port}/result"), timeout: DEFAULT_REQUEST_TIMEOUT)
|
|
154
|
+
raise ScannerInvocationError, "Scan daemon at #{@host}:#{@port} returned status #{status}: #{body.inspect}" unless status.to_i == 200
|
|
155
|
+
body
|
|
156
|
+
end
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
`@poll_interval` defaults to `DEFAULT_POLL_INTERVAL` (10 s); configurable via
|
|
160
|
+
`cem_acpt_scan.daemon.poll_interval`.
|
|
161
|
+
|
|
162
|
+
A new private `post` helper mirrors `get` (same keepalive setup, same JSON
|
|
163
|
+
handling, no request body needed for `POST /scan` so it sends an empty body).
|
|
164
|
+
|
|
165
|
+
### TCP keepalive — `DaemonClient#enable_tcp_keepalive`
|
|
166
|
+
|
|
167
|
+
Unchanged from the current keepalive implementation. Sets `SO_KEEPALIVE`,
|
|
168
|
+
`TCP_KEEPIDLE` (Linux) / `TCP_KEEPALIVE` (macOS), `TCP_KEEPINTVL`,
|
|
169
|
+
`TCP_KEEPCNT` on every socket opened by `get` and `post`. Kept as
|
|
170
|
+
belt-and-suspenders even though connections are now short-lived.
|
|
171
|
+
|
|
172
|
+
## Non-Goals
|
|
173
|
+
|
|
174
|
+
- Persisting scan state across daemon restarts (state is in memory; a daemon
|
|
175
|
+
crash mid-scan loses the result).
|
|
176
|
+
- Multiple concurrent scans on a single node. The daemon serializes to one scan
|
|
177
|
+
at a time; concurrent `POST /scan` returns 409.
|
|
178
|
+
- Cancelling an in-flight scan via the API.
|
|
179
|
+
- Authentication on the scan endpoints. The daemon is firewalled to the
|
|
180
|
+
provisioning host; this is unchanged.
|
|
181
|
+
|
|
182
|
+
## Configuration
|
|
183
|
+
|
|
184
|
+
New config key:
|
|
185
|
+
|
|
186
|
+
- `cem_acpt_scan.daemon.poll_interval` — Integer, seconds between `/status`
|
|
187
|
+
polls. Default: `10`.
|
|
188
|
+
|
|
189
|
+
Existing keys unchanged:
|
|
190
|
+
|
|
191
|
+
- `cem_acpt_scan.daemon.port` — daemon port.
|
|
192
|
+
- `cem_acpt_scan.daemon.ready_timeout` — `/health` poll timeout.
|
|
193
|
+
- The host-side `http_timeout` (default 1800 s) is the total deadline for a
|
|
194
|
+
scan, including all polling, not the timeout for any single HTTP request.
|
|
195
|
+
|
|
196
|
+
## Edge Cases
|
|
197
|
+
|
|
198
|
+
- **Daemon crashes mid-scan.** The result file may still be on disk in
|
|
199
|
+
`/opt/cem_acpt/scan/reports/`, but the daemon's in-memory state is gone. On
|
|
200
|
+
daemon restart, `/status` returns `idle` and the client's poll loop will
|
|
201
|
+
raise `ScannerInvocationError` (`Unexpected scan state`) once it reconnects. Acceptable for now;
|
|
202
|
+
persistence is a non-goal.
|
|
203
|
+
- **`POST /scan` while a scan is running.** Returns 409. The client treats this
|
|
204
|
+
as an error.
|
|
205
|
+
- **`GET /result` while scan is running.** Returns 404 with state diagnostic.
|
|
206
|
+
The client only calls `/result` after `/status` reports a terminal state, so
|
|
207
|
+
this is for safety.
|
|
208
|
+
- **Background thread raises.** `perform_scan` already wraps errors and returns
|
|
209
|
+
a hash with an `error` key. The thread sets state to `error` and stores the
|
|
210
|
+
hash; the client picks it up via `/result`. The existing `ScannerInvocationError`
|
|
211
|
+
path in `DaemonClient` handles this when `Result.new` sees an error body.
|
|
212
|
+
- **macOS without `TCP_KEEPIDLE`.** The keepalive code already handles this
|
|
213
|
+
via `Socket.const_defined?(:TCP_KEEPIDLE)` falling back to
|
|
214
|
+
`Socket::TCP_KEEPALIVE`.
|
|
215
|
+
|
|
216
|
+
## Constraints / Invariants
|
|
217
|
+
|
|
218
|
+
- `DaemonClient` public interface is unchanged: `wait_until_ready` and `scan`
|
|
219
|
+
with the same signatures and return shapes.
|
|
220
|
+
- The daemon script remains a single-file standalone systemd service.
|
|
221
|
+
- One scan at a time per daemon. The daemon is per-node, so this matches the
|
|
222
|
+
per-test-case model.
|
|
223
|
+
|
|
224
|
+
## Alternatives Considered
|
|
225
|
+
|
|
226
|
+
**SCP the result file off the node.** After kicking off the scan, fetch the
|
|
227
|
+
result JSON via SSH/SCP from `/opt/cem_acpt/scan/reports/` rather than HTTP.
|
|
228
|
+
Rejected — would require threading SSH credentials through the scan layer (the
|
|
229
|
+
scan path currently uses HTTP only) and would bypass the daemon's error
|
|
230
|
+
encoding.
|
|
231
|
+
|
|
232
|
+
**Keep `GET /scan` synchronous with TCP keepalive only.** This was the original
|
|
233
|
+
CEM-6799 plan. Rejected because Ubuntu 24.04 testing on 2026-06-23 showed the
|
|
234
|
+
scan completing successfully but the response body being undeliverable: the
|
|
235
|
+
connection had been torn down despite keepalive probes. Keepalive on its own is
|
|
236
|
+
not sufficient.
|
|
237
|
+
|
|
238
|
+
## Tests
|
|
239
|
+
|
|
240
|
+
### `spec/cem_acpt/scan/daemon_client_spec.rb`
|
|
241
|
+
|
|
242
|
+
- Update existing `#scan` examples to stub three sequential calls: a `POST
|
|
243
|
+
/scan` returning 202, one or more `GET /status` returning `running` then
|
|
244
|
+
`done`, and a `GET /result` returning the JSON body.
|
|
245
|
+
- Add an example: `POST /scan` returns 409 → `ScannerInvocationError` raised.
|
|
246
|
+
- Add an example: `GET /status` returns an unrecognized state →
|
|
247
|
+
`ScannerInvocationError` raised.
|
|
248
|
+
- Add an example: poll loop hits `@http_timeout` without `done` →
|
|
249
|
+
`ScannerInvocationError` raised with "did not finish within" message.
|
|
250
|
+
- Existing `#enable_tcp_keepalive` examples unchanged.
|
|
251
|
+
|
|
252
|
+
### `spec/terraform/gcp/linux/scan/scan_service_spec.rb`
|
|
253
|
+
|
|
254
|
+
- Add text-grep assertions that `/status` and `/result` `mount_proc` blocks are
|
|
255
|
+
present, mirroring the style of the existing flag-presence assertions.
|
|
256
|
+
- Add behavioral coverage: load `scan_service.rb` under the existing WEBrick
|
|
257
|
+
stub harness, drive the new endpoints directly. Stub `perform_scan` to return
|
|
258
|
+
a fixed hash. Assert state transitions `idle → running → done` and that
|
|
259
|
+
`/result` returns the stubbed hash.
|
|
260
|
+
|
|
261
|
+
### Fixture mirror
|
|
262
|
+
|
|
263
|
+
- `spec/fixtures/config_testing/user_config_dir/terraform/gcp/linux/scan/scan_service.rb`
|
|
264
|
+
updated in lockstep.
|
|
265
|
+
- `spec/fixtures/config_testing/user_config_dir/terraform_checksum.txt`
|
|
266
|
+
regenerated.
|
|
267
|
+
|
|
268
|
+
## Acceptance Criteria
|
|
269
|
+
|
|
270
|
+
- [ ] `POST /scan` kicks off the scan in a background thread and returns 202.
|
|
271
|
+
- [ ] `POST /scan` returns 409 when a scan is already running.
|
|
272
|
+
- [ ] `GET /status` returns the current state.
|
|
273
|
+
- [ ] `GET /result` returns 200 with the result when state is terminal, 404 otherwise.
|
|
274
|
+
- [ ] `DaemonClient#scan` posts, polls, and fetches the result with short-lived requests.
|
|
275
|
+
- [ ] Poll interval is configurable via `cem_acpt_scan.daemon.poll_interval` (default 10 s).
|
|
276
|
+
- [ ] TCP keepalive remains enabled on every `DaemonClient#get` / `#post` call.
|
|
277
|
+
- [ ] Existing `daemon_client_spec.rb` examples pass after update.
|
|
278
|
+
- [ ] New unit tests for the async client flow pass.
|
|
279
|
+
- [ ] New behavioral tests for `/status` and `/result` pass.
|
|
280
|
+
- [ ] Long scans (15+ minutes) no longer fail with `ECONNRESET` or `ENOTCONN`.
|
|
281
|
+
|
|
282
|
+
## Files Touched
|
|
283
|
+
|
|
284
|
+
- `lib/cem_acpt/scan/daemon_client.rb` — async `scan` flow, new private `post`
|
|
285
|
+
helper, keepalive retained.
|
|
286
|
+
- `lib/terraform/gcp/linux/scan/scan_service.rb` — `POST /scan` semantics,
|
|
287
|
+
`/status` and `/result` endpoints, module-level state + mutex.
|
|
288
|
+
- `lib/cem_acpt/config/cem_acpt_scan.rb` — `poll_interval` default under
|
|
289
|
+
`cem_acpt_scan.daemon`.
|
|
290
|
+
- `spec/fixtures/config_testing/user_config_dir/terraform/gcp/linux/scan/scan_service.rb` — fixture mirror update.
|
|
291
|
+
- `spec/fixtures/config_testing/user_config_dir/terraform_checksum.txt` — checksum update.
|
|
292
|
+
- `spec/cem_acpt/scan/daemon_client_spec.rb` — async-flow tests.
|
|
293
|
+
- `spec/terraform/gcp/linux/scan/scan_service_spec.rb` — `/status` and `/result` tests.
|
|
294
|
+
- `specifications/CEM-6799.md` — this file.
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cem_acpt
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.12.1
|
|
4
|
+
version: 0.12.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- puppetlabs
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: async-http
|
|
@@ -349,6 +349,7 @@ files:
|
|
|
349
349
|
- specifications/CEM-6762.md
|
|
350
350
|
- specifications/CEM-6765.md
|
|
351
351
|
- specifications/CEM-6798.md
|
|
352
|
+
- specifications/CEM-6799.md
|
|
352
353
|
homepage: https://github.com/puppetlabs/cem_acpt
|
|
353
354
|
licenses:
|
|
354
355
|
- proprietary
|