kekkai-cli 1.1.0-py3-none-any.whl → 1.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
portal/ops/log_shipper.py DELETED
@@ -1,469 +0,0 @@
- """Log shipping to external systems for Kekkai Portal.
-
- Provides centralized logging capabilities:
- - Ship logs to syslog servers
- - Ship logs to webhook endpoints
- - Ship logs to file destinations
- - Log integrity verification
-
- ASVS 5.0 Requirements:
- - V16.4.3: Send logs to separate system
- - V16.4.2: Log protection
- """
-
- from __future__ import annotations
-
- import gzip
- import hashlib
- import json
- import logging
- import os
- import queue
- import socket
- import ssl
- import threading
- import time
- from dataclasses import dataclass, field
- from datetime import UTC, datetime
- from enum import Enum
- from pathlib import Path
- from typing import Any, BinaryIO
- from urllib.error import URLError
- from urllib.request import Request, urlopen
-
- logger = logging.getLogger(__name__)
-
- SHIPPER_VERSION = "1.0"
-
-
- class ShipperType(Enum):
-     """Type of log destination."""
-
-     SYSLOG = "syslog"
-     WEBHOOK = "webhook"
-     FILE = "file"
-
-
- class SyslogFacility(Enum):
-     """Syslog facility codes."""
-
-     LOCAL0 = 16
-     LOCAL1 = 17
-     LOCAL2 = 18
-     LOCAL3 = 19
-     LOCAL4 = 20
-     LOCAL5 = 21
-     LOCAL6 = 22
-     LOCAL7 = 23
-
-
- class SyslogSeverity(Enum):
-     """Syslog severity codes."""
-
-     EMERGENCY = 0
-     ALERT = 1
-     CRITICAL = 2
-     ERROR = 3
-     WARNING = 4
-     NOTICE = 5
-     INFO = 6
-     DEBUG = 7
-
-
- @dataclass
- class LogShipperConfig:
-     """Configuration for log shipper."""
-
-     shipper_type: ShipperType = ShipperType.FILE
-     enabled: bool = True
-
-     syslog_host: str = "localhost"
-     syslog_port: int = 514
-     syslog_protocol: str = "udp"
-     syslog_facility: SyslogFacility = SyslogFacility.LOCAL0
-     syslog_use_tls: bool = False
-
-     webhook_url: str = ""
-     webhook_auth_header: str = ""
-     webhook_batch_size: int = 100
-     webhook_flush_interval: int = 10
-
-     file_path: Path = field(default_factory=lambda: Path("/var/log/kekkai/shipped.jsonl"))
-     file_rotate_size_mb: int = 100
-     file_rotate_count: int = 5
-     file_compress: bool = True
-
-     buffer_size: int = 10000
-     retry_count: int = 3
-     retry_delay_seconds: float = 1.0
-
-     include_hash_chain: bool = True
-     hostname: str = field(default_factory=socket.gethostname)
-
-
- @dataclass
- class LogEntry:
-     """Represents a log entry to be shipped."""
-
-     timestamp: datetime
-     level: str
-     message: str
-     source: str = "kekkai-portal"
-     metadata: dict[str, Any] = field(default_factory=dict)
-     entry_hash: str = ""
-
-     def to_dict(self) -> dict[str, Any]:
-         """Convert to dictionary."""
-         return {
-             "timestamp": self.timestamp.isoformat(),
-             "level": self.level,
-             "message": self.message,
-             "source": self.source,
-             "metadata": self.metadata,
-             "hash": self.entry_hash,
-         }
-
-     def to_json(self) -> str:
-         """Convert to JSON string."""
-         return json.dumps(self.to_dict(), separators=(",", ":"))
-
-     def to_syslog(self, facility: SyslogFacility, hostname: str) -> str:
-         """Format as syslog message (RFC 5424)."""
-         severity = self._level_to_severity()
-         priority = facility.value * 8 + severity.value
-
-         timestamp = self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
-         structured_data = "-"
-
-         if self.metadata:
-             sd_params = " ".join(f'{k}="{v}"' for k, v in self.metadata.items())
-             structured_data = f"[kekkai@0 {sd_params}]"
-
-         return (
-             f"<{priority}>1 {timestamp} {hostname} "
-             f"{self.source} - - {structured_data} {self.message}"
-         )
-
-     def _level_to_severity(self) -> SyslogSeverity:
-         """Map log level to syslog severity."""
-         mapping = {
-             "DEBUG": SyslogSeverity.DEBUG,
-             "INFO": SyslogSeverity.INFO,
-             "WARNING": SyslogSeverity.WARNING,
-             "ERROR": SyslogSeverity.ERROR,
-             "CRITICAL": SyslogSeverity.CRITICAL,
-         }
-         return mapping.get(self.level.upper(), SyslogSeverity.INFO)
-
-
- class LogShipper:
-     """Ships logs to external systems."""
-
-     def __init__(self, config: LogShipperConfig) -> None:
-         self._config = config
-         self._buffer: queue.Queue[LogEntry] = queue.Queue(maxsize=config.buffer_size)
-         self._last_hash = "0" * 64
-         self._lock = threading.Lock()
-         self._running = False
-         self._ship_thread: threading.Thread | None = None
-         self._stats = {"shipped": 0, "failed": 0, "dropped": 0}
-
-         self._socket: socket.socket | None = None
-         self._file_handle: BinaryIO | None = None
-
-     def start(self) -> None:
-         """Start the log shipper background thread."""
-         if not self._config.enabled:
-             return
-
-         self._running = True
-         self._ship_thread = threading.Thread(target=self._ship_loop, daemon=True)
-         self._ship_thread.start()
-         logger.info("log_shipper.started type=%s", self._config.shipper_type.value)
-
-     def stop(self) -> None:
-         """Stop the log shipper and flush remaining logs."""
-         self._running = False
-         if self._ship_thread:
-             self._ship_thread.join(timeout=10)
-
-         self._flush_buffer()
-         self._cleanup()
-         logger.info(
-             "log_shipper.stopped shipped=%d failed=%d dropped=%d",
-             self._stats["shipped"],
-             self._stats["failed"],
-             self._stats["dropped"],
-         )
-
-     def ship(self, entry: LogEntry) -> bool:
-         """Add a log entry to the shipping queue."""
-         if not self._config.enabled:
-             return False
-
-         if self._config.include_hash_chain:
-             with self._lock:
-                 entry.entry_hash = self._compute_hash(entry)
-                 self._last_hash = entry.entry_hash
-
-         try:
-             self._buffer.put_nowait(entry)
-             return True
-         except queue.Full:
-             self._stats["dropped"] += 1
-             logger.warning("log_shipper.buffer_full")
-             return False
-
-     def ship_dict(self, data: dict[str, Any], level: str = "INFO") -> bool:
-         """Ship a dictionary as a log entry."""
-         entry = LogEntry(
-             timestamp=datetime.now(UTC),
-             level=level,
-             message=data.get("message", json.dumps(data)),
-             source=data.get("source", "kekkai-portal"),
-             metadata={k: v for k, v in data.items() if k not in ("message", "source")},
-         )
-         return self.ship(entry)
-
-     def get_stats(self) -> dict[str, int]:
-         """Get shipping statistics."""
-         return dict(self._stats)
-
-     def _ship_loop(self) -> None:
-         """Background loop to ship logs."""
-         batch: list[LogEntry] = []
-         last_flush = time.time()
-
-         while self._running:
-             try:
-                 entry = self._buffer.get(timeout=1.0)
-                 batch.append(entry)
-
-                 should_flush = (
-                     len(batch) >= self._config.webhook_batch_size
-                     or (time.time() - last_flush) >= self._config.webhook_flush_interval
-                 )
-
-                 if should_flush:
-                     self._ship_batch(batch)
-                     batch = []
-                     last_flush = time.time()
-
-             except queue.Empty:
-                 if batch and (time.time() - last_flush) >= self._config.webhook_flush_interval:
-                     self._ship_batch(batch)
-                     batch = []
-                     last_flush = time.time()
-
-         if batch:
-             self._ship_batch(batch)
-
-     def _ship_batch(self, batch: list[LogEntry]) -> None:
-         """Ship a batch of log entries."""
-         if not batch:
-             return
-
-         for attempt in range(self._config.retry_count):
-             try:
-                 if self._config.shipper_type == ShipperType.SYSLOG:
-                     self._ship_syslog(batch)
-                 elif self._config.shipper_type == ShipperType.WEBHOOK:
-                     self._ship_webhook(batch)
-                 elif self._config.shipper_type == ShipperType.FILE:
-                     self._ship_file(batch)
-
-                 self._stats["shipped"] += len(batch)
-                 return
-
-             except Exception as e:
-                 logger.warning(
-                     "log_shipper.retry attempt=%d error=%s",
-                     attempt + 1,
-                     str(e),
-                 )
-                 if attempt < self._config.retry_count - 1:
-                     time.sleep(self._config.retry_delay_seconds * (attempt + 1))
-
-         self._stats["failed"] += len(batch)
-         logger.error("log_shipper.failed count=%d", len(batch))
-
-     def _ship_syslog(self, batch: list[LogEntry]) -> None:
-         """Ship logs to syslog server."""
-         if self._socket is None:
-             self._connect_syslog()
-
-         for entry in batch:
-             msg = entry.to_syslog(self._config.syslog_facility, self._config.hostname)
-             msg_bytes = msg.encode("utf-8")
-
-             if self._config.syslog_protocol == "tcp":
-                 msg_bytes = msg_bytes + b"\n"
-                 self._socket.sendall(msg_bytes)  # type: ignore
-             else:
-                 self._socket.sendto(  # type: ignore
-                     msg_bytes, (self._config.syslog_host, self._config.syslog_port)
-                 )
-
-     def _ship_webhook(self, batch: list[LogEntry]) -> None:
-         """Ship logs to webhook endpoint."""
-         if not self._config.webhook_url:
-             raise ValueError("Webhook URL not configured")
-
-         payload = json.dumps(
-             {
-                 "shipper_version": SHIPPER_VERSION,
-                 "hostname": self._config.hostname,
-                 "timestamp": datetime.now(UTC).isoformat(),
-                 "entries": [e.to_dict() for e in batch],
-             }
-         ).encode("utf-8")
-
-         headers = {"Content-Type": "application/json"}
-         if self._config.webhook_auth_header:
-             headers["Authorization"] = self._config.webhook_auth_header
-
-         req = Request(  # noqa: S310
-             self._config.webhook_url,
-             data=payload,
-             headers=headers,
-             method="POST",
-         )
-
-         try:
-             with urlopen(req, timeout=30) as resp:  # noqa: S310
-                 if resp.status >= 400:
-                     raise ValueError(f"Webhook returned {resp.status}")
-         except URLError as e:
-             raise ConnectionError(f"Webhook request failed: {e}") from e
-
-     def _ship_file(self, batch: list[LogEntry]) -> None:
-         """Ship logs to local file."""
-         self._ensure_file_handle()
-
-         for entry in batch:
-             line = entry.to_json() + "\n"
-             self._file_handle.write(line.encode("utf-8"))  # type: ignore
-
-         self._file_handle.flush()  # type: ignore
-         self._maybe_rotate_file()
-
-     def _connect_syslog(self) -> None:
-         """Connect to syslog server."""
-         if self._config.syslog_protocol == "tcp":
-             self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-             if self._config.syslog_use_tls:
-                 context = ssl.create_default_context()
-                 self._socket = context.wrap_socket(
-                     self._socket, server_hostname=self._config.syslog_host
-                 )
-             self._socket.connect((self._config.syslog_host, self._config.syslog_port))
-         else:
-             self._socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-
-     def _ensure_file_handle(self) -> None:
-         """Ensure file handle is open."""
-         if self._file_handle is not None:
-             return
-
-         self._config.file_path.parent.mkdir(parents=True, exist_ok=True)
-         self._file_handle = open(self._config.file_path, "ab")  # noqa: SIM115
-
-     def _maybe_rotate_file(self) -> None:
-         """Rotate log file if needed."""
-         if not self._file_handle:
-             return
-
-         max_size = self._config.file_rotate_size_mb * 1024 * 1024
-         current_size = self._config.file_path.stat().st_size
-
-         if current_size < max_size:
-             return
-
-         self._file_handle.close()
-         self._file_handle = None
-
-         for i in range(self._config.file_rotate_count - 1, 0, -1):
-             old_path = self._config.file_path.with_suffix(f".{i}.jsonl")
-             new_path = self._config.file_path.with_suffix(f".{i + 1}.jsonl")
-             if self._config.file_compress:
-                 old_path = old_path.with_suffix(old_path.suffix + ".gz")
-                 new_path = new_path.with_suffix(new_path.suffix + ".gz")
-             if old_path.exists():
-                 old_path.rename(new_path)
-
-         rotated_path = self._config.file_path.with_suffix(".1.jsonl")
-         if self._config.file_compress:
-             with (
-                 open(self._config.file_path, "rb") as f_in,
-                 gzip.open(rotated_path.with_suffix(rotated_path.suffix + ".gz"), "wb") as f_out,
-             ):
-                 f_out.writelines(f_in)
-             self._config.file_path.unlink()
-         else:
-             self._config.file_path.rename(rotated_path)
-
-         self._ensure_file_handle()
-         logger.info("log_shipper.file_rotated")
-
-     def _flush_buffer(self) -> None:
-         """Flush remaining entries from buffer."""
-         batch: list[LogEntry] = []
-         while True:
-             try:
-                 entry = self._buffer.get_nowait()
-                 batch.append(entry)
-             except queue.Empty:
-                 break
-
-         if batch:
-             self._ship_batch(batch)
-
-     def _cleanup(self) -> None:
-         """Clean up resources."""
-         import contextlib
-
-         if self._socket:
-             with contextlib.suppress(OSError):
-                 self._socket.close()
-             self._socket = None
-
-         if self._file_handle:
-             with contextlib.suppress(OSError):
-                 self._file_handle.close()
-             self._file_handle = None
-
-     def _compute_hash(self, entry: LogEntry) -> str:
-         """Compute hash for integrity chain."""
-         data = f"{self._last_hash}:{entry.to_json()}"
-         return hashlib.sha256(data.encode()).hexdigest()
-
-
- def create_log_shipper(
-     shipper_type: ShipperType | str = ShipperType.FILE,
-     **kwargs: Any,
- ) -> LogShipper:
-     """Create a configured LogShipper instance."""
-     if isinstance(shipper_type, str):
-         shipper_type = ShipperType(shipper_type)
-
-     config = LogShipperConfig(shipper_type=shipper_type)
-
-     if shipper_type == ShipperType.SYSLOG:
-         config.syslog_host = kwargs.get("host", os.environ.get("SYSLOG_HOST", "localhost"))
-         config.syslog_port = int(kwargs.get("port", os.environ.get("SYSLOG_PORT", "514")))
-         config.syslog_protocol = kwargs.get("protocol", os.environ.get("SYSLOG_PROTOCOL", "udp"))
-         config.syslog_use_tls = kwargs.get(
-             "use_tls", os.environ.get("SYSLOG_USE_TLS", "").lower() == "true"
-         )
-
-     elif shipper_type == ShipperType.WEBHOOK:
-         config.webhook_url = kwargs.get("url", os.environ.get("LOG_WEBHOOK_URL", ""))
-         config.webhook_auth_header = kwargs.get("auth", os.environ.get("LOG_WEBHOOK_AUTH", ""))
-
-     elif shipper_type == ShipperType.FILE:
-         file_path = kwargs.get(
-             "path", os.environ.get("LOG_SHIP_PATH", "/var/log/kekkai/shipped.jsonl")
-         )
-         config.file_path = Path(file_path)
-
-     return LogShipper(config)
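
A minimal sketch of how the removed module's public API fit together, reconstructed from the deleted source above; the webhook URL and the log fields are illustrative placeholders, not values taken from the package:

    from datetime import UTC, datetime

    from portal.ops.log_shipper import LogEntry, ShipperType, create_log_shipper

    # Build a webhook shipper; with no kwargs, create_log_shipper() falls back
    # to the LOG_WEBHOOK_URL / LOG_WEBHOOK_AUTH environment variables.
    shipper = create_log_shipper(ShipperType.WEBHOOK, url="https://logs.example.com/ingest")
    shipper.start()  # spawns the background _ship_loop thread

    # Entries are hash-chained (include_hash_chain defaults to True) and
    # buffered; ship() returns False if the queue is full and the entry drops.
    shipper.ship(
        LogEntry(
            timestamp=datetime.now(UTC),
            level="WARNING",
            message="login.failed",
            metadata={"user": "alice", "ip": "203.0.113.7"},
        )
    )
    shipper.ship_dict({"message": "scan.completed", "findings": 3})

    shipper.stop()  # joins the thread, flushes the buffer, logs shipped/failed/dropped
    print(shipper.get_stats())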