dwipe 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,643 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Structured Logger with JSON Lines format and weighted-age trimming.
4
+ ERR entries age 10x slower than other entries, so they persist longer.
5
+ """
6
import os
import sys
import json
import inspect
from collections import OrderedDict
from dataclasses import dataclass, asdict, field
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any
14
+
15
+ # ============================================================================
16
+ # Data Classes for Structured Logging
17
+ # ============================================================================
18
+
19
@dataclass
class LogEntry:
    """One structured record in the JSON Lines log."""
    timestamp: str
    level: str  # 'ERR', 'OK', 'MSG', 'DBG', etc.
    file: str
    line: int
    function: str
    module: str = ""
    message: str = ""
    data: Dict[str, Any] = field(default_factory=dict)
    session_id: str = ""
    _raw: str = ""  # Original raw message (never serialized)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict, omitting private fields."""
        serialized = asdict(self)
        # Private fields stay out of the on-disk record
        serialized.pop('_raw', None)
        return serialized

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> 'LogEntry':
        """Safely build a LogEntry from a dict, dropping unknown keys."""
        # Restrict to known fields so stray keys never raise TypeError
        allowed = {
            'timestamp', 'level', 'file', 'line', 'function',
            'module', 'message', 'data', 'session_id'
        }
        return LogEntry(**{key: value for key, value in data.items() if key in allowed})

    @property
    def location(self) -> str:
        """Short 'file:line' string for display."""
        return f"{self.file}:{self.line}"

    @property
    def display_summary(self) -> str:
        """
        Display-friendly summary derived from the message.

        Prefers a 'filebase'/'filepath' value found in JSON embedded in the
        message; otherwise falls back to the truncated message, or to the
        call site when the message is empty.
        """
        if not self.message:
            return f"{self.file}:{self.line} {self.function}()"

        # Try to pull a file name out of JSON embedded in the message
        brace = self.message.find('{')
        if brace != -1:
            try:
                parsed = json.loads(self.message[brace:])
                name = parsed.get('filebase', parsed.get('filepath', None))
                if name:
                    return name
            except (json.JSONDecodeError, ValueError, KeyError):
                pass

        # Fall back to the truncated raw message
        return self.message[:70]

    @staticmethod
    def format_time_delta(seconds: float, signed: bool = False) -> str:
        """
        Format a time delta in compact form (e.g., '18h39m', '5d3h').

        Args:
            seconds: Time difference in seconds
            signed: If True, include '-' prefix for negative values

        Returns:
            Compact time string with at most two units (e.g., '2h30m', '45s')
        """
        # big starts as whole minutes, small as leftover seconds
        big, small = divmod(int(abs(seconds)), 60)
        names = ('s', 'm', 'h', 'd', 'w', 'y')
        unit = 1  # index into names for `big`

        # Promote `big` to coarser units while it exceeds each divisor
        for divisor in (60, 24, 7, 52, 9999999):
            if big < divisor:
                break
            big, small = divmod(big, divisor)
            unit += 1

        prefix = '-' if signed and seconds < 0 else ''
        major = f'{big}{names[unit]}' if big else ''
        return prefix + major + f'{small:d}{names[unit - 1]}'

    def format_ago(self) -> str:
        """
        Relative age of this entry (e.g., '5m', '2h39m').

        Returns:
            Compact relative time string, or '???' if the timestamp is invalid
        """
        try:
            elapsed = datetime.now() - datetime.fromisoformat(self.timestamp)
            return LogEntry.format_time_delta(elapsed.total_seconds())
        except (ValueError, AttributeError):
            return "???"
125
+
126
+ # ============================================================================
127
+ # Main Logger Class
128
+ # ============================================================================
129
+
130
class StructuredLogger:
    """
    Structured logger using JSON Lines format with single log file
    and weighted-age trimming (ERR entries age 10x slower).
    """

    # Size limits (adjust as needed)
    MAX_LOG_SIZE = 10 * 1024 * 1024  # 10 MB
    TRIM_TO_RATIO = 0.67  # Trim to 67% when max exceeded
    ERR_AGE_WEIGHT = 10  # ERR entries age 10x slower

    def __init__(self, app_name: str = 'rmbloat',
                 log_dir: Optional[Path] = None,
                 session_id: str = ""):
        """
        Initialize the structured logger.

        Args:
            app_name: Application name for log directory
            log_dir: Optional override for log directory
            session_id: Optional session identifier for log correlation
        """
        self.app_name = app_name
        self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
        self._setup_paths(log_dir)

        # Statistics
        self.stats = {
            'entries_written': 0,
            'last_trim': datetime.now()
        }

    def _fix_ownership(self, path: Path) -> None:
        """Fix file/directory ownership to the real user when running with sudo."""
        real_user = os.environ.get('SUDO_USER')
        if not real_user:
            return
        try:
            import pwd  # POSIX-only; imported lazily so non-sudo runs never touch it
            pw_record = pwd.getpwnam(real_user)
            os.chown(path, pw_record.pw_uid, pw_record.pw_gid)
        except (OSError, KeyError):
            pass  # Best-effort: ignore permission errors and missing users

    def _setup_paths(self, log_dir: Optional[Path]) -> None:
        """
        Set up log directory and file paths.

        Falls back to the current working directory when the preferred
        directory cannot be created.
        """
        try:
            if log_dir:
                # Use provided directory exactly as specified
                self.log_dir = Path(log_dir)
            else:
                # Construct default path from app_name
                self.log_dir = Path.home() / '.config' / self.app_name

            self.log_dir.mkdir(parents=True, exist_ok=True)
            self._fix_ownership(self.log_dir)
        except Exception as e:
            print(f"FATAL: Cannot setup log directory: {e}", file=sys.stderr)
            # Fallback to current directory
            self.log_dir = Path.cwd()

        # Single log file (JSON Lines format). Always anchored to log_dir:
        # the previous fallback used a bare relative Path("events.jsonl"),
        # which silently moved the log if the process later changed cwd.
        self.log_file = self.log_dir / "events.jsonl"

    def _get_caller_info(self, depth: int = 3) -> tuple:
        """
        Return (file, line, function, module) for the frame `depth` levels up.

        "module" is the caller file's parent directory name; pathlib is used
        so this works with either path separator.
        Returns ("unknown", 0, "unknown", "") when inspection fails.
        """
        try:
            frame = inspect.currentframe()
            for _ in range(depth):
                if frame:
                    frame = frame.f_back

            if frame:
                src = Path(frame.f_code.co_filename)
                return (src.name, frame.f_lineno, frame.f_code.co_name, src.parent.name)
        except Exception:
            pass
        return ("unknown", 0, "unknown", "")

    def _create_log_entry(self, level: str, *args,
                          data: Optional[Dict] = None,
                          **kwargs) -> 'LogEntry':
        """Create a structured log entry attributed to the caller three frames up."""
        file, line, function, module = self._get_caller_info()
        message = " ".join(str(arg) for arg in args)

        return LogEntry(
            timestamp=datetime.now().isoformat(),
            level=level,
            file=file,
            line=line,
            function=function,
            module=module,
            message=message,
            data=data or {},
            session_id=self.session_id,
            _raw=message
        )

    def _append_log(self, entry: 'LogEntry') -> None:
        """
        Append entry to the log file, trimming first if the size cap is hit.

        Args:
            entry: Log entry to append
        """
        # Trim before writing so the file never grows far past MAX_LOG_SIZE
        if self.log_file.exists() and self.log_file.stat().st_size >= self.MAX_LOG_SIZE:
            self._trim_log_file()

        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(entry.to_dict()) + '\n')

            self.stats['entries_written'] += 1

            # Fix ownership after writing (the file may have just been created)
            self._fix_ownership(self.log_file)
        except Exception as e:
            print(f"LOG WRITE ERROR: {e}", file=sys.stderr)

    def _trim_log_file(self) -> None:
        """
        Trim log file by removing oldest entries (by weighted age) until size < MAX_LOG_SIZE * TRIM_TO_RATIO.
        ERR entries have age/ERR_AGE_WEIGHT, so they're kept longer.

        BUGFIX: the previous version sorted ascending (newest-first) and then
        iterated reversed(), so it filled the keep budget with the OLDEST
        entries and discarded the newest; it also wrote entries back in
        effective-age order rather than chronological order.
        """
        if not self.log_file.exists():
            return

        try:
            # Read all entries: (timestamp, level, line, line_size)
            entries = []
            with open(self.log_file, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        record = json.loads(line)
                        timestamp = datetime.fromisoformat(record['timestamp'])
                        level = record.get('level', 'OK')
                        entries.append((timestamp, level, line, len(line.encode('utf-8'))))
                    except (json.JSONDecodeError, KeyError, ValueError):
                        pass  # Skip malformed entries

            # Effective age: ERRs age ERR_AGE_WEIGHT times slower (kept longer)
            now = datetime.now()
            weighted = []  # (effective_age_seconds, timestamp, line, line_size)
            for timestamp, level, line, line_size in entries:
                age = (now - timestamp).total_seconds()
                if level == 'ERR':
                    age /= self.ERR_AGE_WEIGHT
                weighted.append((age, timestamp, line, line_size))

            # Smallest effective age (most keep-worthy) first
            weighted.sort(key=lambda item: item[0])

            # Keep the keep-worthiest entries until total size hits the target
            target_size = int(self.MAX_LOG_SIZE * self.TRIM_TO_RATIO)
            kept = []  # (timestamp, line)
            total_size = 0
            for _, timestamp, line, line_size in weighted:
                if total_size + line_size <= target_size:
                    kept.append((timestamp, line))
                    total_size += line_size
                # else: discard (too old with weighted age)

            # Write back in chronological order
            kept.sort(key=lambda item: item[0])
            with open(self.log_file, 'w', encoding='utf-8') as f:
                f.writelines(line for _, line in kept)

            self.stats['last_trim'] = datetime.now()

            # Fix ownership after trimming
            self._fix_ownership(self.log_file)
        except Exception as e:
            print(f"TRIM ERROR: {e}", file=sys.stderr)

    # ========================================================================
    # Public API
    # ========================================================================

    def event(self, *args, data: Optional[Dict] = None, **kwargs) -> None:
        """Log an event (successful operation)."""
        self._append_log(self._create_log_entry("OK", *args, data=data, **kwargs))

    def error(self, *args, data: Optional[Dict] = None, **kwargs) -> None:
        """Log an error; also echoed to stderr for immediate visibility."""
        self._append_log(self._create_log_entry("ERR", *args, data=data, **kwargs))

        print(f"ERROR: {args[0] if args else ''}", file=sys.stderr)
        if data:
            print(f" Data: {json.dumps(data, indent=2)[:200]}...", file=sys.stderr)

    def info(self, *args, data: Optional[Dict] = None, **kwargs) -> None:
        """Log informational message."""
        self._append_log(self._create_log_entry("MSG", *args, data=data, **kwargs))

    def debug(self, *args, data: Optional[Dict] = None, **kwargs) -> None:
        """Log debug message."""
        self._append_log(self._create_log_entry("DBG", *args, data=data, **kwargs))

    # ========================================================================
    # Backward Compatibility Aliases (for RotatingLogger API)
    # ========================================================================

    @staticmethod
    def _flatten_list_arg(args: tuple) -> tuple:
        """RotatingLogger compat: fold a leading list arg into one newline-joined string."""
        if args and isinstance(args[0], list):
            return ('\n'.join(str(item) for item in args[0]),) + args[1:]
        return args

    def lg(self, *args, **kwargs) -> None:
        """
        Alias for info() - backward compatibility with RotatingLogger.

        Logs an ordinary message with a 'MSG' tag.
        Supports both simple messages and lists of strings.
        """
        self.info(*self._flatten_list_arg(args), **kwargs)

    def err(self, *args, **kwargs) -> None:
        """
        Alias for error() - backward compatibility with RotatingLogger.

        Logs an error message with an 'ERR' tag.
        Supports both simple messages and lists of strings.
        """
        self.error(*self._flatten_list_arg(args), **kwargs)

    def put(self, message_type: str, *args, **kwargs) -> None:
        """
        Custom-level logging - backward compatibility with RotatingLogger.

        Logs a message with an arbitrary MESSAGE_TYPE tag.
        Supports both simple messages and lists of strings.
        """
        args = self._flatten_list_arg(args)
        # Extract data from kwargs to avoid passing it twice
        data = kwargs.pop('data', None)
        self._append_log(self._create_log_entry(str(message_type).upper(), *args, data=data))

    # ========================================================================
    # Filtering and Search Methods
    # ========================================================================

    @staticmethod
    def filter_entries(entries: 'List[LogEntry]', pattern: str,
                       deep: bool = False) -> 'tuple[List[LogEntry], set]':
        """
        Filter log entries by pattern (case-insensitive).

        Args:
            entries: List of LogEntry objects to filter
            pattern: Search pattern (case-insensitive)
            deep: If True, also search within JSON data in messages

        Returns:
            (filtered_entries, deep_match_timestamps): the filtered list plus
            the set of timestamps that matched only in the deep (JSON) search
        """
        if not pattern:
            return entries, set()

        needle = pattern.lower()
        filtered = []
        deep_matches = set()

        for entry in entries:
            # Shallow search: the text visible in the collapsed view
            visible = f"{entry.timestamp[:19]} {entry.level} {entry.display_summary}".lower()
            shallow_match = needle in visible

            # Deep search: structured data, then any JSON embedded in the message
            deep_match = False
            if deep:
                try:
                    if entry.data:
                        deep_match = needle in json.dumps(entry.data).lower()
                    if not deep_match and '{' in entry.message:
                        json_start = entry.message.index('{')
                        deep_match = needle in entry.message[json_start:].lower()
                except (ValueError, IndexError, TypeError):
                    pass

            if shallow_match or deep_match:
                filtered.append(entry)
                # Remember entries that matched only via the deep search
                if deep_match and not shallow_match:
                    deep_matches.add(entry.timestamp)

        return filtered, deep_matches

    # ========================================================================
    # Query Methods - Window-based for efficient incremental reads
    # ========================================================================

    def get_window_of_entries(self, window_size: int = 1000):
        """
        Get a window of log entries.

        Returns: (entries_dict, window_state)
            entries_dict: OrderedDict keyed by timestamp, chronological order
            window_state: dict with 'file_size' and 'last_position'
        """
        entries = OrderedDict()
        if not self.log_file.exists():
            return entries, {'file_size': 0, 'last_position': 0}

        file_size = self.log_file.stat().st_size

        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        entry = LogEntry.from_dict(json.loads(line))
                        entries[entry.timestamp] = entry
                    except (json.JSONDecodeError, TypeError, KeyError):
                        pass
        except Exception:
            pass

        # Keep only the newest window_size entries
        # (OrderedDict insertion order is chronological)
        if len(entries) > window_size:
            entries = OrderedDict(list(entries.items())[-window_size:])

        window_state = {
            'file_size': file_size,
            'last_position': file_size  # Next read starts here
        }

        return entries, window_state

    def refresh_window(self, window, window_state, window_size: int = 1000):
        """
        Refresh window with entries appended to the log since the last read.

        Returns: updated (entries_dict, window_state)
        """
        if not self.log_file.exists():
            return window, window_state

        current_file_size = self.log_file.stat().st_size
        last_file_size = window_state.get('file_size', 0)
        last_position = window_state.get('last_position', 0)

        # File was trimmed (size dropped): reset and re-read from scratch
        if current_file_size < last_file_size:
            return self.get_window_of_entries(window_size)

        # No new data
        if current_file_size == last_position:
            return window, window_state

        # Read only the new tail, starting at the last position.
        # NOTE: readline() is used (not file iteration) because iterating a
        # text file disables tell(), which we need for the next position.
        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                f.seek(last_position)
                while True:
                    line = f.readline()
                    if not line:
                        break
                    if line.strip():
                        try:
                            entry = LogEntry.from_dict(json.loads(line))
                            window[entry.timestamp] = entry
                        except (json.JSONDecodeError, TypeError, KeyError):
                            pass

                new_position = f.tell()
        except Exception:
            new_position = last_position

        # Trim window if too large (keep newest)
        if len(window) > window_size:
            window = OrderedDict(list(window.items())[-window_size:])

        window_state = {
            'file_size': current_file_size,
            'last_position': new_position
        }

        return window, window_state

    # ========================================================================
    # Properties
    # ========================================================================

    @property
    def log_paths(self) -> List[str]:
        """Return list of log file paths (for backward compatibility with -L option)."""
        return [str(self.log_file)]
577
+
578
+
579
# ============================================================================
# Aliases for Backward Compatibility
# ============================================================================

# Alias for standard use (matches RotatingLogger pattern: Log = RotatingLogger).
# Existing call sites can keep instantiating `Log(...)` unchanged.
Log = StructuredLogger
585
+
586
+ # ============================================================================
587
+ # Example Usage
588
+ # ============================================================================
589
+
590
def example_usage():
    """Demonstrate typical use of the structured logger."""

    # Create logger
    logger = StructuredLogger(
        app_name="VideoProcessor",
        session_id="session_12345"
    )

    print(f"Logs will be written to: {logger.log_dir}")
    print(f"Log file: {logger.log_file}")

    # Log some events
    logger.info("Starting video processing batch")

    # Simulate a small batch; item 2 is forced to fail for demonstration
    for idx in range(5):
        video_path = f"/videos/video_{idx}.mp4"
        if idx != 2:
            # Log a successful event
            logger.event(
                f"Successfully encoded video_{idx}",
                data={
                    "filepath": video_path,
                    "original_size": 1000000,
                    "encoded_size": 500000,
                    "reduction": "50%",
                    "duration_seconds": 120.5
                }
            )
        else:
            # Log an error with structured data
            logger.error(
                "Failed to encode video",
                data={
                    "filepath": video_path,
                    "error_code": 183,
                    "ffmpeg_output": ["Error opening input", "Invalid data"],
                    "attempts": 3
                }
            )

    logger.info("Batch processing complete")

    # Get recent entries programmatically using window
    print("\n" + "=" * 60)
    print("Recent Log Entries (window-based access):")
    window, _ = logger.get_window_of_entries(window_size=10)
    for entry in window.values():
        print(f"{entry.timestamp} [{entry.level}] {entry.location}: {entry.message}")
        if entry.data:
            print(f" Data keys: {list(entry.data.keys())}")


if __name__ == "__main__":
    example_usage()