nginx-lens 0.3.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
commands/logs.py CHANGED
@@ -1,17 +1,37 @@
1
+ import sys
2
+ from typing import Optional, List, Dict, Any
1
3
  import typer
2
4
  from rich.console import Console
3
5
  from rich.table import Table
4
6
  import re
7
+ import gzip
8
+ from datetime import datetime, timedelta
5
9
  from collections import Counter, defaultdict
10
+ from exporter.json_yaml import format_logs_results, print_export
11
+ from exporter.csv import export_logs_to_csv
12
+ from config.config_loader import get_config
6
13
 
7
14
  app = typer.Typer(help="Анализ access.log/error.log: топ-статусы, пути, IP, User-Agent, ошибки.")
8
15
  console = Console()
9
16
 
10
- log_line_re = re.compile(r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) [^\"]+" (?P<status>\d{3})')
17
+ # Улучшенный regex для парсинга nginx access log (поддерживает response time)
18
+ # Формат: IP - - [timestamp] "method path protocol" status size "referer" "user-agent" "response_time"
19
+ log_line_re = re.compile(
20
+ r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) [^\"]+" '
21
+ r'(?P<status>\d{3}) (?P<size>\S+) "(?P<referer>[^"]*)" "(?P<user_agent>[^"]*)"'
22
+ r'(?: "(?P<response_time>[^"]+)")?'
23
+ )
11
24
 
12
25
  def logs(
13
26
  log_path: str = typer.Argument(..., help="Путь к access.log или error.log"),
14
- top: int = typer.Option(10, help="Сколько топ-значений выводить")
27
+ top: Optional[int] = typer.Option(None, help="Сколько топ-значений выводить"),
28
+ json: bool = typer.Option(False, "--json", help="Экспортировать результаты в JSON"),
29
+ yaml: bool = typer.Option(False, "--yaml", help="Экспортировать результаты в YAML"),
30
+ csv: bool = typer.Option(False, "--csv", help="Экспортировать результаты в CSV"),
31
+ since: Optional[str] = typer.Option(None, "--since", help="Фильтр: с даты (формат: YYYY-MM-DD или YYYY-MM-DD HH:MM:SS)"),
32
+ until: Optional[str] = typer.Option(None, "--until", help="Фильтр: до даты (формат: YYYY-MM-DD или YYYY-MM-DD HH:MM:SS)"),
33
+ status: Optional[str] = typer.Option(None, "--status", help="Фильтр по статусам (например: 404,500)"),
34
+ detect_anomalies: bool = typer.Option(False, "--detect-anomalies", help="Обнаруживать аномалии в логах"),
15
35
  ):
16
36
  """
17
37
  Анализирует access.log/error.log.
@@ -22,40 +42,268 @@ def logs(
22
42
  - Топ IP-адресов
23
43
  - Топ User-Agent
24
44
  - Топ путей с ошибками 404/500
45
+ - Анализ времени ответа (если доступно)
46
+ - Обнаружение аномалий
25
47
 
26
48
  Пример:
27
49
  nginx-lens logs /var/log/nginx/access.log --top 20
50
+ nginx-lens logs /var/log/nginx/access.log --since "2024-01-01" --status 404,500
51
+ nginx-lens logs /var/log/nginx/access.log.gz --detect-anomalies --json
28
52
  """
53
+ # Загружаем конфигурацию
54
+ config = get_config()
55
+ defaults = config.get_defaults()
56
+
57
+ # Применяем значения из конфига, если не указаны через CLI
58
+ top = top if top is not None else defaults.get("top", 10)
59
+
60
+ # Парсинг фильтров
61
+ status_filter = None
62
+ if status:
63
+ status_filter = set(s.strip() for s in status.split(','))
64
+
65
+ since_dt = None
66
+ if since:
67
+ try:
68
+ if len(since) == 10: # YYYY-MM-DD
69
+ since_dt = datetime.strptime(since, "%Y-%m-%d")
70
+ else: # YYYY-MM-DD HH:MM:SS
71
+ since_dt = datetime.strptime(since, "%Y-%m-%d %H:%M:%S")
72
+ except ValueError:
73
+ console.print(f"[red]Неверный формат даты для --since: {since}. Используйте YYYY-MM-DD или YYYY-MM-DD HH:MM:SS[/red]")
74
+ sys.exit(1)
75
+
76
+ until_dt = None
77
+ if until:
78
+ try:
79
+ if len(until) == 10: # YYYY-MM-DD
80
+ until_dt = datetime.strptime(until, "%Y-%m-%d") + timedelta(days=1)
81
+ else: # YYYY-MM-DD HH:MM:SS
82
+ until_dt = datetime.strptime(until, "%Y-%m-%d %H:%M:%S")
83
+ except ValueError:
84
+ console.print(f"[red]Неверный формат даты для --until: {until}. Используйте YYYY-MM-DD или YYYY-MM-DD HH:MM:SS[/red]")
85
+ sys.exit(1)
86
+
87
+ # Чтение лога (поддержка gzip)
29
88
  try:
30
- with open(log_path) as f:
31
- lines = list(f)
89
+ if log_path.endswith('.gz'):
90
+ with gzip.open(log_path, 'rt', encoding='utf-8', errors='ignore') as f:
91
+ lines = list(f)
92
+ else:
93
+ with open(log_path, 'r', encoding='utf-8', errors='ignore') as f:
94
+ lines = list(f)
32
95
  except FileNotFoundError:
33
96
  console.print(f"[red]Файл {log_path} не найден. Проверьте путь к логу.[/red]")
34
- return
97
+ sys.exit(1)
35
98
  except Exception as e:
36
99
  console.print(f"[red]Ошибка при чтении {log_path}: {e}[/red]")
37
- return
100
+ sys.exit(1)
38
101
  status_counter = Counter()
39
102
  path_counter = Counter()
40
103
  ip_counter = Counter()
41
104
  user_agent_counter = Counter()
42
105
  errors = defaultdict(list)
106
+ response_times = []
107
+ log_entries = []
108
+
109
+ # Парсинг nginx формата времени: 01/Jan/2024:00:00:00 +0000
110
+ nginx_time_format = "%d/%b/%Y:%H:%M:%S %z"
111
+
43
112
  for line in lines:
44
113
  m = log_line_re.search(line)
45
114
  if m:
46
- ip = m.group('ip')
47
- path = m.group('path')
48
- status = m.group('status')
49
- status_counter[status] += 1
50
- path_counter[path] += 1
51
- ip_counter[ip] += 1
52
- if status.startswith('4') or status.startswith('5'):
53
- errors[status].append(path)
54
- # user-agent (если есть)
55
- if '" "' in line:
56
- ua = line.rsplit('" "', 1)[-1].strip().strip('"')
57
- if ua:
58
- user_agent_counter[ua] += 1
115
+ try:
116
+ # Парсинг времени
117
+ time_str = m.group('time')
118
+ log_time = datetime.strptime(time_str, nginx_time_format)
119
+
120
+ # Убираем timezone для сравнения (приводим к naive datetime)
121
+ if log_time.tzinfo:
122
+ log_time = log_time.replace(tzinfo=None)
123
+
124
+ # Фильтрация по времени
125
+ if since_dt and log_time < since_dt:
126
+ continue
127
+ if until_dt and log_time > until_dt:
128
+ continue
129
+
130
+ ip = m.group('ip')
131
+ path = m.group('path')
132
+ status = m.group('status')
133
+ method = m.group('method')
134
+ user_agent = m.group('user_agent') or ''
135
+ response_time_str = m.group('response_time')
136
+
137
+ # Фильтрация по статусам
138
+ if status_filter and status not in status_filter:
139
+ continue
140
+
141
+ # Сбор данных
142
+ entry = {
143
+ 'time': log_time,
144
+ 'ip': ip,
145
+ 'path': path,
146
+ 'status': status,
147
+ 'method': method,
148
+ 'user_agent': user_agent,
149
+ 'response_time': float(response_time_str) if response_time_str else None
150
+ }
151
+ log_entries.append(entry)
152
+
153
+ status_counter[status] += 1
154
+ path_counter[path] += 1
155
+ ip_counter[ip] += 1
156
+
157
+ if user_agent:
158
+ user_agent_counter[user_agent] += 1
159
+
160
+ if status.startswith('4') or status.startswith('5'):
161
+ errors[status].append(path)
162
+
163
+ if response_time_str:
164
+ try:
165
+ response_times.append(float(response_time_str))
166
+ except ValueError:
167
+ pass
168
+ except (ValueError, AttributeError) as e:
169
+ # Пропускаем строки с неверным форматом
170
+ continue
171
+
172
+ # Проверка на пустые результаты
173
+ if not log_entries:
174
+ if json or yaml or csv:
175
+ empty_data = {
176
+ "timestamp": __import__('datetime').datetime.now().isoformat(),
177
+ "summary": {"total_requests": 0},
178
+ "message": "Нет записей, соответствующих фильтрам"
179
+ }
180
+ if csv:
181
+ print("Category,Type,Value,Count\nNo Data,,,,No entries match filters")
182
+ else:
183
+ format_type = 'json' if json else 'yaml'
184
+ print_export(empty_data, format_type)
185
+ else:
186
+ console.print("[yellow]Нет записей, соответствующих указанным фильтрам.[/yellow]")
187
+ return
188
+
189
+ # Анализ времени ответа
190
+ response_time_stats = {}
191
+ if response_times:
192
+ response_time_stats = {
193
+ "min": min(response_times),
194
+ "max": max(response_times),
195
+ "avg": sum(response_times) / len(response_times),
196
+ "median": sorted(response_times)[len(response_times) // 2],
197
+ "p95": sorted(response_times)[int(len(response_times) * 0.95)] if response_times else 0,
198
+ "p99": sorted(response_times)[int(len(response_times) * 0.99)] if response_times else 0,
199
+ "total_requests_with_time": len(response_times)
200
+ }
201
+
202
+ # Обнаружение аномалий
203
+ anomalies = []
204
+ if detect_anomalies:
205
+ # Аномалия 1: Резкий скачок ошибок
206
+ if len(log_entries) > 100:
207
+ # Разбиваем на временные окна
208
+ window_size = max(100, len(log_entries) // 10)
209
+ error_rates = []
210
+ for i in range(0, len(log_entries), window_size):
211
+ window = log_entries[i:i+window_size]
212
+ error_count = sum(1 for e in window if e['status'].startswith('4') or e['status'].startswith('5'))
213
+ error_rates.append(error_count / len(window) if window else 0)
214
+
215
+ if len(error_rates) > 1:
216
+ avg_rate = sum(error_rates) / len(error_rates)
217
+ for i, rate in enumerate(error_rates):
218
+ if rate > avg_rate * 2: # Удвоение ошибок
219
+ anomalies.append({
220
+ "type": "error_spike",
221
+ "description": f"Резкий скачок ошибок в окне {i+1}: {rate*100:.1f}% (среднее: {avg_rate*100:.1f}%)",
222
+ "severity": "high"
223
+ })
224
+
225
+ # Аномалия 2: Медленные запросы
226
+ if response_times:
227
+ slow_threshold = response_time_stats.get("p95", 1.0) * 2
228
+ slow_requests = [e for e in log_entries if e.get('response_time') and e['response_time'] > slow_threshold]
229
+ if slow_requests:
230
+ anomalies.append({
231
+ "type": "slow_requests",
232
+ "description": f"Найдено {len(slow_requests)} медленных запросов (> {slow_threshold:.2f}s)",
233
+ "severity": "medium"
234
+ })
235
+
236
+ # Аномалия 3: Необычные паттерны IP
237
+ if len(log_entries) > 50:
238
+ ip_counts = Counter(e['ip'] for e in log_entries)
239
+ avg_ip_requests = len(log_entries) / len(ip_counts) if ip_counts else 0
240
+ suspicious_ips = [ip for ip, count in ip_counts.items() if count > avg_ip_requests * 5]
241
+ if suspicious_ips:
242
+ anomalies.append({
243
+ "type": "suspicious_ips",
244
+ "description": f"Подозрительная активность с IP: {', '.join(suspicious_ips[:5])}",
245
+ "severity": "medium"
246
+ })
247
+
248
+ # Аномалия 4: Необычные пути
249
+ if len(log_entries) > 50:
250
+ path_counts = Counter(e['path'] for e in log_entries)
251
+ avg_path_requests = len(log_entries) / len(path_counts) if path_counts else 0
252
+ unusual_paths = [path for path, count in path_counts.items() if count > avg_path_requests * 10]
253
+ if unusual_paths:
254
+ anomalies.append({
255
+ "type": "unusual_paths",
256
+ "description": f"Необычно много запросов к путям: {', '.join(unusual_paths[:3])}",
257
+ "severity": "low"
258
+ })
259
+
260
+ # Экспорт в CSV
261
+ if csv:
262
+ csv_output = export_logs_to_csv(
263
+ status_counter, path_counter, ip_counter, user_agent_counter,
264
+ errors, response_time_stats, anomalies
265
+ )
266
+ print(csv_output)
267
+ return
268
+
269
+ # Экспорт в JSON/YAML
270
+ if json or yaml:
271
+ export_data = format_logs_results(
272
+ status_counter, path_counter, ip_counter, user_agent_counter, errors, top,
273
+ response_time_stats if response_time_stats else None,
274
+ anomalies if anomalies else None
275
+ )
276
+ format_type = 'json' if json else 'yaml'
277
+ print_export(export_data, format_type)
278
+ return
279
+
280
+ # Показываем статистику по времени ответа
281
+ if response_time_stats:
282
+ table = Table(title="Response Time Statistics", show_header=True, header_style="bold green")
283
+ table.add_column("Metric")
284
+ table.add_column("Value")
285
+ for metric, value in response_time_stats.items():
286
+ if metric != "total_requests_with_time":
287
+ table.add_row(metric.replace("_", " ").title(), f"{value:.3f}s")
288
+ else:
289
+ table.add_row(metric.replace("_", " ").title(), str(int(value)))
290
+ console.print(table)
291
+
292
+ # Показываем аномалии
293
+ if anomalies:
294
+ table = Table(title="Detected Anomalies", show_header=True, header_style="bold red")
295
+ table.add_column("Type")
296
+ table.add_column("Description")
297
+ table.add_column("Severity")
298
+ for anomaly in anomalies:
299
+ severity_color = {"high": "red", "medium": "orange3", "low": "yellow"}.get(anomaly.get("severity", "low"), "white")
300
+ table.add_row(
301
+ anomaly.get("type", ""),
302
+ anomaly.get("description", ""),
303
+ f"[{severity_color}]{anomaly.get('severity', '')}[/{severity_color}]"
304
+ )
305
+ console.print(table)
306
+
59
307
  # Топ статусов
60
308
  table = Table(title="Top HTTP Status Codes", show_header=True, header_style="bold blue")
61
309
  table.add_column("Status")