iflow-mcp-m507_ai-soc-agent 1.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/METADATA +410 -0
  2. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/RECORD +85 -0
  3. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. iflow_mcp_m507_ai_soc_agent-1.0.0.dist-info/top_level.txt +1 -0
  7. src/__init__.py +8 -0
  8. src/ai_controller/README.md +139 -0
  9. src/ai_controller/__init__.py +12 -0
  10. src/ai_controller/agent_executor.py +596 -0
  11. src/ai_controller/cli/__init__.py +2 -0
  12. src/ai_controller/cli/main.py +243 -0
  13. src/ai_controller/session_manager.py +409 -0
  14. src/ai_controller/web/__init__.py +2 -0
  15. src/ai_controller/web/server.py +1181 -0
  16. src/ai_controller/web/static/css/README.md +102 -0
  17. src/api/__init__.py +13 -0
  18. src/api/case_management.py +271 -0
  19. src/api/edr.py +187 -0
  20. src/api/kb.py +136 -0
  21. src/api/siem.py +308 -0
  22. src/core/__init__.py +10 -0
  23. src/core/config.py +242 -0
  24. src/core/config_storage.py +684 -0
  25. src/core/dto.py +50 -0
  26. src/core/errors.py +36 -0
  27. src/core/logging.py +128 -0
  28. src/integrations/__init__.py +8 -0
  29. src/integrations/case_management/__init__.py +5 -0
  30. src/integrations/case_management/iris/__init__.py +11 -0
  31. src/integrations/case_management/iris/iris_client.py +885 -0
  32. src/integrations/case_management/iris/iris_http.py +274 -0
  33. src/integrations/case_management/iris/iris_mapper.py +263 -0
  34. src/integrations/case_management/iris/iris_models.py +128 -0
  35. src/integrations/case_management/thehive/__init__.py +8 -0
  36. src/integrations/case_management/thehive/thehive_client.py +193 -0
  37. src/integrations/case_management/thehive/thehive_http.py +147 -0
  38. src/integrations/case_management/thehive/thehive_mapper.py +190 -0
  39. src/integrations/case_management/thehive/thehive_models.py +125 -0
  40. src/integrations/cti/__init__.py +6 -0
  41. src/integrations/cti/local_tip/__init__.py +10 -0
  42. src/integrations/cti/local_tip/local_tip_client.py +90 -0
  43. src/integrations/cti/local_tip/local_tip_http.py +110 -0
  44. src/integrations/cti/opencti/__init__.py +10 -0
  45. src/integrations/cti/opencti/opencti_client.py +101 -0
  46. src/integrations/cti/opencti/opencti_http.py +418 -0
  47. src/integrations/edr/__init__.py +6 -0
  48. src/integrations/edr/elastic_defend/__init__.py +6 -0
  49. src/integrations/edr/elastic_defend/elastic_defend_client.py +351 -0
  50. src/integrations/edr/elastic_defend/elastic_defend_http.py +162 -0
  51. src/integrations/eng/__init__.py +10 -0
  52. src/integrations/eng/clickup/__init__.py +8 -0
  53. src/integrations/eng/clickup/clickup_client.py +513 -0
  54. src/integrations/eng/clickup/clickup_http.py +156 -0
  55. src/integrations/eng/github/__init__.py +8 -0
  56. src/integrations/eng/github/github_client.py +169 -0
  57. src/integrations/eng/github/github_http.py +158 -0
  58. src/integrations/eng/trello/__init__.py +8 -0
  59. src/integrations/eng/trello/trello_client.py +207 -0
  60. src/integrations/eng/trello/trello_http.py +162 -0
  61. src/integrations/kb/__init__.py +12 -0
  62. src/integrations/kb/fs_kb_client.py +313 -0
  63. src/integrations/siem/__init__.py +6 -0
  64. src/integrations/siem/elastic/__init__.py +6 -0
  65. src/integrations/siem/elastic/elastic_client.py +3319 -0
  66. src/integrations/siem/elastic/elastic_http.py +165 -0
  67. src/mcp/README.md +183 -0
  68. src/mcp/TOOLS.md +2827 -0
  69. src/mcp/__init__.py +13 -0
  70. src/mcp/__main__.py +18 -0
  71. src/mcp/agent_profiles.py +408 -0
  72. src/mcp/flow_agent_profiles.py +424 -0
  73. src/mcp/mcp_server.py +4086 -0
  74. src/mcp/rules_engine.py +487 -0
  75. src/mcp/runbook_manager.py +264 -0
  76. src/orchestrator/__init__.py +11 -0
  77. src/orchestrator/incident_workflow.py +244 -0
  78. src/orchestrator/tools_case.py +1085 -0
  79. src/orchestrator/tools_cti.py +359 -0
  80. src/orchestrator/tools_edr.py +315 -0
  81. src/orchestrator/tools_eng.py +378 -0
  82. src/orchestrator/tools_kb.py +156 -0
  83. src/orchestrator/tools_siem.py +1709 -0
  84. src/web/__init__.py +8 -0
  85. src/web/config_server.py +511 -0
src/integrations/siem/elastic/elastic_client.py (new file)
@@ -0,0 +1,3319 @@
"""
Elasticsearch/Elastic SIEM implementation of the generic ``SIEMClient`` interface.
"""

from __future__ import annotations

import json
import re
from datetime import datetime
from typing import Any, Dict, List, Optional

from ....api.siem import (
    FileBehaviorSummary,
    FileReport,
    IpAddressReport,
    QueryResult,
    RelatedEntities,
    SIEMClient,
    SiemEvent,
    Severity,
    SourceType,
)
from ....core.config import SamiConfig
from ....core.errors import IntegrationError
from ....core.logging import get_logger
from .elastic_http import ElasticHttpClient


logger = get_logger("sami.integrations.elastic.client")

class ElasticSIEMClient:
    """
    SIEM client backed by Elasticsearch/Elastic SIEM.

    This implementation uses the Elasticsearch query DSL to search security events.
    """

    def __init__(self, http_client: ElasticHttpClient) -> None:
        self._http = http_client

    @classmethod
    def from_config(cls, config: SamiConfig) -> "ElasticSIEMClient":
        """
        Factory to construct a client from ``SamiConfig``.
        """
        if not config.elastic:
            raise IntegrationError("Elastic configuration is not set in SamiConfig")

        http_client = ElasticHttpClient(
            base_url=config.elastic.base_url,
            api_key=config.elastic.api_key,
            username=config.elastic.username,
            password=config.elastic.password,
            timeout_seconds=config.elastic.timeout_seconds,
            verify_ssl=config.elastic.verify_ssl,
        )
        return cls(http_client=http_client)

    def search_security_events(
        self,
        query: str,
        limit: int = 100,
    ) -> QueryResult:
        """
        Search security events/logs using the Elasticsearch query DSL.

        The query can be:
        - a simple text query (wrapped in a ``query_string`` query), or
        - full Elasticsearch query DSL as JSON.
        """
        try:
            # If the query looks like JSON, parse it as Elasticsearch DSL
            try:
                query_dict = json.loads(query)
                if isinstance(query_dict, dict) and "query" in query_dict:
                    es_query = query_dict
                else:
                    # Wrap in query DSL
                    es_query = {"query": query_dict}
            except (json.JSONDecodeError, ValueError):
                # Simple text query - use a query_string query
                es_query = {
                    "query": {
                        "query_string": {
                            "query": query
                        }
                    },
                    "size": limit
                }

            # Search across common security indices with fallback
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, es_query)

            # Parse the Elasticsearch response
            hits = response.get("hits", {}).get("hits", [])
            total = response.get("hits", {}).get("total", {})
            if isinstance(total, dict):
                total_count = total.get("value", len(hits))
            else:
                total_count = total

            events = []
            for hit in hits[:limit]:
                source = hit.get("_source", {})
                timestamp_str = source.get("@timestamp") or source.get("timestamp")
                timestamp = None
                if timestamp_str:
                    try:
                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
                    except Exception:
                        pass
                if not timestamp:
                    timestamp = datetime.utcnow()

                # Determine source type from index or event fields
                source_type = SourceType.OTHER
                index = hit.get("_index", "")
                if "winlogbeat" in index or "windows" in index.lower():
                    source_type = SourceType.ENDPOINT
                elif "network" in index.lower() or "firewall" in index.lower():
                    source_type = SourceType.NETWORK
                elif "auth" in index.lower() or "login" in index.lower():
                    source_type = SourceType.AUTH
                elif "cloud" in index.lower():
                    source_type = SourceType.CLOUD

                event = SiemEvent(
                    id=hit.get("_id", ""),
                    timestamp=timestamp,
                    source_type=source_type,
                    message=source.get("message", source.get("event", {}).get("original", "")),
                    host=source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host"),
                    username=source.get("user", {}).get("name") if isinstance(source.get("user"), dict) else source.get("user"),
                    ip=source.get("source", {}).get("ip") if isinstance(source.get("source"), dict) else source.get("source.ip"),
                    process_name=source.get("process", {}).get("name") if isinstance(source.get("process"), dict) else source.get("process.name"),
                    file_hash=source.get("file", {}).get("hash", {}).get("sha256") if isinstance(source.get("file"), dict) else source.get("file.hash.sha256"),
                    raw=source,
                )
                events.append(event)

            return QueryResult(
                query=query,
                events=events,
                total_count=total_count,
            )
        except Exception as e:
            logger.exception(f"Error searching Elasticsearch: {e}")
            raise IntegrationError(f"Failed to search security events: {e}") from e

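    # Both accepted query forms, as a sketch (field names follow ECS and are
    # illustrative):
    #
    #     # 1. Simple text query - wrapped in a query_string query:
    #     client.search_security_events("powershell.exe", limit=50)
    #
    #     # 2. Full Elasticsearch query DSL passed as JSON:
    #     dsl = json.dumps({
    #         "query": {"match": {"process.name": "powershell.exe"}},
    #         "size": 50,
    #     })
    #     client.search_security_events(dsl, limit=50)
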
    def get_siem_event_by_id(
        self,
        event_id: str,
    ) -> SiemEvent:
        """
        Retrieve a specific security event by its ID.

        Args:
            event_id: The unique identifier of the event to retrieve.

        Returns:
            SiemEvent containing the event details.

        Raises:
            IntegrationError: If the event is not found or retrieval fails.
        """
        try:
            # Search for the event by _id across all security indices.
            # Elasticsearch uses the 'ids' query for searching by document IDs.
            query = {
                "query": {
                    "ids": {
                        "values": [event_id]
                    }
                },
                "size": 1
            }

            # Search across common security indices with fallback index patterns
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])

            if not hits:
                raise IntegrationError(f"Event with ID {event_id} not found")

            # Parse the first (and should be the only) hit
            hit = hits[0]
            source = hit.get("_source", {})
            timestamp_str = source.get("@timestamp") or source.get("timestamp")
            timestamp = None
            if timestamp_str:
                try:
                    timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
                except Exception:
                    pass
            if not timestamp:
                timestamp = datetime.utcnow()

            # Determine source type from index or event fields
            source_type = SourceType.OTHER
            index = hit.get("_index", "")
            if "winlogbeat" in index or "windows" in index.lower():
                source_type = SourceType.ENDPOINT
            elif "network" in index.lower() or "firewall" in index.lower():
                source_type = SourceType.NETWORK
            elif "auth" in index.lower() or "login" in index.lower():
                source_type = SourceType.AUTH
            elif "cloud" in index.lower():
                source_type = SourceType.CLOUD

            event = SiemEvent(
                id=hit.get("_id", event_id),
                timestamp=timestamp,
                source_type=source_type,
                message=source.get("message", source.get("event", {}).get("original", "")),
                host=source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host"),
                username=source.get("user", {}).get("name") if isinstance(source.get("user"), dict) else source.get("user"),
                ip=source.get("source", {}).get("ip") if isinstance(source.get("source"), dict) else source.get("source.ip"),
                process_name=source.get("process", {}).get("name") if isinstance(source.get("process"), dict) else source.get("process.name"),
                file_hash=source.get("file", {}).get("hash", {}).get("sha256") if isinstance(source.get("file"), dict) else source.get("file.hash.sha256"),
                raw=source,
            )

            return event
        except IntegrationError:
            raise
        except Exception as e:
            logger.exception(f"Error retrieving event by ID from Elasticsearch: {e}")
            raise IntegrationError(f"Failed to get event by ID {event_id}: {e}") from e

    def _get_events_by_ids(self, event_ids: List[str]) -> List[Dict[str, Any]]:
        """
        Retrieve multiple security events by their IDs from Elasticsearch.

        This is used to fetch the ancestor events that triggered an alert.

        Args:
            event_ids: List of event IDs to retrieve

        Returns:
            List of event dictionaries with a normalized structure
        """
        if not event_ids:
            return []

        try:
            # Search for the events by IDs using the ids query
            query = {
                "query": {
                    "ids": {
                        "values": event_ids
                    }
                },
                "size": len(event_ids)
            }

            # Search across common security indices
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*,logs-endpoint.*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            events = []

            for hit in hits:
                source = hit.get("_source", {})
                event_id = hit.get("_id", "")

                # Parse timestamp
                timestamp_str = source.get("@timestamp") or source.get("timestamp")
                timestamp = None
                if timestamp_str:
                    try:
                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
                    except Exception:
                        pass
                if not timestamp:
                    timestamp = datetime.utcnow()

                # Determine source type from index or event fields
                source_type = "other"
                index = hit.get("_index", "")
                if "winlogbeat" in index or "windows" in index.lower():
                    source_type = "endpoint"
                elif "network" in index.lower() or "firewall" in index.lower():
                    source_type = "network"
                elif "auth" in index.lower() or "login" in index.lower():
                    source_type = "auth"
                elif "cloud" in index.lower():
                    source_type = "cloud"

                # Extract common fields, handling both nested structures
                # (host: {name: ...}) and flat dotted keys (host.name)
                host = None
                if isinstance(source.get("host"), dict):
                    host = source.get("host", {}).get("name")
                else:
                    host = source.get("host.name") or source.get("host")

                username = None
                if isinstance(source.get("user"), dict):
                    username = source.get("user", {}).get("name")
                else:
                    username = source.get("user.name") or source.get("user")

                ip = None
                if isinstance(source.get("source"), dict):
                    ip = source.get("source", {}).get("ip")
                else:
                    ip = source.get("source.ip") or source.get("source")

                process_name = None
                if isinstance(source.get("process"), dict):
                    process_name = source.get("process", {}).get("name")
                else:
                    process_name = source.get("process.name") or source.get("process")

                file_hash = None
                file_obj = source.get("file", {})
                if isinstance(file_obj, dict):
                    hash_obj = file_obj.get("hash", {})
                    if isinstance(hash_obj, dict):
                        file_hash = hash_obj.get("sha256")
                if not file_hash:
                    file_hash = source.get("file.hash.sha256")

                # Get message, falling back to event.original when present; the
                # isinstance guard keeps a non-dict "event" value from raising
                message = source.get("message")
                event_obj = source.get("event")
                if not message and isinstance(event_obj, dict):
                    message = event_obj.get("original", "")
                message = message or ""

                # Build the normalized event dictionary
                event = {
                    "id": event_id,
                    "timestamp": timestamp.isoformat() if timestamp else None,
                    "source_type": source_type,
                    "message": message,
                    "host": host,
                    "username": username,
                    "ip": ip,
                    "process_name": process_name,
                    "file_hash": file_hash,
                    "raw": source,  # Include the full raw source for detailed analysis
                }

                events.append(event)

            logger.debug(f"Retrieved {len(events)} events from {len(event_ids)} requested IDs")
            return events

        except Exception as e:
            logger.exception(f"Error retrieving events by IDs from Elasticsearch: {e}")
            # Return an empty list rather than failing - ancestor events are supplementary
            return []

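    # Shape of each normalized event dict returned above (values illustrative):
    #
    #     {
    #         "id": "abc123",
    #         "timestamp": "2024-01-01T12:00:00+00:00",
    #         "source_type": "endpoint",
    #         "message": "...",
    #         "host": "ws-01",
    #         "username": "alice",
    #         "ip": "10.0.0.5",
    #         "process_name": "powershell.exe",
    #         "file_hash": "e3b0c442...",
    #         "raw": {...},
    #     }
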
    def get_file_report(self, file_hash: str) -> FileReport:
        """Get a report about a file by hash."""
        try:
            # Search for events containing this file hash
            query = {
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"file.hash.sha256": file_hash}},
                            {"match": {"file.hash.sha1": file_hash}},
                            {"match": {"file.hash.md5": file_hash}},
                            {"match": {"hash": file_hash}},
                        ]
                    }
                },
                "size": 100,
                "sort": [{"@timestamp": {"order": "asc"}}]
            }

            # Search with fallback index patterns
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])

            first_seen = None
            last_seen = None
            affected_hosts = set()

            for hit in hits:
                source = hit.get("_source", {})
                timestamp_str = source.get("@timestamp") or source.get("timestamp")
                if timestamp_str:
                    try:
                        ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
                        if not first_seen or ts < first_seen:
                            first_seen = ts
                        if not last_seen or ts > last_seen:
                            last_seen = ts
                    except Exception:
                        pass

                host = source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host")
                if host:
                    affected_hosts.add(host)

            return FileReport(
                file_hash=file_hash,
                first_seen=first_seen,
                last_seen=last_seen,
                detection_count=len(hits),
                affected_hosts=list(affected_hosts) if affected_hosts else None,
                raw={"hits": hits[:10]} if hits else None,
            )
        except Exception as e:
            logger.exception(f"Error getting file report from Elasticsearch: {e}")
            raise IntegrationError(f"Failed to get file report: {e}") from e

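    # Usage sketch (the hash value is illustrative): the report aggregates
    # first/last sightings and affected hosts from matching events rather than
    # querying a dedicated file index.
    #
    #     report = client.get_file_report("e3b0c44298fc1c149afbf4c8996fb924...")
    #     print(report.first_seen, report.last_seen,
    #           report.detection_count, report.affected_hosts)
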
    def get_file_behavior_summary(self, file_hash: str) -> FileBehaviorSummary:
        """Get a behavior summary for a file."""
        # Reuse get_file_report and extract behavior information from its raw hits
        report = self.get_file_report(file_hash)

        # Try to extract process trees and network activity from events
        process_trees = []
        network_activity = []

        if report.raw and "hits" in report.raw:
            for hit in report.raw["hits"]:
                source = hit.get("_source", {})
                # Extract process information
                process = source.get("process", {})
                if process:
                    process_trees.append({
                        "name": process.get("name"),
                        "pid": process.get("pid"),
                        "parent": process.get("parent"),
                        "command_line": process.get("command_line"),
                    })

                # Extract network information
                network = source.get("network", {}) or source.get("destination", {})
                if network:
                    network_activity.append({
                        "ip": network.get("ip") or source.get("destination", {}).get("ip"),
                        "port": network.get("port") or source.get("destination", {}).get("port"),
                        "protocol": network.get("protocol"),
                    })

        return FileBehaviorSummary(
            file_hash=file_hash,
            process_trees=process_trees[:20] if process_trees else None,
            network_activity=network_activity[:20] if network_activity else None,
            persistence_mechanisms=None,  # Would need specific queries for this
            notes=f"Found {report.detection_count} events related to this file",
        )

    def get_entities_related_to_file(self, file_hash: str) -> RelatedEntities:
        """Get entities (hosts, users, processes, alerts) related to a file."""
        report = self.get_file_report(file_hash)

        hosts = set()
        users = set()
        processes = set()
        alerts = []

        if report.raw and "hits" in report.raw:
            for hit in report.raw["hits"]:
                source = hit.get("_source", {})

                host = source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host")
                if host:
                    hosts.add(host)

                user = source.get("user", {}).get("name") if isinstance(source.get("user"), dict) else source.get("user")
                if user:
                    users.add(user)

                process = source.get("process", {}).get("name") if isinstance(source.get("process"), dict) else source.get("process.name")
                if process:
                    processes.add(process)

                # Check if this is an alert
                if source.get("event", {}).get("kind") == "alert" or "alert" in source.get("tags", []):
                    alerts.append(hit.get("_id", ""))

        return RelatedEntities(
            indicator=file_hash,
            hosts=list(hosts) if hosts else None,
            users=list(users) if users else None,
            processes=list(processes) if processes else None,
            alerts=alerts if alerts else None,
        )

    def get_ip_address_report(self, ip: str) -> IpAddressReport:
        """Get a report about an IP address."""
        try:
            # Search for events containing this IP
            query = {
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"source.ip": ip}},
                            {"match": {"destination.ip": ip}},
                            {"match": {"client.ip": ip}},
                            {"match": {"server.ip": ip}},
                            {"match": {"ip": ip}},
                        ]
                    }
                },
                "size": 50,
                "sort": [{"@timestamp": {"order": "desc"}}]
            }

            # Search with fallback index patterns
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            alerts = []

            for hit in hits:
                source = hit.get("_source", {})
                if source.get("event", {}).get("kind") == "alert" or "alert" in source.get("tags", []):
                    alerts.append(hit.get("_id", ""))

            return IpAddressReport(
                ip=ip,
                reputation=None,  # Would need threat intelligence integration
                geo=None,  # Would need a GeoIP lookup
                related_alerts=alerts if alerts else None,
                raw={"hits": hits[:10]} if hits else None,
            )
        except Exception as e:
            logger.exception(f"Error getting IP report from Elasticsearch: {e}")
            raise IntegrationError(f"Failed to get IP address report: {e}") from e

    def search_user_activity(
        self,
        username: str,
        limit: int = 100,
    ) -> QueryResult:
        """Search for user activity."""
        query = {
            "query": {
                "bool": {
                    "should": [
                        {"match": {"user.name": username}},
                        {"match": {"user": username}},
                        {"match": {"username": username}},
                    ]
                }
            },
            "size": limit,
            "sort": [{"@timestamp": {"order": "desc"}}]
        }

        return self.search_security_events(json.dumps(query), limit=limit)

    def pivot_on_indicator(
        self,
        indicator: str,
        limit: int = 200,
    ) -> QueryResult:
        """
        Given an IOC (hash, IP, domain, etc.), search for related events.
        """
        # Detect the indicator type and search accordingly

        # IP address pattern
        ip_pattern = r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$'
        # Hash pattern (covers MD5, SHA1, and SHA256 lengths)
        hash_pattern = r'^[a-fA-F0-9]{32,64}$'
        # Domain pattern
        domain_pattern = r'^[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)*$'

        if re.match(ip_pattern, indicator):
            # IP address
            query = {
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"source.ip": indicator}},
                            {"match": {"destination.ip": indicator}},
                            {"match": {"client.ip": indicator}},
                            {"match": {"server.ip": indicator}},
                        ]
                    }
                },
                "size": limit
            }
        elif re.match(hash_pattern, indicator):
            # File hash
            query = {
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"file.hash.sha256": indicator}},
                            {"match": {"file.hash.sha1": indicator}},
                            {"match": {"file.hash.md5": indicator}},
                            {"match": {"hash": indicator}},
                        ]
                    }
                },
                "size": limit
            }
        elif re.match(domain_pattern, indicator) and '.' in indicator:
            # Domain
            query = {
                "query": {
                    "bool": {
                        "should": [
                            {"match": {"dns.question.name": indicator}},
                            {"match": {"url.domain": indicator}},
                            {"match": {"domain": indicator}},
                        ]
                    }
                },
                "size": limit
            }
        else:
            # Generic search
            query = {
                "query": {
                    "query_string": {
                        "query": indicator
                    }
                },
                "size": limit
            }

        return self.search_security_events(json.dumps(query), limit=limit)

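    # Indicator classification, as a sketch; each form routes to the matching
    # field set above (values illustrative):
    #
    #     client.pivot_on_indicator("203.0.113.7")       # IP -> source/destination/client/server.ip
    #     client.pivot_on_indicator("d41d8cd98f00b204e9800998ecf8427e")  # hash -> file.hash.*
    #     client.pivot_on_indicator("evil.example.com")  # domain -> dns.question.name, url.domain
    #     client.pivot_on_indicator("anything else")     # fallback -> query_string
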
    def search_kql_query(
        self,
        kql_query: str,
        limit: int = 500,
        hours_back: Optional[int] = None,
    ) -> QueryResult:
        """
        Execute a KQL (Kusto Query Language) or advanced query for deeper investigations.

        For Elasticsearch, this method accepts:
        - Full Elasticsearch Query DSL (JSON)
        - KQL-like queries that are converted to Elasticsearch DSL
        - Advanced aggregations and time-based analysis

        Args:
            kql_query: KQL query string or Elasticsearch Query DSL
            limit: Maximum number of events to return (default: 500)
            hours_back: Optional time window in hours to limit the search
        """
        try:
            # Try to parse as JSON first (Elasticsearch Query DSL)
            try:
                query_dict = json.loads(kql_query)
                if isinstance(query_dict, dict):
                    es_query = query_dict
                    # Ensure size is set
                    if "size" not in es_query:
                        es_query["size"] = limit
                else:
                    # Not a dict, treat as KQL
                    es_query = self._kql_to_elasticsearch(kql_query, limit=limit, hours_back=hours_back)
            except (json.JSONDecodeError, ValueError):
                # Parse as a KQL-like query and convert to Elasticsearch DSL
                es_query = self._kql_to_elasticsearch(kql_query, limit=limit, hours_back=hours_back)

            # Add a time range filter if specified
            if hours_back:
                time_filter = {
                    "range": {
                        "@timestamp": {
                            "gte": f"now-{hours_back}h"
                        }
                    }
                }
                if "query" in es_query:
                    if "bool" in es_query["query"]:
                        if "must" not in es_query["query"]["bool"]:
                            es_query["query"]["bool"]["must"] = []
                        es_query["query"]["bool"]["must"].append(time_filter)
                    else:
                        # Wrap the existing query in a bool query
                        es_query["query"] = {
                            "bool": {
                                "must": [
                                    es_query["query"],
                                    time_filter
                                ]
                            }
                        }
                else:
                    es_query["query"] = time_filter

            # Search across all security indices with fallback
            indices_patterns = [
                "logs-*,security-*,winlogbeat-*,filebeat-*,alerts-*,.siem-signals-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, es_query)

            # Parse the Elasticsearch response
            hits = response.get("hits", {}).get("hits", [])
            total = response.get("hits", {}).get("total", {})
            if isinstance(total, dict):
                total_count = total.get("value", len(hits))
            else:
                total_count = total

            events = []
            for hit in hits[:limit]:
                source = hit.get("_source", {})
                timestamp_str = source.get("@timestamp") or source.get("timestamp")
                timestamp = None
                if timestamp_str:
                    try:
                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
                    except Exception:
                        pass
                if not timestamp:
                    timestamp = datetime.utcnow()

                # Determine source type from index or event fields
                source_type = SourceType.OTHER
                index = hit.get("_index", "")
                if "winlogbeat" in index or "windows" in index.lower():
                    source_type = SourceType.ENDPOINT
                elif "network" in index.lower() or "firewall" in index.lower():
                    source_type = SourceType.NETWORK
                elif "auth" in index.lower() or "login" in index.lower():
                    source_type = SourceType.AUTH
                elif "cloud" in index.lower():
                    source_type = SourceType.CLOUD

                event = SiemEvent(
                    id=hit.get("_id", ""),
                    timestamp=timestamp,
                    source_type=source_type,
                    message=source.get("message", source.get("event", {}).get("original", "")),
                    host=source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host"),
                    username=source.get("user", {}).get("name") if isinstance(source.get("user"), dict) else source.get("user"),
                    ip=source.get("source", {}).get("ip") if isinstance(source.get("source"), dict) else source.get("source.ip"),
                    process_name=source.get("process", {}).get("name") if isinstance(source.get("process"), dict) else source.get("process.name"),
                    file_hash=source.get("file", {}).get("hash", {}).get("sha256") if isinstance(source.get("file"), dict) else source.get("file.hash.sha256"),
                    raw=source,
                )
                events.append(event)

            return QueryResult(
                query=kql_query,
                events=events,
                total_count=total_count,
            )
        except Exception as e:
            logger.exception(f"Error executing KQL query: {e}")
            raise IntegrationError(f"Failed to execute KQL query: {e}") from e

    def _kql_to_elasticsearch(self, kql_query: str, limit: int = 500, hours_back: Optional[int] = None) -> Dict[str, Any]:
        """
        Convert a KQL-like query to Elasticsearch Query DSL.

        Supports basic KQL patterns:
        - Field filters: field == value, field != value
        - Logical operators: and, or, not
        - Comparison operators: ==, !=, >, <, >=, <=
        - Contains: field contains "value"
        - Time ranges: | where timestamp > ago(1h)

        The ``hours_back`` window is applied by the caller (``search_kql_query``);
        only a time range parsed from ``ago()`` is applied here.
        """
        # Start with the base query structure
        query: Dict[str, Any] = {
            "size": limit,
            "sort": [{"@timestamp": {"order": "desc"}}]
        }

        # Handle a time range (e.g., | where timestamp > ago(1h)). Elasticsearch
        # date math understands the same h/d/m suffixes, so the parsed window can
        # be passed through directly as a range filter.
        ago_filter = None
        ago_match = re.search(r'ago\((\d+)([hdm])\)', kql_query, flags=re.IGNORECASE)
        if ago_match:
            value = int(ago_match.group(1))
            unit = ago_match.group(2).lower()
            ago_filter = {"range": {"@timestamp": {"gte": f"now-{value}{unit}"}}}

        # Remove time filters from the query string for field parsing
        query_str = re.sub(r'\|\s*where\s+timestamp.*', '', kql_query, flags=re.IGNORECASE)
        query_str = re.sub(r'ago\([^)]+\)', '', query_str, flags=re.IGNORECASE)

        # Parse field filters
        # Pattern: field == value, field != value, field contains value, etc.
        field_patterns = [
            (r'(\w+)\s*==\s*"([^"]+)"', "term"),
            (r'(\w+)\s*==\s*(\S+)', "term"),
            (r'(\w+)\s*!=\s*"([^"]+)"', "must_not_term"),
            (r'(\w+)\s*!=\s*(\S+)', "must_not_term"),
            (r'(\w+)\s*contains\s*"([^"]+)"', "match"),
            (r'(\w+)\s*contains\s*(\S+)', "match"),
        ]

        # Map common KQL field names to Elasticsearch (ECS) fields
        field_mapping = {
            "host": "host.name",
            "ip": "source.ip",
            "user": "user.name",
            "username": "user.name",
            "process": "process.name",
            "file": "file.path",
            "hash": "file.hash.sha256",
            "domain": "dns.question.name",
        }

        bool_query: Dict[str, Any] = {"bool": {"must": []}}

        for pattern, query_type in field_patterns:
            for match in re.finditer(pattern, query_str):
                field = match.group(1)
                value = match.group(2)
                es_field = field_mapping.get(field.lower(), field)

                if query_type == "term":
                    bool_query["bool"]["must"].append({"term": {es_field: value}})
                elif query_type == "must_not_term":
                    if "must_not" not in bool_query["bool"]:
                        bool_query["bool"]["must_not"] = []
                    bool_query["bool"]["must_not"].append({"term": {es_field: value}})
                elif query_type == "match":
                    bool_query["bool"]["must"].append({"match": {es_field: value}})
            # Strip consumed filters so the looser unquoted patterns do not
            # re-match quoted forms with the quotes still attached
            query_str = re.sub(pattern, '', query_str)

        # If no filters were parsed, use query_string as a fallback
        if not bool_query["bool"]["must"] and not bool_query["bool"].get("must_not"):
            bool_query["bool"]["must"].append({
                "query_string": {
                    "query": query_str.strip()
                }
            })

        # Apply a time window parsed from ago(), if any
        if ago_filter:
            bool_query["bool"]["must"].append(ago_filter)

        query["query"] = bool_query

        return query

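    # Conversion sketch: for the KQL-like input
    #
    #     host == "ws-01" and process contains "powershell" | where timestamp > ago(24h)
    #
    # the method above would produce roughly (assuming the parsing succeeds):
    #
    #     {
    #         "size": 500,
    #         "sort": [{"@timestamp": {"order": "desc"}}],
    #         "query": {"bool": {"must": [
    #             {"term": {"host.name": "ws-01"}},
    #             {"match": {"process.name": "powershell"}},
    #             {"range": {"@timestamp": {"gte": "now-24h"}}},
    #         ]}},
    #     }
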
    # Alert Management Methods

    def get_security_alerts(
        self,
        hours_back: int = 24,
        max_alerts: int = 10,
        status_filter: Optional[str] = None,
        severity: Optional[str] = None,
        hostname: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Get security alerts from Elasticsearch.

        Searches for alerts in security indices, typically alerts-* or .siem-signals-*.

        **CRITICAL:** Automatically excludes alerts that have already been investigated
        (alerts with a signal.ai.verdict field). This prevents SOC1 from re-investigating
        alerts that have already been triaged: the verdict field is only set after an
        alert has been investigated, so its presence indicates the alert should be skipped.

        Args:
            hours_back: How many hours to look back
            max_alerts: Maximum number of alerts to return
            status_filter: Filter by status
            severity: Filter by severity
            hostname: Optional hostname to filter alerts by (matches the host.name field)
        """
        try:
            # Build the query for alerts
            query = {
                "query": {
                    "bool": {
                        "must": [
                            {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}}
                        ]
                    }
                },
                "size": max_alerts,
                "sort": [{"@timestamp": {"order": "desc"}}]
            }

            # Add status filter
            if status_filter:
                query["query"]["bool"]["must"].append({"match": {"signal.status": status_filter}})
            else:
                # Default: exclude closed alerts
                query["query"]["bool"]["must_not"] = [{"term": {"signal.status": "closed"}}]

            # Add severity filter
            if severity:
                query["query"]["bool"]["must"].append({"match": {"signal.severity": severity}})

            # Add hostname filter
            if hostname:
                query["query"]["bool"]["must"].append({
                    "bool": {
                        "should": [
                            {"match": {"host.name": hostname}},
                            {"match": {"hostname": hostname}},
                            {"match": {"host": hostname}},
                        ]
                    }
                })

            # CRITICAL: Exclude alerts that have already been investigated (have
            # signal.ai.verdict), so already-triaged alerts are not re-investigated.
            # Ensure the must_not array exists (it may have been created by the
            # status filter above).
            if "must_not" not in query["query"]["bool"]:
                query["query"]["bool"]["must_not"] = []
            query["query"]["bool"]["must_not"].append({
                "exists": {"field": "signal.ai.verdict"}
            })

            # Search with fallback index patterns
            indices_patterns = [
                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
                "alerts-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            alerts = []

            for hit in hits:
                source = hit.get("_source", {})
                signal = source.get("signal", {})

                # Extract the verdict from signal.ai.verdict to determine whether
                # the alert has already been investigated (i.e., already triaged)
                signal_ai = signal.get("ai", {})
                verdict = signal_ai.get("verdict") if signal_ai else None

                # Safety check in case the query filter didn't catch it:
                # skip alerts that already carry a verdict
                if verdict:
                    continue

                # Get title: prefer signal.rule.name, then kibana.alert.rule.name,
                # then rule.name, then message, then event.reason
                title = ""
                if isinstance(signal.get("rule"), dict):
                    title = signal.get("rule", {}).get("name", "")
                if not title:
                    title = source.get("kibana.alert.rule.name", "")
                if not title:
                    # Check for the endpoint detection format (rule.name directly on the document)
                    rule_obj = source.get("rule", {})
                    if isinstance(rule_obj, dict):
                        title = rule_obj.get("name", "")
                if not title:
                    # Check the message field (common in endpoint detections)
                    title = source.get("message", "")
                if not title:
                    title = source.get("event", {}).get("reason", "")

                # Get severity and status, preferring signal.* over kibana.alert.*;
                # distinct local names avoid shadowing the ``severity`` argument
                alert_severity = signal.get("severity") or source.get("kibana.alert.severity", "medium")
                alert_status = signal.get("status") or source.get("kibana.alert.workflow_status", "open")

                alerts.append({
                    "id": hit.get("_id", ""),
                    "title": title,
                    "severity": alert_severity,
                    "status": alert_status,
                    "created_at": source.get("@timestamp", ""),
                    "description": self._extract_description_from_alert(source, signal),
                    "source": "elastic",
                    "related_entities": self._extract_entities_from_alert(source),
                    "verdict": verdict,  # From signal.ai.verdict - None for uninvestigated alerts
                    "signal": {
                        "ai": {
                            "verdict": verdict  # Include the full path for explicit checking
                        }
                    },
                })

            return alerts
        except Exception as e:
            logger.exception(f"Error getting security alerts: {e}")
            raise IntegrationError(f"Failed to get security alerts: {e}") from e

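    # Sketch of the query body assembled above for the default arguments
    # (no status/severity/hostname filters):
    #
    #     {
    #         "query": {"bool": {
    #             "must": [{"range": {"@timestamp": {"gte": "now-24h"}}}],
    #             "must_not": [
    #                 {"term": {"signal.status": "closed"}},
    #                 {"exists": {"field": "signal.ai.verdict"}},
    #             ],
    #         }},
    #         "size": 10,
    #         "sort": [{"@timestamp": {"order": "desc"}}],
    #     }
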
    def get_security_alert_by_id(
        self,
        alert_id: str,
        include_detections: bool = True,
    ) -> Dict[str, Any]:
        """Get detailed information about a specific security alert."""
        try:
            # Search for the alert by ID using the ids query (the correct way to
            # search by document ID)
            query = {
                "query": {
                    "ids": {
                        "values": [alert_id]
                    }
                }
            }

            # Search with fallback index patterns
            indices_patterns = [
                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
                "alerts-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            if not hits:
                raise IntegrationError(f"Alert {alert_id} not found")

            hit = hits[0]
            source = hit.get("_source", {})
            signal = source.get("signal", {})
            rule = signal.get("rule", {}) if isinstance(signal.get("rule"), dict) else {}

            # Extract from the Kibana alert format (newer format with flat dot-notation
            # keys); kibana.alert.rule.parameters is an object, the others are flat keys
            kibana_rule_params = source.get("kibana.alert.rule.parameters", {})
            if not isinstance(kibana_rule_params, dict):
                kibana_rule_params = {}

            # Get title: prefer signal.rule.name, then kibana.alert.rule.name,
            # then rule.name, then message
            title = rule.get("name", "")
            if not title:
                title = source.get("kibana.alert.rule.name", "")
            if not title:
                # Check for the endpoint detection format (rule.name directly on the document)
                rule_obj = source.get("rule", {})
                if isinstance(rule_obj, dict):
                    title = rule_obj.get("name", "")
            if not title:
                # Check the message field (common in endpoint detections)
                title = source.get("message", "")

            # Get description: prefer signal.rule.description, then
            # kibana.alert.rule.parameters.description, then rule.description
            description = rule.get("description", "")
            if not description:
                description = kibana_rule_params.get("description", "")
            # If still empty, try kibana.alert.rule.description (flat key)
            if not description:
                description = source.get("kibana.alert.rule.description", "")
            if not description:
                # Check for the endpoint detection format (rule.description directly on the document)
                rule_obj = source.get("rule", {})
                if isinstance(rule_obj, dict):
                    description = rule_obj.get("description", "")

            # Get severity: prefer signal.severity, fall back to kibana.alert.severity
            severity = signal.get("severity", "")
            if not severity:
                severity = source.get("kibana.alert.severity", "")
            if not severity:
                severity = "medium"  # Default fallback

            # Get status: prefer signal.status, fall back to kibana.alert.workflow_status
            status = signal.get("status", "")
            if not status:
                status = source.get("kibana.alert.workflow_status", "")
            if not status:
                status = "open"  # Default fallback

            # Extract comments from signal.ai.comments.comment
            comments = []

            signal_ai = signal.get("ai", {})
            if isinstance(signal_ai, dict):
                ai_comments = signal_ai.get("comments", {})
                if isinstance(ai_comments, dict):
                    ai_comment = ai_comments.get("comment")
                    if isinstance(ai_comment, list):
                        logger.debug(f"Found {len(ai_comment)} comments in signal.ai.comments.comment")
                        comments.extend(ai_comment)
                    elif ai_comment:
                        # Single comment as a string or dict
                        comments.append(ai_comment)

            logger.debug(f"Total comments found for alert {alert_id}: {len(comments)}")

            # Remove duplicates based on comment text and timestamp
            seen_comments = set()
            unique_comments = []
            for comment in comments:
                if isinstance(comment, dict):
                    comment_key = (comment.get("comment", ""), comment.get("timestamp", ""))
                else:
                    comment_key = (str(comment), "")
                if comment_key not in seen_comments:
                    seen_comments.add(comment_key)
                    unique_comments.append(comment)

            # Get the verdict from signal.ai.verdict
            verdict = ""
            if isinstance(signal_ai, dict):
                verdict = signal_ai.get("verdict", "")

            alert = {
                "id": alert_id,
                "title": title,
                "severity": severity,
                "status": status,
                "priority": self._severity_to_priority(severity),
                "verdict": verdict,
                "description": description,
                "created_at": source.get("@timestamp", ""),
                "updated_at": source.get("@timestamp", ""),
                "related_entities": self._extract_entities_from_alert(source),
                "comments": unique_comments,
            }

            if include_detections:
                alert["detections"] = [{
                    "id": alert_id,
                    "timestamp": source.get("@timestamp", ""),
                    "severity": severity,
                    "status": status,
                    "description": description,
                }]

            # Extract and retrieve the ancestor events that triggered this alert
            ancestor_event_ids = []

            # Check for kibana.alert.ancestors (newer format)
            kibana_ancestors = source.get("kibana.alert.ancestors", [])
            if isinstance(kibana_ancestors, list):
                for ancestor in kibana_ancestors:
                    if isinstance(ancestor, dict):
                        ancestor_id = ancestor.get("id")
                        if ancestor_id:
                            ancestor_event_ids.append(ancestor_id)

            # Check for signal.ancestors (older format) if no Kibana ancestors were found
            if not ancestor_event_ids:
                signal_ancestors = signal.get("ancestors", [])
                if isinstance(signal_ancestors, list):
                    for ancestor in signal_ancestors:
                        if isinstance(ancestor, dict):
                            ancestor_id = ancestor.get("id")
                            if ancestor_id:
                                ancestor_event_ids.append(ancestor_id)

            # Retrieve the ancestor events
            if ancestor_event_ids:
                try:
                    ancestor_events = self._get_events_by_ids(ancestor_event_ids)
                    alert["events"] = ancestor_events
                    logger.debug(f"Retrieved {len(ancestor_events)} ancestor events for alert {alert_id}")
                except Exception as e:
                    logger.warning(f"Failed to retrieve ancestor events for alert {alert_id}: {e}")
                    # Continue without events rather than failing the entire alert retrieval
                    alert["events"] = []
            else:
                alert["events"] = []

            return alert
        except IntegrationError:
            raise
        except Exception as e:
            logger.exception(f"Error getting security alert by ID: {e}")
            raise IntegrationError(f"Failed to get security alert: {e}") from e

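    # Keys of the dict returned above (values illustrative):
    #
    #     {
    #         "id": "...", "title": "...", "severity": "medium", "status": "open",
    #         "priority": ..., "verdict": "", "description": "...",
    #         "created_at": "...", "updated_at": "...",
    #         "related_entities": {...}, "comments": [...],
    #         "detections": [...],   # only when include_detections=True
    #         "events": [...],       # ancestor events, possibly empty
    #     }
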
    def get_raw_alert_document(self, alert_id: str) -> Dict[str, Any]:
        """
        Get the raw Elasticsearch document for an alert by ID.
        Useful for debugging and investigating field structures.

        Args:
            alert_id: The alert ID to fetch

        Returns:
            Raw _source document from Elasticsearch
        """
        try:
            query = {
                "query": {
                    "ids": {
                        "values": [alert_id]
                    }
                },
                "size": 1
            }

            indices_patterns = [
                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
                "alerts-*",
                "_all",
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            if not hits:
                raise IntegrationError(f"Alert {alert_id} not found")

            return hits[0].get("_source", {})
        except IntegrationError:
            raise
        except Exception as e:
            logger.exception(f"Error getting raw alert document {alert_id}: {e}")
            raise IntegrationError(f"Failed to get raw alert document: {e}") from e

    def close_alert(
        self,
        alert_id: str,
        reason: Optional[str] = None,
        comment: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Set a verdict for an alert in Elasticsearch (FP, TP, etc.).

        Updates signal.ai.verdict with the reason instead of closing the alert.
        The reason should be one of: "false_positive", "benign_true_positive",
        "true_positive", etc.
        """
        try:
            # First, find the alert to get its index
            query = {
                "query": {
                    "term": {"_id": alert_id}
                }
            }

            # Search with fallback index patterns
            indices_patterns = [
                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
                "alerts-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            if not hits:
                raise IntegrationError(f"Alert {alert_id} not found")

            hit = hits[0]
            index_name = hit.get("_index")
            if not index_name:
                raise IntegrationError(f"Could not determine index for alert {alert_id}")

            # Normalize the reason to the verdict format
            verdict = reason or "false_positive"
            if verdict in ["FP", "fp", "false_positive"]:
                verdict = "false_positive"
            elif verdict in ["BTP", "btp", "benign_true_positive"]:
                verdict = "benign_true_positive"
            elif verdict in ["TP", "tp", "true_positive"]:
                verdict = "true_positive"
            elif verdict in ["in-progress", "in_progress", "inprogress", "investigating"]:
                verdict = "in-progress"

            # Build the update document, using a script for the nested signal.ai object
            script_update = {
                "script": {
                    "source": """
                        if (ctx._source.signal == null) {
                            ctx._source.signal = [:];
                        }
                        if (ctx._source.signal.ai == null) {
                            ctx._source.signal.ai = [:];
                        }
                        ctx._source.signal.ai.verdict = params.verdict;
                        ctx._source.signal.ai.verdict_at = params.timestamp;
                    """,
                    "lang": "painless",
                    "params": {
                        "verdict": verdict,
                        "timestamp": datetime.utcnow().isoformat() + "Z"
                    }
                }
            }

            # If a comment is provided, also add it to signal.ai.comments.comment
            if comment:
                # Get the existing comments first
                source = hit.get("_source", {})
                existing_comments = []
                signal = source.get("signal", {})
                if isinstance(signal, dict):
                    signal_ai = signal.get("ai", {})
                    if isinstance(signal_ai, dict):
                        ai_comments = signal_ai.get("comments", {})
                        if isinstance(ai_comments, dict):
                            ai_comment = ai_comments.get("comment")
                            if isinstance(ai_comment, list):
                                existing_comments = list(ai_comment)
                            elif ai_comment:
                                existing_comments = [ai_comment]

                # Add the new comment
                new_note = {
                    "timestamp": datetime.utcnow().isoformat() + "Z",
                    "comment": comment,
                    "author": "sami-gpt",
                }
                existing_comments.append(new_note)

                # Extend the script to also set the comments
                script_update["script"]["source"] = """
                    if (ctx._source.signal == null) {
                        ctx._source.signal = [:];
                    }
                    if (ctx._source.signal.ai == null) {
                        ctx._source.signal.ai = [:];
                    }
                    if (ctx._source.signal.ai.comments == null) {
                        ctx._source.signal.ai.comments = [:];
                    }
                    ctx._source.signal.ai.verdict = params.verdict;
                    ctx._source.signal.ai.verdict_at = params.timestamp;
                    ctx._source.signal.ai.comments.comment = params.comments;
                """
                script_update["script"]["params"]["comments"] = existing_comments

            # Update the alert using the Elasticsearch update API
            update_response = self._http.post(
                f"/{index_name}/_update/{alert_id}?refresh=wait_for",
                json_data=script_update
            )

            # Verify the update was successful
            if update_response.get("result") not in ["updated", "noop"]:
                logger.warning(f"Unexpected update result: {update_response.get('result')}")

            # Check for errors in the response
            if "error" in update_response:
                error_msg = update_response.get("error", {})
                logger.error(f"Elasticsearch update error: {error_msg}")
                raise IntegrationError(f"Failed to update alert: {error_msg}")

            # Get the updated alert details
            updated_alert = self.get_security_alert_by_id(alert_id, include_detections=False)

            return {
                "success": True,
                "alert_id": alert_id,
                "verdict": verdict,
                "comment": comment,
                "alert": updated_alert,
            }
        except IntegrationError:
            raise
        except Exception as e:
            logger.exception(f"Error setting verdict for alert {alert_id}: {e}")
            raise IntegrationError(f"Failed to set verdict for alert: {e}") from e

    def update_alert_verdict(
        self,
        alert_id: str,
        verdict: str,
        comment: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Update the verdict for an alert in Elasticsearch.

        This method sets or updates the signal.ai.verdict field. The verdict can be:
        - "in-progress": Alert is being investigated
        - "false_positive": Alert is a false positive
        - "benign_true_positive": Alert is a benign true positive
        - "true_positive": Alert is a true positive requiring investigation
        - "uncertain": Alert legitimacy cannot be determined with the available information

        Args:
            alert_id: The ID of the alert to update
            verdict: The verdict value to set
            comment: Optional comment to add to the alert

        Returns:
            Dictionary with success status, alert_id, verdict, and updated alert details
        """
        try:
            # First, find the alert to get its index
            query = {
                "query": {
                    "term": {"_id": alert_id}
                }
            }

            # Search with fallback index patterns
            indices_patterns = [
                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
                "alerts-*",
                "_all",  # Fallback to all indices if specific patterns fail
            ]
            response = self._search_with_fallback(indices_patterns, query)

            hits = response.get("hits", {}).get("hits", [])
            if not hits:
                raise IntegrationError(f"Alert {alert_id} not found")

            hit = hits[0]
            index_name = hit.get("_index")
            if not index_name:
                raise IntegrationError(f"Could not determine index for alert {alert_id}")

            # Normalize the verdict to the standard format
            verdict_normalized = verdict.lower().strip()
            if verdict_normalized in ["fp", "false_positive", "false-positive"]:
                verdict_normalized = "false_positive"
            elif verdict_normalized in ["btp", "benign_true_positive", "benign-true-positive"]:
                verdict_normalized = "benign_true_positive"
            elif verdict_normalized in ["tp", "true_positive", "true-positive"]:
                verdict_normalized = "true_positive"
            elif verdict_normalized in ["in-progress", "in_progress", "inprogress", "investigating"]:
                verdict_normalized = "in-progress"
            elif verdict_normalized in ["uncertain", "unknown", "unclear", "needs_more_investigation"]:
                verdict_normalized = "uncertain"
            else:
                # Use the provided verdict as-is if it doesn't match known patterns
                verdict_normalized = verdict

            # Build the update document, using a script for the nested signal.ai object
            script_update = {
                "script": {
                    "source": """
                        if (ctx._source.signal == null) {
                            ctx._source.signal = [:];
                        }
                        if (ctx._source.signal.ai == null) {
                            ctx._source.signal.ai = [:];
                        }
                        ctx._source.signal.ai.verdict = params.verdict;
                        ctx._source.signal.ai.verdict_at = params.timestamp;
                    """,
                    "lang": "painless",
                    "params": {
                        "verdict": verdict_normalized,
                        "timestamp": datetime.utcnow().isoformat() + "Z"
                    }
                }
            }

            # If a comment is provided, also add it to signal.ai.comments.comment
            if comment:
                # Get the existing comments first
                source = hit.get("_source", {})
                existing_comments = []
                signal = source.get("signal", {})
                if isinstance(signal, dict):
                    signal_ai = signal.get("ai", {})
                    if isinstance(signal_ai, dict):
                        ai_comments = signal_ai.get("comments", {})
                        if isinstance(ai_comments, dict):
                            ai_comment = ai_comments.get("comment")
                            if isinstance(ai_comment, list):
                                existing_comments = list(ai_comment)
                            elif ai_comment:
                                existing_comments = [ai_comment]

                # Add the new comment
                new_note = {
                    "timestamp": datetime.utcnow().isoformat() + "Z",
                    "comment": comment,
                    "author": "sami-gpt",
                }
                existing_comments.append(new_note)

                # Extend the script to also set the comments
                script_update["script"]["source"] = """
                    if (ctx._source.signal == null) {
                        ctx._source.signal = [:];
                    }
                    if (ctx._source.signal.ai == null) {
                        ctx._source.signal.ai = [:];
                    }
                    if (ctx._source.signal.ai.comments == null) {
                        ctx._source.signal.ai.comments = [:];
                    }
                    ctx._source.signal.ai.verdict = params.verdict;
                    ctx._source.signal.ai.verdict_at = params.timestamp;
                    ctx._source.signal.ai.comments.comment = params.comments;
                """
                script_update["script"]["params"]["comments"] = existing_comments

            # Update the alert using the Elasticsearch update API
            update_response = self._http.post(
                f"/{index_name}/_update/{alert_id}?refresh=wait_for",
                json_data=script_update
            )

            # Verify the update was successful
            if update_response.get("result") not in ["updated", "noop"]:
                logger.warning(f"Unexpected update result: {update_response.get('result')}")

            # Check for errors in the response
            if "error" in update_response:
                error_msg = update_response.get("error", {})
                logger.error(f"Elasticsearch update error: {error_msg}")
                raise IntegrationError(f"Failed to update alert verdict: {error_msg}")

            # Get the updated alert details
            updated_alert = self.get_security_alert_by_id(alert_id, include_detections=False)

            return {
                "success": True,
                "alert_id": alert_id,
                "verdict": verdict_normalized,
                "comment": comment,
                "alert": updated_alert,
            }
        except IntegrationError:
            raise
        except Exception as e:
            logger.exception(f"Error updating verdict for alert {alert_id}: {e}")
            raise IntegrationError(f"Failed to update alert verdict: {e}") from e

+ def tag_alert(
1532
+ self,
1533
+ alert_id: str,
1534
+ tag: str,
1535
+ ) -> Dict[str, Any]:
1536
+ """
1537
+ Tag an alert with a classification tag (FP, TP, or NMI).
1538
+
1539
+ Updates the alert with the specified tag, adding it to existing tags if present.
1540
+ Valid tags are: FP (False Positive), TP (True Positive), NMI (Need More Investigation).
1541
+ """
1542
+ try:
1543
+ # Validate tag
1544
+ valid_tags = {"FP", "TP", "NMI"}
1545
+ tag_upper = tag.upper()
1546
+ if tag_upper not in valid_tags:
1547
+ raise IntegrationError(
1548
+ f"Invalid tag '{tag}'. Must be one of: FP (False Positive), "
1549
+ f"TP (True Positive), or NMI (Need More Investigation)"
1550
+ )
1551
+
1552
+ # First, find the alert to get its index and current tags
1553
+ query = {
1554
+ "query": {
1555
+ "term": {"_id": alert_id}
1556
+ }
1557
+ }
1558
+
1559
+ # Search with fallback index patterns
1560
+ indices_patterns = [
1561
+ "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
1562
+ "alerts-*",
1563
+ "_all", # Fallback to all indices if specific patterns fail
1564
+ ]
1565
+ response = self._search_with_fallback(indices_patterns, query)
1566
+
1567
+ hits = response.get("hits", {}).get("hits", [])
1568
+ if not hits:
1569
+ raise IntegrationError(f"Alert {alert_id} not found")
1570
+
1571
+ hit = hits[0]
1572
+ index_name = hit.get("_index")
1573
+ if not index_name:
1574
+ raise IntegrationError(f"Could not determine index for alert {alert_id}")
1575
+
1576
+ source = hit.get("_source", {})
1577
+
1578
+ # Get existing tags from signal.ai.tags
1579
+ existing_tags = []
1580
+ signal = source.get("signal", {})
1581
+ if isinstance(signal, dict):
1582
+ signal_ai = signal.get("ai", {})
1583
+ if isinstance(signal_ai, dict):
1584
+ ai_tags = signal_ai.get("tags")
1585
+ if isinstance(ai_tags, list):
1586
+ existing_tags = list(ai_tags)
1587
+ elif ai_tags:
1588
+ existing_tags = [ai_tags]
1589
+
1590
+ # Remove duplicates and ensure tag is added
1591
+ existing_tags = list(set(existing_tags))
1592
+
1593
+ # Remove any existing classification tags (FP, TP, NMI) to avoid duplicates
1594
+ classification_tags = {"FP", "TP", "NMI"}
1595
+ existing_tags = [t for t in existing_tags if t.upper() not in classification_tags]
1596
+
1597
+ # Add the new tag
1598
+ existing_tags.append(tag_upper)
1599
+
1600
+ # Build update document using script for nested signal.ai object
1601
+ script_update = {
1602
+ "script": {
1603
+ "source": """
1604
+ if (ctx._source.signal == null) {
1605
+ ctx._source.signal = [:];
1606
+ }
1607
+ if (ctx._source.signal.ai == null) {
1608
+ ctx._source.signal.ai = [:];
1609
+ }
1610
+ ctx._source.signal.ai.tags = params.tags;
1611
+ ctx._source.signal.ai.tagged_at = params.timestamp;
1612
+ """,
1613
+ "lang": "painless",
1614
+ "params": {
1615
+ "tags": existing_tags,
1616
+ "timestamp": datetime.utcnow().isoformat() + "Z"
1617
+ }
1618
+ }
1619
+ }
1620
+
1621
+ # Update the alert using Elasticsearch update API
1622
+ update_response = self._http.post(
1623
+ f"/{index_name}/_update/{alert_id}?refresh=wait_for",
1624
+ json_data=script_update
1625
+ )
1626
+
1627
+ # Verify the update was successful
1628
+ if update_response.get("result") not in ["updated", "noop"]:
1629
+ logger.warning(f"Unexpected update result: {update_response.get('result')}")
1630
+
1631
+ # Check for errors in the response
1632
+ if "error" in update_response:
1633
+ error_msg = update_response.get("error", {})
1634
+ logger.error(f"Elasticsearch update error: {error_msg}")
1635
+ raise IntegrationError(f"Failed to update alert: {error_msg}")
1636
+
1637
+ # Get updated alert details
1638
+ updated_alert = self.get_security_alert_by_id(alert_id, include_detections=False)
1639
+
1640
+ return {
1641
+ "success": True,
1642
+ "alert_id": alert_id,
1643
+ "tag": tag_upper,
1644
+ "tags": existing_tags,
1645
+ "alert": updated_alert,
1646
+ }
1647
+ except IntegrationError:
1648
+ raise
1649
+ except Exception as e:
1650
+ logger.exception(f"Error tagging alert {alert_id}: {e}")
1651
+ raise IntegrationError(f"Failed to tag alert: {e}") from e
1652
+
1653
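+    # Usage sketch for tag_alert (illustrative only: `siem` stands for an
+    # instance of this client, and the alert id is made up):
+    #
+    #     result = siem.tag_alert("abc123", "fp")
+    #     result["tag"]   # -> "FP" (normalized to upper case)
+    #     result["tags"]  # full tag list; any prior FP/TP/NMI tag was replaced
+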
+    def add_alert_note(
+        self,
+        alert_id: str,
+        note: str,
+    ) -> Dict[str, Any]:
+        """
+        Add a note/comment to an alert in Elasticsearch.
+
+        Appends the note to the alert's signal.ai.comments.comment array,
+        de-duplicating the existing comments first.
+        """
+        try:
+            # First, find the alert to get its index
+            query = {
+                "query": {
+                    "term": {"_id": alert_id}
+                }
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            if not hits:
+                raise IntegrationError(f"Alert {alert_id} not found")
+
+            hit = hits[0]
+            index_name = hit.get("_index")
+            if not index_name:
+                raise IntegrationError(f"Could not determine index for alert {alert_id}")
+
+            source = hit.get("_source", {})
+
+            # Get existing comments from signal.ai.comments.comment
+            existing_comments = []
+            signal = source.get("signal", {})
+            if isinstance(signal, dict):
+                signal_ai = signal.get("ai", {})
+                if isinstance(signal_ai, dict):
+                    ai_comments = signal_ai.get("comments", {})
+                    if isinstance(ai_comments, dict):
+                        ai_comment = ai_comments.get("comment")
+                        if isinstance(ai_comment, list):
+                            existing_comments = list(ai_comment)
+                        elif ai_comment:
+                            existing_comments = [ai_comment]
+
+            # Remove duplicates based on comment text and timestamp
+            seen_comments = set()
+            unique_comments = []
+            for comment in existing_comments:
+                if isinstance(comment, dict):
+                    comment_key = (comment.get("comment", ""), comment.get("timestamp", ""))
+                else:
+                    comment_key = (str(comment), "")
+                if comment_key not in seen_comments:
+                    seen_comments.add(comment_key)
+                    unique_comments.append(comment)
+
+            # Add the new note
+            new_note = {
+                "timestamp": datetime.utcnow().isoformat() + "Z",
+                "comment": note,
+                "author": "sami-gpt",
+            }
+            unique_comments.append(new_note)
+
+            # Build update document using script for nested fields;
+            # script updates are more reliable for nested structures in Elasticsearch
+            logger.debug(f"Updating alert {alert_id} with {len(unique_comments)} comments using script update")
+
+            # Use script update to handle nested signal.ai object properly
+            script_update = {
+                "script": {
+                    "source": """
+                        if (ctx._source.signal == null) {
+                            ctx._source.signal = [:];
+                        }
+                        if (ctx._source.signal.ai == null) {
+                            ctx._source.signal.ai = [:];
+                        }
+                        if (ctx._source.signal.ai.comments == null) {
+                            ctx._source.signal.ai.comments = [:];
+                        }
+                        ctx._source.signal.ai.comments.comment = params.comments;
+                    """,
+                    "lang": "painless",
+                    "params": {
+                        "comments": unique_comments,
+                        "timestamp": datetime.utcnow().isoformat() + "Z"
+                    }
+                }
+            }
+
+            # Update the alert using Elasticsearch update API with script;
+            # refresh=wait_for ensures the update is immediately visible
+            update_response = self._http.post(
+                f"/{index_name}/_update/{alert_id}?refresh=wait_for",
+                json_data=script_update
+            )
+
+            # Log the response for debugging
+            logger.debug(f"Update response for alert {alert_id}: {update_response.get('result')}")
+
+            # Verify the update was successful
+            if update_response.get("result") not in ["updated", "noop"]:
+                logger.warning(f"Unexpected update result: {update_response.get('result')}")
+                logger.warning(f"Update response: {update_response}")
+
+            # Check for errors in the response
+            if "error" in update_response:
+                error_msg = update_response.get("error", {})
+                logger.error(f"Elasticsearch update error: {error_msg}")
+                raise IntegrationError(f"Failed to update alert: {error_msg}")
+            else:
+                logger.info(f"Script update successful for alert {alert_id}")
+
+            # Verify the update by directly fetching the document
+            try:
+                verify_response = self._http.get(f"/{index_name}/_doc/{alert_id}")
+                if verify_response.get("found"):
+                    verified_source = verify_response.get("_source", {})
+                    verified_signal = verified_source.get("signal", {})
+                    verified_ai = verified_signal.get("ai", {})
+                    verified_comments_obj = verified_ai.get("comments", {})
+                    verified_comments = verified_comments_obj.get("comment", [])
+                    if not isinstance(verified_comments, list):
+                        verified_comments = [verified_comments] if verified_comments else []
+                    logger.debug(f"Verified: Document has {len(verified_comments)} comments in signal.ai.comments.comment")
+                    if not verified_comments:
+                        logger.warning("Update reported success but no comments found in verified document")
+            except Exception as e:
+                logger.warning(f"Could not verify update: {e}")
+
+            # Get updated alert details
+            updated_alert = self.get_security_alert_by_id(alert_id, include_detections=False)
+
+            return {
+                "success": True,
+                "alert_id": alert_id,
+                "note": note,
+                "alert": updated_alert,
+            }
+        except IntegrationError:
+            raise
+        except Exception as e:
+            logger.exception(f"Error adding note to alert {alert_id}: {e}")
+            raise IntegrationError(f"Failed to add note to alert: {e}") from e
+
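+    # Usage sketch for add_alert_note (illustrative; `siem` and the id are
+    # assumed names, not part of this module):
+    #
+    #     out = siem.add_alert_note("abc123", "Confirmed benign after user interview")
+    #     out["success"]  # True once the script update is acknowledged
+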
+    # Entity & Intelligence Methods
+
+    def lookup_entity(
+        self,
+        entity_value: str,
+        entity_type: Optional[str] = None,
+        hours_back: int = 24,
+    ) -> Dict[str, Any]:
+        """Look up an entity (IP, domain, hash, user, etc.) for enrichment."""
+        try:
+            # Auto-detect entity type if not provided
+            if not entity_type:
+                entity_type = self._detect_entity_type(entity_value)
+
+            # Use pivot_on_indicator to get events
+            # NOTE: hours_back is accepted for interface consistency but is not
+            # currently applied here; pivot_on_indicator uses its own time window.
+            result = self.pivot_on_indicator(entity_value, limit=100)
+
+            # Extract summary information
+            first_seen = None
+            last_seen = None
+            event_count = result.total_count
+            related_alerts = []
+            related_entities = set()
+
+            if result.events:
+                timestamps = [e.timestamp for e in result.events if e.timestamp]
+                if timestamps:
+                    first_seen = min(timestamps)
+                    last_seen = max(timestamps)
+
+                # Extract related entities
+                for event in result.events:
+                    if event.host:
+                        related_entities.add(f"host:{event.host}")
+                    if event.username:
+                        related_entities.add(f"user:{event.username}")
+                    if event.ip:
+                        related_entities.add(f"ip:{event.ip}")
+                    if event.file_hash:
+                        related_entities.add(f"hash:{event.file_hash}")
+
+            # Build summary
+            summary = f"Entity {entity_value} ({entity_type}): Found {event_count} events"
+            if first_seen:
+                summary += f" from {first_seen.isoformat()} to {last_seen.isoformat() if last_seen else 'now'}"
+
+            return {
+                "entity_value": entity_value,
+                "entity_type": entity_type,
+                "summary": summary,
+                "first_seen": first_seen.isoformat() if first_seen else None,
+                "last_seen": last_seen.isoformat() if last_seen else None,
+                "event_count": event_count,
+                "reputation": None,
+                "related_alerts": related_alerts,
+                "related_entities": list(related_entities),
+            }
+        except Exception as e:
+            logger.exception(f"Error looking up entity: {e}")
+            raise IntegrationError(f"Failed to lookup entity: {e}") from e
+
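+    # Usage sketch for lookup_entity (illustrative values; the entity type is
+    # auto-detected when omitted):
+    #
+    #     info = siem.lookup_entity("198.51.100.7")  # detected as "ip"
+    #     info["summary"]           # e.g. "Entity 198.51.100.7 (ip): Found 12 events ..."
+    #     info["related_entities"]  # e.g. ["host:web-01", "user:jdoe"]
+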
+    def get_ioc_matches(
+        self,
+        hours_back: int = 24,
+        max_matches: int = 20,
+        ioc_type: Optional[str] = None,
+        severity: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        Get Indicators of Compromise (IoC) matches from Elasticsearch.
+
+        This method is selective and only returns actual threat indicators:
+        - Threat intelligence indicators (threat.indicator field)
+        - File hashes with malicious indicators
+        - IP addresses that are in threat feeds or have malicious indicators
+        - Excludes private/internal IP addresses (RFC 1918)
+        """
+        import ipaddress
+
+        def is_private_ip(ip_str: str) -> bool:
+            """Check if an IP address is private/internal (RFC 1918)."""
+            try:
+                ip = ipaddress.ip_address(ip_str)
+                return ip.is_private or ip.is_loopback or ip.is_link_local
+            except (ValueError, AttributeError):
+                return False
+
+        def is_malicious_indicator(source: Dict[str, Any]) -> bool:
+            """Check if an event contains actual malicious indicators."""
+            threat = source.get("threat", {})
+
+            # Check for threat intelligence indicators
+            if threat.get("indicator"):
+                # Check if it's marked as malicious
+                threat_type = threat.get("framework", "").lower()
+                threat_ind = threat.get("indicator", {})
+                if isinstance(threat_ind, dict):
+                    if threat_ind.get("type") == "malicious" or "malicious" in threat_type:
+                        return True
+                # If threat.indicator exists, it's likely from a threat feed
+                return True
+
+            # Check for malicious file indicators
+            file = source.get("file", {})
+            if file.get("hash"):
+                # Check if file is marked as malicious
+                if file.get("type") == "malicious" or file.get("malware_classification"):
+                    return True
+
+            # Check for threat.enrichments (Elastic Security threat intel)
+            if threat.get("enrichments"):
+                return True
+
+            # Check for event.category related to threats
+            event = source.get("event", {})
+            if event.get("category") in ["threat", "malware", "intrusion_detection"]:
+                return True
+
+            return False
+
+        try:
+            # Build query that prioritizes actual threat indicators
+            query = {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
+                            {
+                                "bool": {
+                                    "should": [
+                                        # Priority 1: Explicit threat indicators
+                                        {"exists": {"field": "threat.indicator"}},
+                                        {"exists": {"field": "threat.enrichments"}},
+                                        # Priority 2: Malicious file hashes
+                                        {
+                                            "bool": {
+                                                "must": [
+                                                    {"exists": {"field": "file.hash"}},
+                                                    {
+                                                        "bool": {
+                                                            "should": [
+                                                                {"term": {"file.type": "malicious"}},
+                                                                {"exists": {"field": "file.malware_classification"}},
+                                                                {"exists": {"field": "threat.framework"}},
+                                                            ]
+                                                        }
+                                                    }
+                                                ]
+                                            }
+                                        },
+                                        # Priority 3: IPs with threat indicators (but we'll filter private IPs)
+                                        {
+                                            "bool": {
+                                                "must": [
+                                                    {
+                                                        "bool": {
+                                                            "should": [
+                                                                {"exists": {"field": "source.ip"}},
+                                                                {"exists": {"field": "destination.ip"}},
+                                                            ]
+                                                        }
+                                                    },
+                                                    {
+                                                        "bool": {
+                                                            "should": [
+                                                                {"exists": {"field": "threat.indicator.ip"}},
+                                                                {"exists": {"field": "threat.enrichments"}},
+                                                                {"term": {"event.category": "threat"}},
+                                                                {"term": {"event.category": "malware"}},
+                                                            ]
+                                                        }
+                                                    }
+                                                ]
+                                            }
+                                        },
+                                    ],
+                                    "minimum_should_match": 1
+                                }
+                            }
+                        ]
+                    }
+                },
+                "size": max_matches * 3,  # Get more results to filter
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            if ioc_type:
+                if ioc_type == "ip":
+                    # For IPs, require threat indicators
+                    query["query"]["bool"]["must"][1]["bool"]["should"] = [
+                        {"exists": {"field": "threat.indicator.ip"}},
+                        {"exists": {"field": "threat.enrichments"}},
+                    ]
+                elif ioc_type == "hash":
+                    query["query"]["bool"]["must"][1]["bool"]["should"] = [
+                        {
+                            "bool": {
+                                "must": [
+                                    {"exists": {"field": "file.hash"}},
+                                    {
+                                        "bool": {
+                                            "should": [
+                                                {"term": {"file.type": "malicious"}},
+                                                {"exists": {"field": "file.malware_classification"}},
+                                                {"exists": {"field": "threat.framework"}},
+                                            ]
+                                        }
+                                    }
+                                ]
+                            }
+                        }
+                    ]
+                elif ioc_type == "domain":
+                    query["query"]["bool"]["must"][1]["bool"]["should"] = [
+                        {"exists": {"field": "threat.indicator.domain"}},
+                        {"exists": {"field": "dns.question.name"}},
+                    ]
+
+            if severity:
+                query["query"]["bool"]["must"].append({"match": {"event.severity": severity}})
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "logs-*,security-*,winlogbeat-*,filebeat-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            matches = []
+            seen_indicators = set()
+
+            for hit in hits:
+                source = hit.get("_source", {})
+
+                # Extract IoC - prioritize threat indicators
+                indicator = None
+                ioc_type_detected = None
+                is_malicious = False
+
+                # Priority 1: Threat intelligence indicators
+                threat = source.get("threat", {})
+                if threat.get("indicator"):
+                    threat_ind = threat.get("indicator", {})
+                    if isinstance(threat_ind, dict):
+                        # Extract from threat.indicator object
+                        indicator = threat_ind.get("ip") or threat_ind.get("domain") or threat_ind.get("file", {}).get("hash", {}).get("sha256")
+                        # If threat.indicator exists, it's from a threat feed - consider it malicious
+                        is_malicious = True
+                    elif isinstance(threat_ind, str):
+                        indicator = threat_ind
+                        is_malicious = True
+                    ioc_type_detected = "threat_indicator"
+
+                # Also check threat.enrichments (Elastic Security threat intel)
+                if not indicator and threat.get("enrichments"):
+                    # Extract IP from enrichments
+                    enrichments = threat.get("enrichments", [])
+                    for enrichment in enrichments:
+                        if isinstance(enrichment, dict):
+                            indicator = enrichment.get("indicator", {}).get("ip") or enrichment.get("indicator", {}).get("domain")
+                            if indicator:
+                                is_malicious = True
+                                ioc_type_detected = "threat_indicator"
+                                break
+
+                # Priority 2: File hashes with malicious indicators
+                if not indicator:
+                    file = source.get("file", {})
+                    file_hash = file.get("hash", {})
+                    if isinstance(file_hash, dict):
+                        hash_value = file_hash.get("sha256") or file_hash.get("md5") or file_hash.get("sha1")
+                    elif isinstance(file_hash, str):
+                        hash_value = file_hash
+                    else:
+                        hash_value = None
+
+                    if hash_value and (file.get("type") == "malicious" or file.get("malware_classification") or threat.get("framework")):
+                        indicator = hash_value
+                        ioc_type_detected = "hash"
+                        is_malicious = True
+
+                # Priority 3: IP addresses from threat feeds (exclude private IPs)
+                if not indicator:
+                    source_ip = source.get("source", {}).get("ip")
+                    dest_ip = source.get("destination", {}).get("ip")
+
+                    # Check if IP has threat indicators
+                    if source_ip and not is_private_ip(source_ip) and is_malicious_indicator(source):
+                        indicator = source_ip
+                        ioc_type_detected = "ip"
+                        is_malicious = True
+                    elif dest_ip and not is_private_ip(dest_ip) and is_malicious_indicator(source):
+                        indicator = dest_ip
+                        ioc_type_detected = "ip"
+                        is_malicious = True
+
+                # Only include if we found an indicator and it's marked as malicious or from a threat feed
+                if indicator and indicator not in seen_indicators and is_malicious:
+                    seen_indicators.add(indicator)
+                    matches.append({
+                        "indicator": indicator,
+                        "ioc_type": ioc_type_detected or ioc_type or "unknown",
+                        "first_seen": source.get("@timestamp", ""),
+                        "last_seen": source.get("@timestamp", ""),
+                        "match_count": 1,
+                        "severity": source.get("event", {}).get("severity", "medium"),
+                        "source": "elastic",
+                        "affected_hosts": [source.get("host", {}).get("name")] if source.get("host", {}).get("name") else [],
+                    })
+
+            # Return top matches sorted by severity (if available)
+            matches.sort(key=lambda x: {
+                "critical": 4,
+                "high": 3,
+                "medium": 2,
+                "low": 1
+            }.get(x.get("severity", "medium").lower(), 0), reverse=True)
+
+            return matches[:max_matches]
+        except Exception as e:
+            logger.exception(f"Error getting IoC matches: {e}")
+            raise IntegrationError(f"Failed to get IoC matches: {e}") from e
+
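+    # Usage sketch for get_ioc_matches (illustrative; the indicator shown is a
+    # documentation IP, not real data):
+    #
+    #     matches = siem.get_ioc_matches(hours_back=48, max_matches=10, ioc_type="ip")
+    #     # each entry looks like:
+    #     # {"indicator": "203.0.113.9", "ioc_type": "ip", "severity": "high", ...}
+    #     # private, loopback, and link-local IPs are filtered out beforehand
+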
+    def get_threat_intel(
+        self,
+        query: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """Get threat intelligence answers."""
+        try:
+            answer = f"Threat intelligence query: {query}\n\n"
+
+            if context:
+                for key, value in context.items():
+                    if isinstance(value, str):
+                        try:
+                            entity_info = self.lookup_entity(value, hours_back=168)
+                            answer += f"\n{key} ({value}): {entity_info.get('summary', 'No information found')}\n"
+                        except Exception:
+                            pass
+
+            answer += "\nNote: Full threat intelligence integration requires additional threat intelligence feeds or AI models."
+
+            return {
+                "query": query,
+                "answer": answer,
+                "sources": ["elasticsearch"],
+                "confidence": "medium",
+            }
+        except Exception as e:
+            logger.exception(f"Error getting threat intelligence: {e}")
+            raise IntegrationError(f"Failed to get threat intelligence: {e}") from e
+
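+    # Usage sketch for get_threat_intel (illustrative; the context keys are
+    # arbitrary labels chosen by the caller):
+    #
+    #     intel = siem.get_threat_intel(
+    #         "Is this IP associated with known C2 infrastructure?",
+    #         context={"suspicious_ip": "203.0.113.9"},
+    #     )
+    #     intel["answer"]  # per-entity summaries plus a note about feed coverage
+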
+    # Detection Rule Management Methods
+
+    def list_security_rules(
+        self,
+        enabled_only: bool = False,
+        limit: int = 100,
+    ) -> List[Dict[str, Any]]:
+        """List security detection rules configured in Elasticsearch."""
+        try:
+            query = {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {"exists": {"field": "rule"}}
+                        ]
+                    }
+                },
+                "size": limit
+            }
+
+            if enabled_only:
+                query["query"]["bool"]["must"].append({"term": {"enabled": True}})
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                ".siem-signals-*,alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            rules = []
+            seen_rules = {}
+
+            for hit in hits:
+                source = hit.get("_source", {})
+                signal = source.get("signal", {})
+                rule = signal.get("rule", {}) if isinstance(signal.get("rule"), dict) else {}
+
+                rule_id = rule.get("id") or rule.get("rule_id") or hit.get("_id", "")
+                if rule_id and rule_id not in seen_rules:
+                    seen_rules[rule_id] = True
+                    rules.append({
+                        "id": rule_id,
+                        "name": rule.get("name", ""),
+                        "description": rule.get("description", ""),
+                        "enabled": True,  # enablement is not stored on alert documents; assumed True
+                        "severity": rule.get("severity", "medium"),
+                        "category": rule.get("category", ""),
+                        "created_at": source.get("@timestamp", ""),
+                        "updated_at": source.get("@timestamp", ""),
+                    })
+
+            return rules[:limit]
+        except Exception as e:
+            logger.exception(f"Error listing security rules: {e}")
+            raise IntegrationError(f"Failed to list security rules: {e}") from e
+
+    def search_security_rules(
+        self,
+        query: str,
+        category: Optional[str] = None,
+        enabled_only: bool = False,
+    ) -> List[Dict[str, Any]]:
+        """Search for security detection rules."""
+        try:
+            all_rules = self.list_security_rules(enabled_only=enabled_only, limit=1000)
+
+            import re
+            pattern = re.compile(query, re.IGNORECASE)
+            matching_rules = []
+
+            for rule in all_rules:
+                if pattern.search(rule.get("name", "")) or pattern.search(rule.get("description", "")):
+                    if not category or rule.get("category", "").lower() == category.lower():
+                        matching_rules.append(rule)
+
+            return matching_rules
+        except Exception as e:
+            logger.exception(f"Error searching security rules: {e}")
+            raise IntegrationError(f"Failed to search security rules: {e}") from e
+
+    def get_rule_detections(
+        self,
+        rule_id: str,
+        alert_state: Optional[str] = None,
+        hours_back: int = 24,
+        limit: int = 50,
+    ) -> List[Dict[str, Any]]:
+        """Get historical detections from a specific rule."""
+        try:
+            query = {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
+                            {
+                                "bool": {
+                                    "should": [
+                                        {"term": {"signal.rule.id": rule_id}},
+                                        {"term": {"signal.rule.rule_id": rule_id}},
+                                    ]
+                                }
+                            }
+                        ]
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            if alert_state:
+                query["query"]["bool"]["must"].append({"term": {"signal.status": alert_state}})
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                ".siem-signals-*,alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            detections = []
+
+            for hit in hits:
+                source = hit.get("_source", {})
+                signal = source.get("signal", {})
+
+                detections.append({
+                    "id": hit.get("_id", ""),
+                    "alert_id": hit.get("_id", ""),
+                    "timestamp": source.get("@timestamp", ""),
+                    "severity": signal.get("severity", "medium"),
+                    "status": signal.get("status", "open"),
+                    "description": signal.get("rule", {}).get("description", "") if isinstance(signal.get("rule"), dict) else "",
+                })
+
+            return detections
+        except Exception as e:
+            logger.exception(f"Error getting rule detections: {e}")
+            raise IntegrationError(f"Failed to get rule detections: {e}") from e
+
+    def list_rule_errors(
+        self,
+        rule_id: str,
+        hours_back: int = 24,
+    ) -> List[Dict[str, Any]]:
+        """List execution errors for a specific rule."""
+        try:
+            query = {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
+                            {"match": {"message": "error"}},
+                            {
+                                "bool": {
+                                    "should": [
+                                        {"match": {"rule_id": rule_id}},
+                                        {"match": {"rule.id": rule_id}},
+                                    ]
+                                }
+                            }
+                        ]
+                    }
+                },
+                "size": 100
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "logs-*,.siem-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            errors = []
+
+            for hit in hits:
+                source = hit.get("_source", {})
+                errors.append({
+                    "timestamp": source.get("@timestamp", ""),
+                    "error_type": "execution_error",
+                    "error_message": source.get("message", ""),
+                    "severity": "high",
+                })
+
+            return errors
+        except Exception as e:
+            logger.exception(f"Error listing rule errors: {e}")
+            raise IntegrationError(f"Failed to list rule errors: {e}") from e
+
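+    # Usage sketch for the rule methods above (illustrative; assumes at least
+    # one rule is returned):
+    #
+    #     rules = siem.list_security_rules(enabled_only=True, limit=50)
+    #     hits = siem.search_security_rules("powershell")
+    #     detections = siem.get_rule_detections(rules[0]["id"], hours_back=72)
+    #     errors = siem.list_rule_errors(rules[0]["id"])
+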
+    # Helper Methods
+
+    def _search_with_fallback(self, indices_patterns: List[str], query: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Search Elasticsearch with fallback index patterns.
+
+        Tries each index pattern in order until one succeeds. If all fail, raises the last error.
+
+        Args:
+            indices_patterns: List of index patterns to try (e.g., ["logs-*", "_all"])
+            query: Elasticsearch query dictionary
+
+        Returns:
+            Elasticsearch response dictionary
+
+        Raises:
+            IntegrationError: If all index patterns fail
+        """
+        response = None
+        last_error = None
+
+        for indices in indices_patterns:
+            try:
+                endpoint = f"/{indices}/_search"
+                response = self._http.post(endpoint, json_data=query)
+                break  # Success, exit loop
+            except IntegrationError as e:
+                last_error = e
+                # If it's a 404 and we have more patterns to try, continue
+                if "404" in str(e) and indices != indices_patterns[-1]:
+                    logger.debug(f"Index pattern '{indices}' returned 404, trying next pattern...")
+                    continue
+                # For non-404 errors or if this is the last pattern, re-raise
+                if indices == indices_patterns[-1]:
+                    # Last pattern failed, provide a helpful error message
+                    logger.error(f"Failed to search Elasticsearch with all index patterns. Last error: {e}")
+                    raise IntegrationError(
+                        f"Failed to search Elasticsearch. Tried patterns: {indices_patterns}. "
+                        f"Last error: {e}. "
+                        f"This may indicate that the Elasticsearch API path is incorrect or the indices don't exist. "
+                        f"Check your base_url configuration."
+                    ) from e
+                raise
+
+        if response is None:
+            raise IntegrationError(f"All index patterns failed. Last error: {last_error}") from last_error
+
+        return response
+
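+    # Usage sketch for _search_with_fallback (this mirrors how the public
+    # methods above call it; the match_all query is illustrative):
+    #
+    #     response = self._search_with_fallback(
+    #         ["alerts-*", "_all"],
+    #         {"query": {"match_all": {}}, "size": 1},
+    #     )
+    #     response["hits"]["hits"]  # standard Elasticsearch hit list
+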
+    def _extract_description_from_alert(self, source: Dict[str, Any], signal: Dict[str, Any]) -> str:
+        """
+        Extract description from alert document, supporting multiple formats.
+
+        Checks in order:
+        1. signal.rule.description (Kibana alert format)
+        2. kibana.alert.rule.parameters.description (Kibana alert format)
+        3. kibana.alert.rule.description (Kibana alert format, flat key)
+        4. rule.description (Endpoint detection format)
+        """
+        description = ""
+
+        # Check signal.rule.description (Kibana alert format)
+        if isinstance(signal.get("rule"), dict):
+            description = signal.get("rule", {}).get("description", "")
+
+        if not description:
+            # Check kibana.alert.rule.parameters.description
+            kibana_rule_params = source.get("kibana.alert.rule.parameters", {})
+            if isinstance(kibana_rule_params, dict):
+                description = kibana_rule_params.get("description", "")
+
+        if not description:
+            # Check kibana.alert.rule.description (flat key)
+            description = source.get("kibana.alert.rule.description", "")
+
+        if not description:
+            # Check for endpoint detection format (rule.description directly on document)
+            rule_obj = source.get("rule", {})
+            if isinstance(rule_obj, dict):
+                description = rule_obj.get("description", "")
+
+        return description
+
+    def _extract_entities_from_alert(self, source: Dict[str, Any]) -> List[str]:
+        """Extract related entities from an alert source."""
+        entities = []
+
+        if source.get("source", {}).get("ip"):
+            entities.append(f"ip:{source['source']['ip']}")
+        if source.get("destination", {}).get("ip"):
+            entities.append(f"ip:{source['destination']['ip']}")
+        if source.get("dns", {}).get("question", {}).get("name"):
+            entities.append(f"domain:{source['dns']['question']['name']}")
+        if source.get("file", {}).get("hash", {}).get("sha256"):
+            entities.append(f"hash:{source['file']['hash']['sha256']}")
+        if source.get("user", {}).get("name"):
+            entities.append(f"user:{source['user']['name']}")
+
+        return entities
+
+    def _detect_entity_type(self, value: str) -> str:
+        """Auto-detect entity type from value."""
+        import re
+
+        ip_pattern = r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$'
+        hash_pattern = r'^[a-fA-F0-9]{32,64}$'
+        domain_pattern = r'^[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)*$'
+
+        if re.match(ip_pattern, value):
+            return "ip"
+        elif re.match(hash_pattern, value):
+            return "hash"
+        elif re.match(domain_pattern, value) and '.' in value:
+            return "domain"
+        else:
+            return "user"
+
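+    # Examples of what _detect_entity_type returns (illustrative inputs):
+    #
+    #     self._detect_entity_type("10.0.0.1")                          # "ip"
+    #     self._detect_entity_type("d41d8cd98f00b204e9800998ecf8427e")  # "hash"
+    #     self._detect_entity_type("example.com")                       # "domain"
+    #     self._detect_entity_type("jdoe")                              # "user" (fallback)
+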
+    def _severity_to_priority(self, severity: str) -> str:
+        """Convert severity to priority."""
+        mapping = {
+            "critical": "critical",
+            "high": "high",
+            "medium": "medium",
+            "low": "low",
+        }
+        return mapping.get(severity.lower(), "medium")
+
+    def _severity_score_to_level(self, score: int) -> str:
+        """Convert severity score (0-100) to level."""
+        if score >= 75:
+            return "critical"
+        elif score >= 50:
+            return "high"
+        elif score >= 25:
+            return "medium"
+        else:
+            return "low"
+
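+    # Boundary examples for _severity_score_to_level:
+    #
+    #     self._severity_score_to_level(80)  # "critical" (score >= 75)
+    #     self._severity_score_to_level(60)  # "high"     (score >= 50)
+    #     self._severity_score_to_level(30)  # "medium"   (score >= 25)
+    #     self._severity_score_to_level(10)  # "low"
+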
+    # New SOC1 Tools - Network, DNS, Email Events, and Alert Correlation
+
+    def get_network_events(
+        self,
+        source_ip: Optional[str] = None,
+        destination_ip: Optional[str] = None,
+        port: Optional[int] = None,
+        protocol: Optional[str] = None,
+        hours_back: int = 24,
+        limit: int = 100,
+        event_type: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve network traffic events (firewall, netflow, proxy logs) with structured fields.
+
+        Returns network events with source/destination IPs, ports, protocols, bytes, packets, and connection duration.
+        """
+        try:
+            # Build Elasticsearch query for network events
+            must_clauses = []
+
+            # Time range
+            must_clauses.append({"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}})
+
+            # Network event type filter
+            if event_type and event_type != "all":
+                if event_type == "firewall":
+                    must_clauses.append({"match": {"event.category": "network"}})
+                elif event_type == "netflow":
+                    must_clauses.append({"match": {"event.dataset": "flow"}})
+                elif event_type == "proxy":
+                    must_clauses.append({"match": {"event.category": "web"}})
+
+            # IP filters
+            if source_ip:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"source.ip": source_ip}},
+                            {"match": {"client.ip": source_ip}},
+                        ]
+                    }
+                })
+
+            if destination_ip:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"destination.ip": destination_ip}},
+                            {"match": {"server.ip": destination_ip}},
+                        ]
+                    }
+                })
+
+            # Port filter
+            if port:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"source.port": port}},
+                            {"match": {"destination.port": port}},
+                            {"match": {"client.port": port}},
+                            {"match": {"server.port": port}},
+                        ]
+                    }
+                })
+
+            # Protocol filter
+            if protocol:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"network.protocol": protocol}},
+                            {"match": {"network.transport": protocol}},
+                            {"match": {"protocol": protocol}},
+                        ]
+                    }
+                })
+
+            query = {
+                "query": {
+                    "bool": {
+                        "must": must_clauses,
+                        "should": [
+                            {"match": {"event.category": "network"}},
+                            {"match": {"event.dataset": "flow"}},
+                            {"exists": {"field": "source.ip"}},
+                            {"exists": {"field": "destination.ip"}},
+                        ],
+                        "minimum_should_match": 1,
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "logs-*,security-*,filebeat-*,packetbeat-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            events = []
+            for hit in hits[:limit]:
+                source = hit.get("_source", {})
+                timestamp_str = source.get("@timestamp") or source.get("timestamp")
+                timestamp = None
+                if timestamp_str:
+                    try:
+                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
+                    except Exception:
+                        timestamp = datetime.utcnow()
+                else:
+                    timestamp = datetime.utcnow()
+
+                # Extract network fields
+                network = source.get("network", {})
+                source_data = source.get("source", {})
+                dest_data = source.get("destination", {})
+
+                event = {
+                    "id": hit.get("_id", ""),
+                    "timestamp": timestamp.isoformat(),
+                    "source_ip": source_data.get("ip") if isinstance(source_data, dict) else source.get("source.ip"),
+                    "destination_ip": dest_data.get("ip") if isinstance(dest_data, dict) else source.get("destination.ip"),
+                    "source_port": source_data.get("port") if isinstance(source_data, dict) else source.get("source.port"),
+                    "destination_port": dest_data.get("port") if isinstance(dest_data, dict) else source.get("destination.port"),
+                    "protocol": network.get("protocol") or network.get("transport") or source.get("protocol"),
+                    "bytes_sent": network.get("bytes") or source.get("bytes"),
+                    "bytes_received": network.get("bytes") or source.get("bytes"),  # May need adjustment based on direction
+                    "packets_sent": network.get("packets") or source.get("packets"),
+                    "packets_received": network.get("packets") or source.get("packets"),  # May need adjustment
+                    "connection_duration": source.get("duration") or source.get("connection_duration"),
+                    "action": source.get("event", {}).get("action") or source.get("action"),
+                    "event_type": "firewall" if "firewall" in str(source).lower() else ("netflow" if "flow" in str(source).lower() else "proxy"),
+                    "hostname": source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host"),
+                    "domain": dest_data.get("domain") if isinstance(dest_data, dict) else source.get("destination.domain"),
+                }
+                events.append(event)
+
+            return {
+                "total_count": total_count,
+                "events": events,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting network events: {e}")
+            raise IntegrationError(f"Failed to get network events: {e}") from e
+
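+    # Usage sketch for get_network_events (illustrative filter values):
+    #
+    #     net = siem.get_network_events(
+    #         source_ip="10.0.0.5",
+    #         port=443,
+    #         protocol="tcp",
+    #         event_type="firewall",
+    #     )
+    #     net["total_count"], net["events"]  # events carry IPs, ports, bytes, duration
+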
+    def get_dns_events(
+        self,
+        domain: Optional[str] = None,
+        ip_address: Optional[str] = None,
+        resolved_ip: Optional[str] = None,
+        query_type: Optional[str] = None,
+        hours_back: int = 24,
+        limit: int = 100,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve DNS query and response events with structured fields.
+
+        Returns DNS events with domain, query type, resolved IP, source IP, and response codes.
+        """
+        try:
+            # Build Elasticsearch query for DNS events
+            must_clauses = [
+                {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
+                {"match": {"event.category": "network"}},
+            ]
+
+            # DNS-specific filters
+            dns_should = [
+                {"exists": {"field": "dns.question.name"}},
+                {"exists": {"field": "dns.question.type"}},
+            ]
+
+            if domain:
+                # Support both exact matches and subdomain matches
+                # e.g., "example.com" should match "example.com", "www.example.com", "mail.example.com", etc.
+                domain_normalized = domain.lower().strip()
+                # Remove leading dot if present
+                if domain_normalized.startswith('.'):
+                    domain_normalized = domain_normalized[1:]
+
+                # Escape dots for regex
+                domain_escaped = domain_normalized.replace('.', r'\.')
+
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match_phrase": {"dns.question.name": domain_normalized}},  # Exact phrase match
+                            {"wildcard": {"dns.question.name": f"*{domain_normalized}"}},  # Subdomain match (e.g., *.example.com or www.example.com)
+                            {"regexp": {"dns.question.name": f".*{domain_escaped}"}},  # Regex match for flexibility
+                        ],
+                        "minimum_should_match": 1,
+                    }
+                })
+
+            if ip_address:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"source.ip": ip_address}},
+                            {"match": {"client.ip": ip_address}},
+                        ]
+                    }
+                })
+
+            if resolved_ip:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"dns.answers.data": resolved_ip}},
+                            {"match": {"dns.response_code": "NOERROR"}},
+                        ]
+                    }
+                })
+
+            if query_type:
+                must_clauses.append({"match": {"dns.question.type": query_type}})
+
+            query = {
+                "query": {
+                    "bool": {
+                        "must": must_clauses,
+                        "should": dns_should,
+                        "minimum_should_match": 1,
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "logs-*,security-*,filebeat-*,packetbeat-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            events = []
+            for hit in hits[:limit]:
+                source = hit.get("_source", {})
+                timestamp_str = source.get("@timestamp") or source.get("timestamp")
+                timestamp = None
+                if timestamp_str:
+                    try:
+                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
+                    except Exception:
+                        timestamp = datetime.utcnow()
+                else:
+                    timestamp = datetime.utcnow()
+
+                # Extract DNS fields
+                dns = source.get("dns", {})
+                question = dns.get("question", {}) if isinstance(dns, dict) else {}
+                answers = dns.get("answers", []) if isinstance(dns, dict) else []
+                source_data = source.get("source", {})
+
+                # Get first resolved IP from answers
+                resolved_ip_value = None
+                if answers and isinstance(answers, list) and len(answers) > 0:
+                    first_answer = answers[0] if isinstance(answers[0], dict) else {}
+                    resolved_ip_value = first_answer.get("data") or first_answer.get("address")
+
+                event = {
+                    "id": hit.get("_id", ""),
+                    "timestamp": timestamp.isoformat(),
+                    "domain": question.get("name") if isinstance(question, dict) else dns.get("question.name"),
+                    "query_type": question.get("type") if isinstance(question, dict) else dns.get("question.type"),
+                    "resolved_ip": resolved_ip_value,
+                    "source_ip": source_data.get("ip") if isinstance(source_data, dict) else source.get("source.ip"),
+                    "hostname": source.get("host", {}).get("name") if isinstance(source.get("host"), dict) else source.get("host"),
+                    "response_code": dns.get("response_code") if isinstance(dns, dict) else source.get("dns.response_code"),
+                    "response_time": dns.get("response_time") if isinstance(dns, dict) else source.get("dns.response_time"),
+                    "record_count": len(answers) if isinstance(answers, list) else 0,
+                }
+                events.append(event)
+
+            return {
+                "total_count": total_count,
+                "events": events,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting DNS events: {e}")
+            raise IntegrationError(f"Failed to get DNS events: {e}") from e
+
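+    # Usage sketch for get_dns_events (illustrative; "example.com" also matches
+    # subdomains such as www.example.com via the wildcard/regexp clauses above):
+    #
+    #     dns = siem.get_dns_events(domain="example.com", query_type="A", hours_back=48)
+    #     dns["events"][0]["resolved_ip"]  # first answer record, when present
+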
+    def get_alerts_by_entity(
+        self,
+        entity_value: str,
+        entity_type: Optional[str] = None,
+        hours_back: int = 24,
+        limit: int = 50,
+        severity: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve alerts filtered by specific entity (IP, user, host, domain, hash) for correlation analysis.
+        """
+        try:
+            # Auto-detect entity type if not provided
+            if not entity_type:
+                entity_type = self._detect_entity_type(entity_value)
+
+            # Build query to find alerts containing this entity
+            must_clauses = [
+                {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}}
+            ]
+
+            # Entity-specific search
+            if entity_type == "ip":
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"source.ip": entity_value}},
+                            {"match": {"destination.ip": entity_value}},
+                            {"match": {"client.ip": entity_value}},
+                            {"match": {"server.ip": entity_value}},
+                        ]
+                    }
+                })
+            elif entity_type == "user":
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"user.name": entity_value}},
+                            {"match": {"user": entity_value}},
+                            {"match": {"username": entity_value}},
+                        ]
+                    }
+                })
+            elif entity_type == "domain":
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"dns.question.name": entity_value}},
+                            {"match": {"url.domain": entity_value}},
+                            {"match": {"domain": entity_value}},
+                        ]
+                    }
+                })
+            elif entity_type == "hash":
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"file.hash.sha256": entity_value}},
+                            {"match": {"file.hash.sha1": entity_value}},
+                            {"match": {"file.hash.md5": entity_value}},
+                            {"match": {"hash": entity_value}},
+                        ]
+                    }
+                })
+            elif entity_type == "host":
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"host.name": entity_value}},
+                            {"match": {"hostname": entity_value}},
+                            {"match": {"host": entity_value}},
+                        ]
+                    }
+                })
+
+            # Severity filter
+            if severity:
+                must_clauses.append({"match": {"signal.severity": severity}})
+
+            query = {
+                "query": {
+                    "bool": {
+                        "must": must_clauses,
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            alerts = []
+            for hit in hits:
+                source = hit.get("_source", {})
+                signal = source.get("signal", {})
+                rule = signal.get("rule", {}) if isinstance(signal.get("rule"), dict) else {}
+
+                alert = {
+                    "id": hit.get("_id", ""),
+                    "title": rule.get("name", "") or source.get("event", {}).get("reason", ""),
+                    "severity": signal.get("severity", "medium"),
+                    "status": signal.get("status", "open"),
+                    "created_at": source.get("@timestamp", ""),
+                    "alert_type": rule.get("category", "") or source.get("event", {}).get("category", ""),
+                    "description": rule.get("description", "") or source.get("event", {}).get("reason", ""),
+                    "related_entities": self._extract_entities_from_alert(source),
+                    "source": "elastic",
+                }
+                alerts.append(alert)
+
+            return {
+                "entity_value": entity_value,
+                "entity_type": entity_type,
+                "total_count": total_count,
+                "returned_count": len(alerts),
+                "alerts": alerts,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting alerts by entity: {e}")
+            raise IntegrationError(f"Failed to get alerts by entity: {e}") from e
+
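+    # Usage sketch for get_alerts_by_entity (illustrative; entity_type may be
+    # omitted and auto-detected):
+    #
+    #     corr = siem.get_alerts_by_entity("jdoe", entity_type="user", severity="high")
+    #     corr["alerts"]  # alerts mentioning the entity, newest first
+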
+    def get_all_uncertain_alerts_for_host(
+        self,
+        hostname: str,
+        hours_back: int = 7 * 24,  # Default 7 days
+        limit: int = 100,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve all alerts with verdict="uncertain" for a specific host.
+
+        This is useful for pattern analysis when investigating uncertain alerts
+        to determine if multiple uncertain alerts on the same host indicate a broader issue.
+
+        Args:
+            hostname: The hostname to search for
+            hours_back: How many hours to look back (default: 7 days = 168 hours)
+            limit: Maximum number of alerts to return (default: 100)
+
+        Returns:
+            Dictionary containing uncertain alerts for the host
+        """
+        try:
+            must_clauses = [
+                {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
+                # Match hostname in various fields
+                {
+                    "bool": {
+                        "should": [
+                            {"match": {"host.name": hostname}},
+                            {"match": {"hostname": hostname}},
+                            {"match": {"host": hostname}},
+                            {"match": {"host.hostname": hostname}},
+                        ]
+                    }
+                },
+                # Match verdict="uncertain"
+                {
+                    "bool": {
+                        "should": [
+                            {"term": {"signal.ai.verdict": "uncertain"}},
+                            {"term": {"verdict": "uncertain"}},
+                        ]
+                    }
+                }
+            ]
+
+            query = {
+                "query": {
+                    "bool": {
+                        "must": must_clauses,
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            alerts = []
+            for hit in hits:
+                source = hit.get("_source", {})
+                signal = source.get("signal", {})
+                rule = signal.get("rule", {}) if isinstance(signal.get("rule"), dict) else {}
+
+                # Extract verdict
+                verdict = signal.get("ai", {}).get("verdict") or source.get("verdict")
+
+                alert = {
+                    "id": hit.get("_id", ""),
+                    "title": rule.get("name", "") or source.get("event", {}).get("reason", ""),
+                    "severity": signal.get("severity", "medium"),
+                    "status": signal.get("status", "open"),
+                    "created_at": source.get("@timestamp", ""),
+                    "alert_type": rule.get("category", "") or source.get("event", {}).get("category", ""),
+                    "description": rule.get("description", "") or source.get("event", {}).get("reason", ""),
+                    "verdict": verdict,
+                    "hostname": hostname,
+                    "related_entities": self._extract_entities_from_alert(source),
+                    "source": "elastic",
+                }
+                alerts.append(alert)
+
+            return {
+                "hostname": hostname,
+                "hours_back": hours_back,
+                "total_count": total_count,
+                "returned_count": len(alerts),
+                "alerts": alerts,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting uncertain alerts for host: {e}")
+            raise IntegrationError(f"Failed to get uncertain alerts for host: {e}") from e
+
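+    # Usage sketch for get_all_uncertain_alerts_for_host (illustrative hostname):
+    #
+    #     stale = siem.get_all_uncertain_alerts_for_host("web-01", hours_back=14 * 24)
+    #     len(stale["alerts"])  # uncertain verdicts accumulated on the host
+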
+    def get_alerts_by_time_window(
+        self,
+        start_time: str,
+        end_time: str,
+        limit: int = 100,
+        severity: Optional[str] = None,
+        alert_type: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve alerts within a specific time window for temporal correlation.
+        """
+        try:
+            # Parse ISO timestamps; this only validates the format, since the
+            # raw strings are passed through to Elasticsearch below
+            try:
+                start_dt = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
+                end_dt = datetime.fromisoformat(end_time.replace("Z", "+00:00"))
+            except Exception as e:
+                raise IntegrationError(f"Invalid time format: {e}") from e
+
+            must_clauses = [
+                {"range": {"@timestamp": {"gte": start_time, "lte": end_time}}}
+            ]
+
+            if severity:
+                must_clauses.append({"match": {"signal.severity": severity}})
+
+            if alert_type:
+                must_clauses.append({
+                    "bool": {
+                        "should": [
+                            {"match": {"signal.rule.category": alert_type}},
+                            {"match": {"event.category": alert_type}},
+                        ]
+                    }
+                })
+
+            query = {
+                "query": {
+                    "bool": {
+                        "must": must_clauses,
+                    }
+                },
+                "size": limit,
+                "sort": [{"@timestamp": {"order": "desc"}}]
+            }
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "alerts-*,.siem-signals-*,logs-endpoint.alerts-*",
+                "alerts-*",
+                "_all",  # Fallback to all indices if specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            alerts = []
+            for hit in hits:
+                source = hit.get("_source", {})
+                signal = source.get("signal", {})
+                rule = signal.get("rule", {}) if isinstance(signal.get("rule"), dict) else {}
+
+                alert = {
+                    "id": hit.get("_id", ""),
+                    "title": rule.get("name", "") or source.get("event", {}).get("reason", ""),
+                    "severity": signal.get("severity", "medium"),
+                    "status": signal.get("status", "open"),
+                    "created_at": source.get("@timestamp", ""),
+                    "alert_type": rule.get("category", "") or source.get("event", {}).get("category", ""),
+                    "description": rule.get("description", "") or source.get("event", {}).get("reason", ""),
+                    "related_entities": self._extract_entities_from_alert(source),
+                    "source": "elastic",
+                }
+                alerts.append(alert)
+
+            return {
+                "total_count": total_count,
+                "returned_count": len(alerts),
+                "alerts": alerts,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting alerts by time window: {e}")
+            raise IntegrationError(f"Failed to get alerts by time window: {e}") from e
+
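+    # Usage sketch for get_alerts_by_time_window (ISO-8601 timestamps, as the
+    # validation above expects):
+    #
+    #     window = siem.get_alerts_by_time_window(
+    #         "2024-01-01T00:00:00Z", "2024-01-01T06:00:00Z", severity="high"
+    #     )
+    #     window["alerts"]  # alerts in the window, for temporal correlation
+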
+ def get_email_events(
3124
+ self,
3125
+ sender_email: Optional[str] = None,
3126
+ recipient_email: Optional[str] = None,
3127
+ subject: Optional[str] = None,
3128
+ email_id: Optional[str] = None,
3129
+ hours_back: int = 24,
3130
+ limit: int = 100,
3131
+ event_type: Optional[str] = None,
3132
+ ) -> Dict[str, Any]:
3133
+ """
3134
+ Retrieve email security events with structured fields for phishing analysis.
3135
+
3136
+ Returns email events with sender, recipient, subject, headers, authentication, URLs, and attachments.
3137
+ """
3138
+ try:
3139
+ # Build Elasticsearch query for email events
3140
+ must_clauses = [
3141
+ {"range": {"@timestamp": {"gte": f"now-{hours_back}h"}}},
3142
+ ]
3143
+
3144
+ # Email event type filter
3145
+ email_should = [
3146
+ {"match": {"event.category": "email"}},
3147
+ {"match": {"event.dataset": "email"}},
3148
+ {"exists": {"field": "email.from.address"}},
3149
+ {"exists": {"field": "email.to.address"}},
3150
+ ]
3151
+
3152
+ if sender_email:
3153
+ must_clauses.append({
3154
+ "bool": {
3155
+ "should": [
3156
+ {"match": {"email.from.address": sender_email}},
3157
+ {"match": {"email.sender.address": sender_email}},
3158
+ {"match": {"sender.email": sender_email}},
3159
+ ]
3160
+ }
3161
+ })
3162
+
3163
+ if recipient_email:
3164
+ must_clauses.append({
3165
+ "bool": {
3166
+ "should": [
3167
+ {"match": {"email.to.address": recipient_email}},
3168
+ {"match": {"email.recipient.address": recipient_email}},
3169
+ {"match": {"recipient.email": recipient_email}},
3170
+ ]
3171
+ }
3172
+ })
3173
+
3174
+ if subject:
3175
+ must_clauses.append({
3176
+ "bool": {
3177
+ "should": [
3178
+ {"match": {"email.subject": subject}},
3179
+ {"wildcard": {"email.subject": f"*{subject}*"}},
3180
+ ]
3181
+ }
3182
+ })
3183
+
3184
+ if email_id:
3185
+ must_clauses.append({
3186
+ "bool": {
3187
+ "should": [
3188
+ {"match": {"email.message_id": email_id}},
3189
+ {"match": {"message_id": email_id}},
3190
+ ]
3191
+ }
3192
+ })
3193
+
3194
+ if event_type and event_type != "all":
3195
+ if event_type == "delivered":
3196
+ must_clauses.append({"match": {"event.action": "delivered"}})
3197
+ elif event_type == "blocked":
3198
+ must_clauses.append({"match": {"event.action": "blocked"}})
3199
+ elif event_type == "quarantined":
3200
+ must_clauses.append({"match": {"event.action": "quarantined"}})
3201
+
3202
+ query = {
3203
+ "query": {
3204
+ "bool": {
3205
+ "must": must_clauses,
3206
+ "should": email_should,
3207
+ "minimum_should_match": 1,
3208
+ }
3209
+ },
3210
+ "size": limit,
3211
+ "sort": [{"@timestamp": {"order": "desc"}}]
3212
+ }
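+
+            # For illustration only (hypothetical values): a call with
+            # sender_email="alice@example.com" and the default hours_back=24
+            # assembles roughly this request body:
+            #   {"query": {"bool": {
+            #       "must": [
+            #           {"range": {"@timestamp": {"gte": "now-24h"}}},
+            #           {"bool": {"should": [
+            #               {"match": {"email.from.address": "alice@example.com"}},
+            #               {"match": {"email.sender.address": "alice@example.com"}},
+            #               {"match": {"sender.email": "alice@example.com"}}]}}],
+            #       "should": [<the email markers above>],
+            #       "minimum_should_match": 1}},
+            #    "size": 100,
+            #    "sort": [{"@timestamp": {"order": "desc"}}]}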
+
+            # Search with fallback index patterns
+            indices_patterns = [
+                "logs-*,security-*,filebeat-*",
+                "_all",  # Fall back to all indices if the specific patterns fail
+            ]
+            response = self._search_with_fallback(indices_patterns, query)
+
+            hits = response.get("hits", {}).get("hits", [])
+            total = response.get("hits", {}).get("total", {})
+            if isinstance(total, dict):
+                total_count = total.get("value", len(hits))
+            else:
+                total_count = total
+
+            events = []
+            for hit in hits[:limit]:
+                source = hit.get("_source", {})
+                timestamp_str = source.get("@timestamp") or source.get("timestamp")
+                # Fall back to "now" when the document carries no usable timestamp.
+                timestamp = datetime.utcnow()
+                if timestamp_str:
+                    try:
+                        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
+                    except Exception:
+                        pass
+
+                # Extract email fields; from/to are normalized to dicts here,
+                # so the lookups below can rely on .get().
+                email = source.get("email", {})
+                from_addr = email.get("from", {}) if isinstance(email.get("from"), dict) else {}
+                to_addr = email.get("to", {}) if isinstance(email.get("to"), dict) else {}
+
+                # Extract headers
+                headers = email.get("headers", {}) if isinstance(email.get("headers"), dict) else {}
+
+                # Extract authentication results (SPF/DKIM/DMARC)
+                auth = email.get("authentication", {}) if isinstance(email.get("authentication"), dict) else {}
+
+                # Extract URLs (from the email body or headers)
+                urls = []
+                if isinstance(email.get("urls"), list):
+                    urls = email.get("urls", [])
+                elif isinstance(source.get("urls"), list):
+                    urls = source.get("urls", [])
+
+                # Extract attachments
+                attachments = []
+                if isinstance(email.get("attachments"), list):
+                    attachments = email.get("attachments", [])
+                elif isinstance(source.get("attachments"), list):
+                    attachments = source.get("attachments", [])
+
+                sender_address = from_addr.get("address") or email.get("from.address") or email.get("sender.email")
+                received = headers.get("Received")
+
+                event = {
+                    "id": hit.get("_id", ""),
+                    "timestamp": timestamp.isoformat(),
+                    "sender_email": sender_address,
+                    "sender_domain": sender_address.split("@")[-1] if sender_address else "",
+                    "recipient_email": to_addr.get("address") or email.get("to.address") or email.get("recipient.email"),
+                    "subject": email.get("subject") or source.get("email.subject"),
+                    "message_id": email.get("message_id") or headers.get("Message-ID") or source.get("message_id"),
+                    "headers": {
+                        "from": headers.get("From") or sender_address,
+                        "reply_to": headers.get("Reply-To") or email.get("reply_to"),
+                        "return_path": headers.get("Return-Path") or email.get("return_path"),
+                        "received": received if isinstance(received, list) else [received] if received else [],
+                    },
+                    "authentication": {
+                        "spf_status": auth.get("spf") or email.get("spf.status"),
+                        "dkim_status": auth.get("dkim") or email.get("dkim.status"),
+                        "dmarc_status": auth.get("dmarc") or email.get("dmarc.status"),
+                    },
+                    "urls": [
+                        {
+                            "url": url.get("url") if isinstance(url, dict) else url,
+                            "domain": url.get("domain") if isinstance(url, dict) else None,
+                            "text": url.get("text") if isinstance(url, dict) else None,
+                        }
+                        for url in urls[:20]  # Limit to the first 20 URLs
+                    ],
+                    "attachments": [
+                        {
+                            "filename": att.get("filename") if isinstance(att, dict) else att,
+                            "file_hash": (att.get("hash") or att.get("sha256")) if isinstance(att, dict) else None,
+                            "file_type": (att.get("type") or att.get("mime_type")) if isinstance(att, dict) else None,
+                            "file_size": att.get("size") if isinstance(att, dict) else None,
+                        }
+                        for att in attachments[:20]  # Limit to the first 20 attachments
+                    ],
+                    "event_type": source.get("event", {}).get("action") or email.get("action") or "delivered",
+                    "threat_score": email.get("threat_score") or source.get("threat_score"),
+                }
+                events.append(event)
+
+            return {
+                "total_count": total_count,
+                "returned_count": len(events),
+                "events": events,
+            }
+        except Exception as e:
+            logger.exception(f"Error getting email events: {e}")
+            raise IntegrationError(f"Failed to get email events: {e}") from e
+
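Taken together, the two methods support a simple triage flow: pull the alerts around an incident window, then pivot to mail traffic from a suspect sender. A sketch of such a call sequence follows — the `ElasticSIEMClient` name, its constructor arguments, and all hostnames are placeholders, since this diff shows only the methods themselves:

    # Hypothetical usage; the owning class and its constructor are assumptions.
    client = ElasticSIEMClient(base_url="https://elastic.example.internal:9200", api_key="...")

    # Temporal correlation: everything high-severity in a two-hour window.
    window = client.get_alerts_by_time_window(
        start_time="2024-01-15T09:00:00Z",
        end_time="2024-01-15T11:00:00Z",
        severity="high",
    )
    print(f"{window['returned_count']} of {window['total_count']} alerts returned")

    # Phishing triage: recent mail from a suspect sender, checked for auth failures.
    mail = client.get_email_events(sender_email="billing@invoice-alerts.example", hours_back=48)
    for event in mail["events"]:
        auth = event["authentication"]
        if "fail" in (auth["spf_status"], auth["dmarc_status"]):
            print("Possible spoof:", event["subject"], event["message_id"])

Note that the authentication statuses are returned as whatever strings the source index carries, so values such as "fail" and "pass" are conventions of the upstream mail gateway rather than an enum enforced by this client.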