cyvest 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cyvest might be problematic. Click here for more details.

@@ -0,0 +1,459 @@
1
+ """
2
+ Serialization and deserialization for Cyvest investigations.
3
+
4
+ Provides JSON export/import and Markdown generation for LLM consumption.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from decimal import Decimal
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ from cyvest.levels import Level, normalize_level
15
+ from cyvest.model import AuditEvent, Check, Container, Enrichment, Observable, Relationship, ThreatIntel
16
+ from cyvest.model_enums import ObservableType
17
+ from cyvest.model_schema import InvestigationSchema
18
+ from cyvest.score import ScoreMode
19
+
20
+ if TYPE_CHECKING:
21
+ from cyvest.cyvest import Cyvest
22
+ from cyvest.investigation import Investigation
23
+
24
+
25
def serialize_investigation(inv: Investigation, *, include_audit_log: bool = True) -> InvestigationSchema:
    """
    Convert an investigation into a validated InvestigationSchema.

    Relies on InvestigationSchema (a Pydantic model) for validation and for
    field-level serialization via its field_serializer hooks.

    Args:
        inv: Investigation to serialize.
        include_audit_log: When True (default) the audit log is embedded;
            when False, audit_log is set to None for compact, deterministic
            output.

    Returns:
        InvestigationSchema instance (call .model_dump() for a plain dict).
    """
    # Make sure check/observable links are up to date before snapshotting.
    inv._refresh_check_links()

    # Group checks by their scope (proxies are resolved by the accessor).
    grouped_checks: dict[str, list[Check]] = {}
    for chk in inv.get_all_checks().values():
        grouped_checks.setdefault(chk.scope, []).append(chk)

    return InvestigationSchema(
        investigation_id=inv.investigation_id,
        investigation_name=inv.investigation_name,
        score=inv.get_global_score(),
        level=inv.get_global_level(),
        whitelisted=inv.is_whitelisted(),
        whitelists=list(inv.get_whitelists()),
        audit_log=inv.get_audit_log() if include_audit_log else None,
        observables=dict(inv.get_all_observables()),
        checks=grouped_checks,
        threat_intels=dict(inv.get_all_threat_intels()),
        enrichments=dict(inv.get_all_enrichments()),
        containers=dict(inv.get_all_containers()),
        stats=inv.get_statistics(),
        data_extraction={
            "root_type": inv.get_root().obs_type.value,
            "score_mode_obs": inv._score_engine._score_mode_obs.value,
        },
    )
79
+
80
+
81
def save_investigation_json(inv: Investigation, filepath: str | Path, *, include_audit_log: bool = True) -> None:
    """
    Write an investigation to disk as pretty-printed JSON.

    Args:
        inv: Investigation to save.
        filepath: Destination path for the JSON file.
        include_audit_log: Embed the audit log when True (default).
            When False, audit_log is serialized as null for compact,
            deterministic output.
    """
    schema = serialize_investigation(inv, include_audit_log=include_audit_log)
    payload = schema.model_dump_json(indent=2, by_alias=True)
    Path(filepath).write_text(payload, encoding="utf-8")
94
+
95
+
96
def generate_markdown_report(
    inv: Investigation,
    include_containers: bool = False,
    include_enrichments: bool = False,
    include_observables: bool = True,
) -> str:
    """
    Generate a Markdown report of the investigation for LLM consumption.

    Args:
        inv: Investigation to report on.
        include_containers: Include containers section in the report (default: False)
        include_enrichments: Include enrichments section in the report (default: False)
        include_observables: Include observables section in the report (default: True)

    Returns:
        Markdown formatted report
    """
    lines: list[str] = []

    # Header
    lines.append("# Cybersecurity Investigation Report")
    lines.append("")
    if getattr(inv, "investigation_name", None):
        lines.append(f"**Investigation Name:** {inv.investigation_name}")
    lines.append(f"**Global Score:** {inv.get_global_score():.2f}")
    lines.append(f"**Global Level:** {inv.get_global_level().name}")
    whitelists = inv.get_whitelists()
    whitelist_status = "Yes" if whitelists else "No"
    lines.append(f"**Whitelisted Investigation:** {whitelist_status}")
    if whitelists:
        lines.append(f"**Whitelist Entries:** {len(whitelists)}")
    lines.append("")

    # Statistics -- computed once and reused for the checks section below
    # (previously get_statistics() was called a second time there).
    lines.append("## Statistics")
    lines.append("")
    stats = inv.get_statistics()
    lines.append(f"- **Total Observables:** {stats.total_observables}")
    lines.append(f"- **Internal Observables:** {stats.internal_observables}")
    lines.append(f"- **External Observables:** {stats.external_observables}")
    lines.append(f"- **Whitelisted Observables:** {stats.whitelisted_observables}")
    lines.append(f"- **Total Checks:** {stats.total_checks}")
    lines.append(f"- **Applied Checks:** {stats.applied_checks}")
    lines.append(f"- **Total Threat Intel:** {stats.total_threat_intel}")
    lines.append("")

    # Whitelists
    if whitelists:
        lines.append("## Whitelists")
        lines.append("")
        for entry in whitelists:
            lines.append(f"- **{entry.identifier}** - {entry.name}")
            if entry.justification:
                lines.append(f"  - Justification: {entry.justification}")
        lines.append("")

    # Checks by Scope.
    # Group reportable checks (level != NONE) in a single pass instead of
    # rescanning every check for every scope (was O(scopes x checks)).
    reportable_checks: dict[str, list[Check]] = {}
    for check in inv.get_all_checks().values():
        if check.level != Level.NONE:
            reportable_checks.setdefault(check.scope, []).append(check)

    lines.append("## Checks by Scope")
    lines.append("")
    for scope in stats.checks_by_scope:
        lines.append(f"### {scope}")
        lines.append("")
        for check in reportable_checks.get(scope, []):
            lines.append(f"- **{check.check_id}**: Score: {check.score_display}, Level: {check.level.name}")
            lines.append(f"  - Description: {check.description}")
            if check.comment:
                lines.append(f"  - Comment: {check.comment}")
        lines.append("")

    # Observables
    if include_observables and inv.get_all_observables():
        # Arrow rendering for relationship direction; enum values fall back
        # to outbound when unrecognized.
        direction_symbols = {
            "outbound": "→",
            "inbound": "←",
            "bidirectional": "↔",
        }
        lines.append("## Observables")
        lines.append("")
        for obs in inv.get_all_observables().values():
            lines.append(f"### {obs.obs_type}: {obs.value}")
            lines.append(f"- **Key:** {obs.key}")
            lines.append(f"- **Score:** {obs.score_display}")
            lines.append(f"- **Level:** {obs.level.name}")
            lines.append(f"- **Internal:** {obs.internal}")
            lines.append(f"- **Whitelisted:** {obs.whitelisted}")
            if obs.comment:
                lines.append(f"- **Comment:** {obs.comment}")
            if obs.relationships:
                lines.append("- **Relationships:**")
                for rel in obs.relationships:
                    # rel.direction may be a raw string or an enum member.
                    direction = rel.direction if isinstance(rel.direction, str) else rel.direction.value
                    direction_symbol = direction_symbols.get(direction, "→")
                    lines.append(f"  - {rel.relationship_type} {direction_symbol} {rel.target_key}")
            if obs.threat_intels:
                lines.append("- **Threat Intelligence:**")
                for ti in obs.threat_intels:
                    lines.append(f"  - {ti.source}: Score {ti.score_display}, Level {ti.level.name}")
                    if ti.comment:
                        lines.append(f"    - {ti.comment}")
            lines.append("")

    # Enrichments
    if include_enrichments and inv.get_all_enrichments():
        lines.append("## Enrichments")
        lines.append("")
        for enr in inv.get_all_enrichments().values():
            lines.append(f"### {enr.name}")
            if enr.context:
                lines.append(f"- **Context:** {enr.context}")
            lines.append(f"- **Data:** {json.dumps(enr.data, indent=2)}")
            lines.append("")

    # Containers
    if include_containers and inv.get_all_containers():
        lines.append("## Containers")
        lines.append("")
        for ctr in inv.get_all_containers().values():
            lines.append(f"### {ctr.path}")
            lines.append(f"- **Description:** {ctr.description}")
            lines.append(f"- **Aggregated Score:** {ctr.get_aggregated_score():.2f}")
            lines.append(f"- **Aggregated Level:** {ctr.get_aggregated_level().name}")
            lines.append(f"- **Checks:** {len(ctr.checks)}")
            lines.append(f"- **Sub-containers:** {len(ctr.sub_containers)}")
            lines.append("")

    return "\n".join(lines)
222
+
223
+
224
def save_investigation_markdown(
    inv: Investigation,
    filepath: str | Path,
    include_containers: bool = False,
    include_enrichments: bool = False,
    include_observables: bool = True,
) -> None:
    """
    Render an investigation to Markdown and write it to disk.

    Args:
        inv: Investigation to save.
        filepath: Destination path for the Markdown file.
        include_containers: Include containers section in the report (default: False)
        include_enrichments: Include enrichments section in the report (default: False)
        include_observables: Include observables section in the report (default: True)
    """
    report = generate_markdown_report(
        inv,
        include_containers,
        include_enrichments,
        include_observables,
    )
    Path(filepath).write_text(report, encoding="utf-8")
244
+
245
+
246
def load_investigation_json(filepath: str | Path) -> Cyvest:
    """
    Load an investigation from a JSON file into a Cyvest object.

    Reconstructs the full investigation state (observables, threat intel,
    checks, enrichments, containers, whitelists, audit log) from a file
    previously produced by save_investigation_json. Entries that fail
    validation are skipped rather than aborting the whole load.

    Args:
        filepath: Path to the JSON file

    Returns:
        Reconstructed Cyvest investigation

    Raises:
        ValueError: If the file lacks a non-empty 'investigation_id'.
    """
    # Imported locally to avoid a circular import (these modules are only
    # available under TYPE_CHECKING at the top of this file).
    from cyvest.cyvest import Cyvest
    from cyvest.investigation import Investigation

    with open(filepath, encoding="utf-8") as handle:
        data = json.load(handle)

    # The investigation id is the only strictly required field.
    investigation_id = data.get("investigation_id")
    if not isinstance(investigation_id, str) or not investigation_id.strip():
        raise ValueError("Serialized investigation must include 'investigation_id'.")

    root_data = data.get("root_data")
    extraction = data.get("data_extraction", {})

    # Root type / score mode are best-effort: fall back to FILE / MAX when
    # missing or unrecognized so old or partial exports still load.
    root_type_raw = extraction.get("root_type")
    try:
        root_type = ObservableType.normalize_root_type(root_type_raw)
    except (TypeError, ValueError):
        root_type = ObservableType.FILE

    score_mode_raw = extraction.get("score_mode_obs")
    try:
        score_mode = ScoreMode(score_mode_raw) if score_mode_raw else ScoreMode.MAX
    except (TypeError, ValueError):
        score_mode = ScoreMode.MAX

    cv = Cyvest(root_data=root_data, root_type=root_type, score_mode_obs=score_mode)

    # Reset internal state to avoid default root pollution
    cv._investigation = Investigation(
        root_data,
        root_type=root_type,
        score_mode_obs=score_mode,
        investigation_id=investigation_id,
    )
    # Disable audit logging while state is replayed below — presumably so
    # the add_* calls don't emit spurious events; re-enabled at the end.
    cv._investigation._audit_enabled = False
    cv._investigation._audit_log = []

    investigation_name = data.get("investigation_name")
    if isinstance(investigation_name, str):
        cv._investigation.investigation_name = investigation_name

    # Load whitelists using Pydantic validation
    whitelists = data.get("whitelists") or []
    for whitelist_info in whitelists:
        try:
            identifier = str(whitelist_info.get("identifier", "")).strip()
            name = str(whitelist_info.get("name", "")).strip()
            if identifier and name:
                cv._investigation.add_whitelist(
                    identifier,
                    name,
                    whitelist_info.get("justification"),
                )
        except ValueError:
            # Invalid whitelist entries are skipped, not fatal.
            continue

    # Observables - leverage Pydantic model_validate (two-pass so root can merge after others exist)
    new_root_key = cv._investigation.get_root().key
    root_obs_info: dict[str, Any] | None = None
    other_obs_infos: list[dict[str, Any]] = []
    for obs_info in data.get("observables", {}).values():
        obs_key = obs_info.get("key", "")
        if obs_key == new_root_key:
            # Defer the root observable until all others are registered.
            root_obs_info = obs_info
            continue
        other_obs_infos.append(obs_info)

    for obs_info in other_obs_infos:
        # Prepare data for Pydantic validation
        obs_data = {
            "obs_type": obs_info.get("type", "unknown"),
            "value": obs_info.get("value", ""),
            "internal": obs_info.get("internal", True),
            "whitelisted": obs_info.get("whitelisted", False),
            "comment": obs_info.get("comment", ""),
            "extra": obs_info.get("extra", {}),
            "score": Decimal(str(obs_info.get("score", 0))),
            "level": obs_info.get("level", "INFO"),
            "key": obs_info.get("key", ""),
            "relationships": [Relationship.model_validate(rel) for rel in obs_info.get("relationships", [])],
        }
        obs = Observable.model_validate(obs_data)
        cv._investigation.add_observable(obs)

    if root_obs_info is not None:
        # Merge serialized root into the live root (preserves relationships, etc.).
        # NOTE: this rebinds the name `root_data`; the `extra` fallback below
        # still sees the original JSON root_data because the dict literal is
        # fully evaluated before the assignment takes effect.
        root_data = {
            "obs_type": root_obs_info.get("type", root_type),
            "value": "root",
            "internal": root_obs_info.get("internal", False),
            "whitelisted": root_obs_info.get("whitelisted", False),
            "comment": root_obs_info.get("comment", ""),
            "extra": root_obs_info.get("extra", root_data),
            "score": Decimal(str(root_obs_info.get("score", 0))),
            "level": root_obs_info.get("level", "INFO"),
            "key": new_root_key,
            "relationships": [Relationship.model_validate(rel) for rel in root_obs_info.get("relationships", [])],
        }
        root_obs = Observable.model_validate(root_data)
        cv._investigation.add_observable(root_obs)

    # Threat intel - leverage Pydantic model_validate
    for ti_info in data.get("threat_intels", {}).values():
        raw_taxonomies = ti_info.get("taxonomies", []) or []
        normalized_taxonomies: list[Any] = []
        for taxonomy in raw_taxonomies:
            if isinstance(taxonomy, dict) and "level" in taxonomy:
                # Copy before mutating so the source dict stays untouched.
                taxonomy = dict(taxonomy)
                taxonomy["level"] = normalize_level(taxonomy["level"])
            normalized_taxonomies.append(taxonomy)

        ti_data = {
            "source": ti_info.get("source", ""),
            "observable_key": ti_info.get("observable_key", ""),
            "comment": ti_info.get("comment", ""),
            "extra": ti_info.get("extra", {}),
            "score": Decimal(str(ti_info.get("score", 0))),
            "level": ti_info.get("level", "INFO"),
            "taxonomies": normalized_taxonomies,
            "key": ti_info.get("key", ""),
        }
        ti = ThreatIntel.model_validate(ti_data)
        # Threat intel is only attached when its observable was loaded;
        # orphaned entries are silently dropped.
        observable = cv._investigation.get_observable(ti.observable_key)
        if observable:
            cv._investigation.add_threat_intel(ti, observable)

    # Checks - leverage Pydantic model_validate
    for scope_checks in data.get("checks", {}).values():
        for check_info in scope_checks:
            raw_links = check_info.get("observable_links", []) or []
            normalized_links = []
            for link in raw_links:
                if isinstance(link, dict):
                    normalized_links.append(
                        {
                            "observable_key": link.get("observable_key", ""),
                            "propagation_mode": link.get("propagation_mode", "LOCAL_ONLY"),
                        }
                    )
                else:
                    # Non-dict links are assumed to already be valid link
                    # objects and are passed through as-is.
                    normalized_links.append(link)
            check_data = {
                "check_id": check_info.get("check_id", ""),
                "scope": check_info.get("scope", ""),
                "description": check_info.get("description", ""),
                "comment": check_info.get("comment", ""),
                "extra": check_info.get("extra", {}),
                "score": Decimal(str(check_info.get("score", 0))),
                "level": check_info.get("level", "NONE"),
                # Checks imported from another investigation keep their
                # original origin id; otherwise adopt the current one.
                "origin_investigation_id": check_info.get("origin_investigation_id")
                or cv._investigation.investigation_id,
                "observable_links": normalized_links,
                "key": check_info.get("key", ""),
            }
            check = Check.model_validate(check_data)
            cv._investigation.add_check(check)

    # Enrichments - leverage Pydantic model_validate
    for enr_info in data.get("enrichments", {}).values():
        enr_data = {
            "name": enr_info.get("name", ""),
            "data": enr_info.get("data", {}),
            "context": enr_info.get("context", ""),
            "key": enr_info.get("key", ""),
        }
        enrichment = Enrichment.model_validate(enr_data)
        cv._investigation.add_enrichment(enrichment)

    # Containers
    def build_container(container_info: dict[str, Any]) -> Container:
        """Recursively register one container, its checks and sub-containers."""
        container_data = {
            "path": container_info.get("path", ""),
            "description": container_info.get("description", ""),
            "key": container_info.get("key", ""),
        }
        container = Container.model_validate(container_data)
        # add_container returns the registered instance, which may differ
        # from the validated one — use its return value from here on.
        container = cv._investigation.add_container(container)

        # Checks must already exist (loaded above) to be attached.
        for check_key in container_info.get("checks", []):
            check = cv._investigation.get_check(check_key)
            if check:
                cv._investigation.add_check_to_container(container.key, check.key)

        for sub_info in container_info.get("sub_containers", {}).values():
            sub_container = build_container(sub_info)
            cv._investigation.add_sub_container(container.key, sub_container.key)

        return container

    for container_info in data.get("containers", {}).values():
        build_container(container_info)

    # Recompute check/observable links now that everything is registered.
    cv._investigation._refresh_check_links()

    # Restore the serialized audit log verbatim, skipping malformed events.
    audit_log = []
    for event_info in data.get("audit_log", []) or []:
        try:
            audit_log.append(AuditEvent.model_validate(event_info))
        except Exception:
            continue
    cv._investigation._audit_log = audit_log
    cv._investigation._audit_enabled = True

    return cv