cyvest 0.1.0__py3-none-any.whl → 5.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cyvest/io_schema.py ADDED
@@ -0,0 +1,35 @@
1
+ """
2
+ JSON Schema definition for serialized Cyvest investigations.
3
+
4
+ The schema mirrors the structure emitted by `serialize_investigation` in
5
+ `cyvest.io_serialization` so consumers can validate exports or generate
6
+ typed bindings.
7
+
8
+ This module uses Pydantic's `model_json_schema(mode='serialization')` to generate
9
+ schemas that match the actual serialized output (respecting field_serializer decorators).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from cyvest.model_schema import InvestigationSchema
17
+
18
+
19
def get_investigation_schema() -> dict[str, Any]:
    """
    Return the JSON Schema describing serialized investigations.

    The schema (JSON Schema Draft 2020-12) documents the output of
    `serialize_investigation()`. It is produced with Pydantic's
    `model_json_schema` in serialization mode, so `field_serializer`
    decorators are honored and the structure matches the actual
    `model_dump()` output. All referenced entity types (Observable,
    Check, ThreatIntel, Enrichment, Tag, InvestigationWhitelist) are
    included automatically in the `$defs` section.

    Returns:
        dict[str, Any]: Schema dictionary compliant with JSON Schema Draft 2020-12.
    """
    # by_alias=True keeps property names aligned with the serialized JSON keys.
    schema = InvestigationSchema.model_json_schema(mode="serialization", by_alias=True)
    return schema
@@ -0,0 +1,465 @@
1
+ """
2
+ Serialization and deserialization for Cyvest investigations.
3
+
4
+ Provides JSON export/import and Markdown generation for LLM consumption.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from decimal import Decimal
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ from cyvest.levels import Level, normalize_level
15
+ from cyvest.model import AuditEvent, Check, Enrichment, Observable, Relationship, Tag, ThreatIntel
16
+ from cyvest.model_enums import ObservableType
17
+ from cyvest.model_schema import InvestigationSchema
18
+ from cyvest.score import ScoreMode
19
+
20
+ if TYPE_CHECKING:
21
+ from cyvest.cyvest import Cyvest
22
+ from cyvest.investigation import Investigation
23
+
24
+
25
def serialize_investigation(inv: Investigation, *, include_audit_log: bool = True) -> InvestigationSchema:
    """
    Serialize a complete investigation to an InvestigationSchema.

    Uses InvestigationSchema for validation and automatic serialization via
    Pydantic's field_serializer decorators.

    Args:
        inv: Investigation to serialize
        include_audit_log: Include audit log in serialization (default: True).
            When False, audit_log is set to None for compact, deterministic output.

    Returns:
        InvestigationSchema instance (use .model_dump() for dict)
    """
    # Make sure check <-> observable links are consistent before snapshotting.
    inv._rebuild_all_check_links()

    # The root type and score mode are stored so a later load can rebuild the
    # investigation with the same configuration.
    root_observable = inv.get_root()
    extraction_config = {
        "root_type": root_observable.obs_type.value,
        "score_mode_obs": inv._score_engine._score_mode_obs.value,
    }

    # Build and validate the snapshot through the Pydantic model.
    return InvestigationSchema(
        investigation_id=inv.investigation_id,
        investigation_name=inv.investigation_name,
        score=inv.get_global_score(),
        level=inv.get_global_level(),
        whitelisted=inv.is_whitelisted(),
        whitelists=list(inv.get_whitelists()),
        audit_log=inv.get_audit_log() if include_audit_log else None,
        observables=dict(inv.get_all_observables()),
        checks=dict(inv.get_all_checks()),
        threat_intels=dict(inv.get_all_threat_intels()),
        enrichments=dict(inv.get_all_enrichments()),
        tags=dict(inv.get_all_tags()),
        stats=inv.get_statistics(),
        data_extraction=extraction_config,
    )
75
+
76
+
77
def save_investigation_json(inv: Investigation, filepath: str | Path, *, include_audit_log: bool = True) -> None:
    """
    Save an investigation to a JSON file.

    Args:
        inv: Investigation to save
        filepath: Path to save the JSON file
        include_audit_log: Include audit log in output (default: True).
            When False, audit_log is set to null for compact, deterministic output.
    """
    serialized = serialize_investigation(inv, include_audit_log=include_audit_log)
    # model_dump_json applies the same field serializers used by model_dump.
    payload = serialized.model_dump_json(indent=2, by_alias=True)
    with open(filepath, "w", encoding="utf-8") as handle:
        handle.write(payload)
90
+
91
+
92
def generate_markdown_report(
    inv: Investigation,
    include_tags: bool = False,
    include_enrichments: bool = False,
    include_observables: bool = True,
    exclude_levels: set[Level] | None = None,
) -> str:
    """
    Generate a Markdown report of the investigation for LLM consumption.

    Args:
        inv: Investigation
        include_tags: Include tags section in the report (default: False)
        include_enrichments: Include enrichments section in the report (default: False)
        include_observables: Include observables section in the report (default: True)
        exclude_levels: Set of levels to exclude from checks section (default: {Level.NONE})

    Returns:
        Markdown formatted report
    """
    skipped_levels = {Level.NONE} if exclude_levels is None else exclude_levels

    lines: list[str] = []
    emit = lines.append

    # Header
    emit("# Cybersecurity Investigation Report")
    emit("")
    if getattr(inv, "investigation_name", None):
        emit(f"**Investigation Name:** {inv.investigation_name}")
    emit(f"**Global Score:** {inv.get_global_score():.2f}")
    emit(f"**Global Level:** {inv.get_global_level().name}")
    whitelists = inv.get_whitelists()
    emit(f"**Whitelisted Investigation:** {'Yes' if whitelists else 'No'}")
    if whitelists:
        emit(f"**Whitelist Entries:** {len(whitelists)}")
    emit("")

    # Statistics
    stats = inv.get_statistics()
    emit("## Statistics")
    emit("")
    emit(f"- **Total Observables:** {stats.total_observables}")
    emit(f"- **Internal Observables:** {stats.internal_observables}")
    emit(f"- **External Observables:** {stats.external_observables}")
    emit(f"- **Whitelisted Observables:** {stats.whitelisted_observables}")
    emit(f"- **Total Checks:** {stats.total_checks}")
    emit(f"- **Applied Checks:** {stats.applied_checks}")
    emit(f"- **Total Threat Intel:** {stats.total_threat_intel}")
    emit("")

    # Whitelists
    if whitelists:
        emit("## Whitelists")
        emit("")
        for entry in whitelists:
            emit(f"- **{entry.identifier}** - {entry.name}")
            if entry.justification:
                emit(f"  - Justification: {entry.justification}")
        emit("")

    # Checks (the section header is emitted even when every check is filtered out)
    emit("## Checks")
    emit("")
    for check in inv.get_all_checks().values():
        if check.level in skipped_levels:
            continue
        emit(f"- **{check.check_name}**: Score: {check.score_display}, Level: {check.level.name}")
        emit(f"  - Description: {check.description}")
        if check.comment:
            emit(f"  - Comment: {check.comment}")
    emit("")

    # Observables
    if include_observables and inv.get_all_observables():
        emit("## Observables")
        emit("")
        for obs in inv.get_all_observables().values():
            emit(f"### {obs.obs_type}: {obs.value}")
            emit(f"- **Key:** {obs.key}")
            emit(f"- **Score:** {obs.score_display}")
            emit(f"- **Level:** {obs.level.name}")
            emit(f"- **Internal:** {obs.internal}")
            emit(f"- **Whitelisted:** {obs.whitelisted}")
            if obs.comment:
                emit(f"- **Comment:** {obs.comment}")
            if obs.relationships:
                emit("- **Relationships:**")
                for rel in obs.relationships:
                    # rel.direction may arrive as a plain string or an enum member.
                    raw_direction = rel.direction if isinstance(rel.direction, str) else rel.direction.value
                    arrow = {"outbound": "→", "inbound": "←", "bidirectional": "↔"}.get(raw_direction, "→")
                    emit(f"  - {rel.relationship_type} {arrow} {rel.target_key}")
            if obs.threat_intels:
                emit("- **Threat Intelligence:**")
                for ti in obs.threat_intels:
                    emit(f"  - {ti.source}: Score {ti.score_display}, Level {ti.level.name}")
                    if ti.comment:
                        emit(f"    - {ti.comment}")
            emit("")

    # Enrichments
    if include_enrichments and inv.get_all_enrichments():
        emit("## Enrichments")
        emit("")
        for enr in inv.get_all_enrichments().values():
            emit(f"### {enr.name}")
            if enr.context:
                emit(f"- **Context:** {enr.context}")
            emit(f"- **Data:** {json.dumps(enr.data, indent=2)}")
            emit("")

    # Tags
    if include_tags and inv.get_all_tags():
        emit("## Tags")
        emit("")
        for tag in inv.get_all_tags().values():
            emit(f"### {tag.name}")
            emit(f"- **Description:** {tag.description}")
            emit(f"- **Direct Score:** {tag.get_direct_score():.2f}")
            emit(f"- **Aggregated Score:** {inv.get_tag_aggregated_score(tag.name):.2f}")
            emit(f"- **Aggregated Level:** {inv.get_tag_aggregated_level(tag.name).name}")
            emit(f"- **Direct Checks:** {len(tag.checks)}")
            emit("")

    return "\n".join(lines)
220
+
221
+
222
def save_investigation_markdown(
    inv: Investigation,
    filepath: str | Path,
    include_tags: bool = False,
    include_enrichments: bool = False,
    include_observables: bool = True,
    exclude_levels: set[Level] | None = None,
) -> None:
    """
    Save an investigation as a Markdown report.

    Args:
        inv: Investigation to save
        filepath: Path to save the Markdown file
        include_tags: Include tags section in the report (default: False)
        include_enrichments: Include enrichments section in the report (default: False)
        include_observables: Include observables section in the report (default: True)
        exclude_levels: Set of levels to exclude from checks section (default: {Level.NONE})
    """
    report = generate_markdown_report(inv, include_tags, include_enrichments, include_observables, exclude_levels)
    Path(filepath).write_text(report, encoding="utf-8")
244
+
245
+
246
def load_investigation_dict(data: dict[str, Any]) -> Cyvest:
    """
    Load an investigation from a dictionary (parsed JSON) into a Cyvest object.

    Rebuilds whitelists, observables, threat intel, checks, enrichments, tags
    and the audit log from the structure produced by `serialize_investigation`.
    Missing or null sections are treated as empty, and malformed individual
    entries (whitelists, audit events) are skipped so a partially degraded
    export still loads.

    Args:
        data: Dictionary containing the serialized investigation data

    Returns:
        Reconstructed Cyvest investigation

    Raises:
        ValueError: If the payload lacks a non-empty 'investigation_id'.
    """
    # Imported here to avoid a circular import at module load time.
    from cyvest.cyvest import Cyvest
    from cyvest.investigation import Investigation

    investigation_id = data.get("investigation_id")
    if not isinstance(investigation_id, str) or not investigation_id.strip():
        raise ValueError("Serialized investigation must include 'investigation_id'.")

    root_data = data.get("root_data")
    # `or {}` / `or []` below normalizes both missing keys and explicit nulls.
    extraction = data.get("data_extraction") or {}

    root_type_raw = extraction.get("root_type")
    try:
        root_type = ObservableType.normalize_root_type(root_type_raw)
    except (TypeError, ValueError):
        # Unknown/legacy root types fall back to FILE instead of failing the load.
        root_type = ObservableType.FILE

    score_mode_raw = extraction.get("score_mode_obs")
    try:
        score_mode = ScoreMode(score_mode_raw) if score_mode_raw else ScoreMode.MAX
    except (TypeError, ValueError):
        score_mode = ScoreMode.MAX

    cv = Cyvest(root_data=root_data, root_type=root_type, score_mode_obs=score_mode)

    # Reset internal state to avoid default root pollution; auditing stays off
    # while rebuilding so the load itself is not recorded as events.
    cv._investigation = Investigation(
        root_data,
        root_type=root_type,
        score_mode_obs=score_mode,
        investigation_id=investigation_id,
    )
    cv._investigation._audit_enabled = False
    cv._investigation._audit_log = []

    investigation_name = data.get("investigation_name")
    if isinstance(investigation_name, str):
        cv._investigation.investigation_name = investigation_name

    # Whitelists: skip malformed entries rather than aborting the whole load.
    for whitelist_info in data.get("whitelists") or []:
        try:
            identifier = str(whitelist_info.get("identifier", "")).strip()
            name = str(whitelist_info.get("name", "")).strip()
            if identifier and name:
                cv._investigation.add_whitelist(
                    identifier,
                    name,
                    whitelist_info.get("justification"),
                )
        except ValueError:
            continue

    # Observables - leverage Pydantic model_validate. Two-pass so the
    # serialized root can merge into the live root after all others exist.
    new_root_key = cv._investigation.get_root().key
    root_obs_info: dict[str, Any] | None = None
    other_obs_infos: list[dict[str, Any]] = []
    for obs_info in (data.get("observables") or {}).values():
        if obs_info.get("key", "") == new_root_key:
            root_obs_info = obs_info
        else:
            other_obs_infos.append(obs_info)

    for obs_info in other_obs_infos:
        obs = Observable.model_validate(
            {
                "obs_type": obs_info.get("type", "unknown"),
                "value": obs_info.get("value", ""),
                "internal": obs_info.get("internal", True),
                "whitelisted": obs_info.get("whitelisted", False),
                "comment": obs_info.get("comment", ""),
                "extra": obs_info.get("extra", {}),
                "score": Decimal(str(obs_info.get("score", 0))),
                "level": obs_info.get("level", "INFO"),
                "key": obs_info.get("key", ""),
                "relationships": [Relationship.model_validate(rel) for rel in obs_info.get("relationships", [])],
            }
        )
        cv._investigation.add_observable(obs)

    if root_obs_info is not None:
        # Merge serialized root into the live root (preserves relationships, etc.).
        # Note: 'extra' deliberately falls back to the raw root_data payload.
        root_obs_payload = {
            "obs_type": root_obs_info.get("type", root_type),
            "value": "root",
            "internal": root_obs_info.get("internal", False),
            "whitelisted": root_obs_info.get("whitelisted", False),
            "comment": root_obs_info.get("comment", ""),
            "extra": root_obs_info.get("extra", root_data),
            "score": Decimal(str(root_obs_info.get("score", 0))),
            "level": root_obs_info.get("level", "INFO"),
            "key": new_root_key,
            "relationships": [Relationship.model_validate(rel) for rel in root_obs_info.get("relationships", [])],
        }
        cv._investigation.add_observable(Observable.model_validate(root_obs_payload))

    # Threat intel - attached only when the referenced observable exists.
    for ti_info in (data.get("threat_intels") or {}).values():
        normalized_taxonomies: list[Any] = []
        for taxonomy in ti_info.get("taxonomies", []) or []:
            if isinstance(taxonomy, dict) and "level" in taxonomy:
                taxonomy = dict(taxonomy)  # copy: don't mutate the caller's data
                taxonomy["level"] = normalize_level(taxonomy["level"])
            normalized_taxonomies.append(taxonomy)

        ti = ThreatIntel.model_validate(
            {
                "source": ti_info.get("source", ""),
                "observable_key": ti_info.get("observable_key", ""),
                "comment": ti_info.get("comment", ""),
                "extra": ti_info.get("extra", {}),
                "score": Decimal(str(ti_info.get("score", 0))),
                "level": ti_info.get("level", "INFO"),
                "taxonomies": normalized_taxonomies,
                "key": ti_info.get("key", ""),
            }
        )
        observable = cv._investigation.get_observable(ti.observable_key)
        if observable:
            cv._investigation.add_threat_intel(ti, observable)

    # Checks - accept both dict-shaped and pre-validated observable links.
    for check_info in (data.get("checks") or {}).values():
        normalized_links = []
        for link in check_info.get("observable_links", []) or []:
            if isinstance(link, dict):
                normalized_links.append(
                    {
                        "observable_key": link.get("observable_key", ""),
                        "propagation_mode": link.get("propagation_mode", "LOCAL_ONLY"),
                    }
                )
            else:
                normalized_links.append(link)
        check = Check.model_validate(
            {
                "check_name": check_info.get("check_name", ""),
                "description": check_info.get("description", ""),
                "comment": check_info.get("comment", ""),
                "extra": check_info.get("extra", {}),
                "score": Decimal(str(check_info.get("score", 0))),
                "level": check_info.get("level", "NONE"),
                "origin_investigation_id": check_info.get("origin_investigation_id")
                or cv._investigation.investigation_id,
                "observable_links": normalized_links,
                "key": check_info.get("key", ""),
            }
        )
        cv._investigation.add_check(check)

    # Enrichments
    for enr_info in (data.get("enrichments") or {}).values():
        enrichment = Enrichment.model_validate(
            {
                "name": enr_info.get("name", ""),
                "data": enr_info.get("data", {}),
                "context": enr_info.get("context", ""),
                "key": enr_info.get("key", ""),
            }
        )
        cv._investigation.add_enrichment(enrichment)

    # Tags, then re-link their checks (checks were loaded above).
    for tag_info in (data.get("tags") or {}).values():
        tag = Tag.model_validate(
            {
                "name": tag_info.get("name", ""),
                "description": tag_info.get("description", ""),
                "key": tag_info.get("key", ""),
            }
        )
        tag = cv._investigation.add_tag(tag)
        for check_key in tag_info.get("checks") or []:
            check = cv._investigation.get_check(check_key)
            if check:
                cv._investigation.add_check_to_tag(tag.key, check.key)

    cv._investigation._rebuild_all_check_links()

    # Restore the audit log last; events that no longer validate are dropped
    # (best-effort), then auditing is re-enabled for subsequent operations.
    audit_log = []
    for event_info in data.get("audit_log") or []:
        try:
            audit_log.append(AuditEvent.model_validate(event_info))
        except Exception:
            continue
    cv._investigation._audit_log = audit_log
    cv._investigation._audit_enabled = True

    return cv
450
+
451
+
452
def load_investigation_json(filepath: str | Path) -> Cyvest:
    """
    Load an investigation from a JSON file into a Cyvest object.

    Args:
        filepath: Path to the JSON file

    Returns:
        Reconstructed Cyvest investigation
    """
    raw = Path(filepath).read_text(encoding="utf-8")
    return load_investigation_dict(json.loads(raw))