cyvest-4.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cyvest might be problematic. Click here for more details.

cyvest/model_schema.py ADDED
@@ -0,0 +1,164 @@
1
+ """
2
+ Pydantic models for JSON Schema generation of serialized Cyvest investigations.
3
+
4
+ These models describe the output structure of `serialize_investigation()` and other
5
+ serialization functions. They are used with `model_json_schema(mode='serialization')`
6
+ to generate JSON Schema that matches the actual serialized output.
7
+
8
+ Entity types reference the runtime models directly from `model.py`. When generating
9
+ schemas with `mode='serialization'`, Pydantic respects field_serializer decorators
10
+ and produces schemas matching the actual model_dump() output.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from decimal import Decimal
16
+ from typing import Annotated, Any, Literal
17
+
18
+ from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer, model_validator
19
+
20
+ from cyvest.levels import Level
21
+ from cyvest.model import (
22
+ AuditEvent,
23
+ Check,
24
+ Container,
25
+ Enrichment,
26
+ InvestigationWhitelist,
27
+ Observable,
28
+ ThreatIntel,
29
+ _format_score_decimal,
30
+ )
31
+ from cyvest.model_enums import ObservableType
32
+ from cyvest.score import ScoreMode
33
+
34
+
35
# Shared annotation for every counter below: an integer constrained to be >= 0.
_NonNegativeInt = Annotated[int, Field(ge=0)]


class StatisticsSchema(BaseModel):
    """
    Schema for the statistics section of a serialized investigation.

    Mirrors the output of `InvestigationStats.get_summary()`. Scalar totals are
    required; the per-category breakdown mappings default to empty dicts.
    Unknown keys are rejected and instances are immutable.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # --- Observables -----------------------------------------------------
    total_observables: _NonNegativeInt
    internal_observables: _NonNegativeInt
    external_observables: _NonNegativeInt
    whitelisted_observables: _NonNegativeInt
    observables_by_type: dict[str, _NonNegativeInt] = Field(default_factory=dict)
    observables_by_level: dict[str, _NonNegativeInt] = Field(default_factory=dict)
    observables_by_type_and_level: dict[str, dict[str, _NonNegativeInt]] = Field(
        default_factory=dict,
    )

    # --- Checks ----------------------------------------------------------
    total_checks: _NonNegativeInt
    applied_checks: _NonNegativeInt
    checks_by_scope: dict[str, list[str]] = Field(default_factory=dict)
    checks_by_level: dict[str, list[str]] = Field(default_factory=dict)

    # --- Threat intelligence ---------------------------------------------
    total_threat_intel: _NonNegativeInt
    threat_intel_by_source: dict[str, _NonNegativeInt] = Field(default_factory=dict)
    threat_intel_by_level: dict[str, _NonNegativeInt] = Field(default_factory=dict)

    # --- Containers ------------------------------------------------------
    total_containers: _NonNegativeInt
59
+
60
+
61
class DataExtractionSchema(BaseModel):
    """
    Schema for the data extraction metadata of a serialized investigation.

    Unknown keys are rejected and instances are immutable.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Optional: restricted to the FILE or ARTIFACT observable types, or None.
    root_type: Literal[ObservableType.FILE, ObservableType.ARTIFACT] | None = Field(
        None,
        description="Root observable type used during data extraction.",
    )
    # Required: no default is supplied.
    score_mode_obs: ScoreMode = Field(
        ...,
        description="Observable score aggregation mode: 'max' takes highest score, 'sum' adds all scores.",
    )
73
+
74
+
75
class InvestigationSchema(BaseModel):
    """
    Schema for a complete serialized investigation.

    This model describes the output of `serialize_investigation()` from
    `cyvest.io_serialization`. It is the top-level schema for exported investigations.

    Entity types reference the runtime models directly. When generating schemas with
    `mode='serialization'`, Pydantic respects field_serializer decorators and produces
    schemas matching the actual model_dump() output.

    Unknown keys are rejected (`extra="forbid"`) and instances are immutable
    (`frozen=True`). Collection fields are declared as required so they appear
    as required in the generated schema, but `ensure_defaults` fills them in
    when a dict payload omits them.
    """

    model_config = ConfigDict(
        extra="forbid",
        frozen=True,
        json_schema_extra={
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "$id": "https://cyvest.io/schema/investigation.json",
            "title": "Cyvest Investigation",
        },
    )

    investigation_id: str = Field(..., description="Stable investigation identity (ULID).")
    investigation_name: str | None = Field(
        default=None,
        description="Optional human-readable investigation name.",
    )
    score: Decimal = Field(..., description="Global investigation score.")
    level: Level = Field(
        ...,
        description="Security level classification from NONE (lowest) to MALICIOUS (highest).",
    )
    whitelisted: bool = Field(description="Whether the investigation is whitelisted.")
    whitelists: list[InvestigationWhitelist] = Field(
        ...,
        description="List of whitelist entries applied to this investigation.",
    )
    audit_log: list[AuditEvent] | None = Field(
        default_factory=list,
        description="Append-only investigation audit log. Null when serialization disabled audit.",
    )
    observables: dict[str, Observable] = Field(
        ...,
        description="Observables keyed by their unique key.",
    )
    checks: dict[str, list[Check]] = Field(
        ...,
        description="Checks organized by scope.",
    )
    threat_intels: dict[str, ThreatIntel] = Field(
        ...,
        description="Threat intelligence entries keyed by their unique key.",
    )
    enrichments: dict[str, Enrichment] = Field(
        ...,
        description="Enrichment entries keyed by their unique key.",
    )
    containers: dict[str, Container] = Field(
        ...,
        description="Containers keyed by their unique key.",
    )
    stats: StatisticsSchema = Field(description="Investigation statistics summary.")
    data_extraction: DataExtractionSchema = Field(description="Data extraction metadata.")

    @field_serializer("score")
    def serialize_score(self, v: Decimal) -> float:
        """Serialize the Decimal score as a float."""
        return float(v)

    @computed_field(return_type=str)
    @property
    def score_display(self) -> str:
        """Global investigation score formatted as fixed-point x.xx."""
        return _format_score_decimal(self.score)

    @model_validator(mode="before")
    @classmethod
    def ensure_defaults(cls, v: Any) -> Any:
        """
        Fill in defaults for keys missing from a raw dict payload.

        Non-dict inputs are returned untouched so Pydantic can validate them
        as usual. For dict inputs the defaults are applied to a shallow copy,
        so the caller's dict is never modified by validation.

        Keys that are present keep their value even when it is None; in
        particular an explicit ``"audit_log": None`` is preserved.

        Args:
            v: Raw input handed to the model before field validation.

        Returns:
            ``v`` itself when it is not a dict, otherwise a new dict containing
            the original items plus any missing defaults.
        """
        if not isinstance(v, dict):
            return v

        # Work on a copy: a "before" validator receives the caller's own
        # object, and setdefault() on it would leak defaults back to the caller.
        data = dict(v)

        data.setdefault("level", Level.NONE)
        data.setdefault("whitelists", [])
        data.setdefault("audit_log", [])
        data.setdefault("observables", {})
        data.setdefault("checks", {})
        data.setdefault("threat_intels", {})
        data.setdefault("enrichments", {})
        data.setdefault("containers", {})

        return data