cyvest 2.0.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cyvest
3
- Version: 2.0.0
3
+ Version: 3.0.0
4
4
  Summary: Cybersecurity investigation model
5
5
  Keywords: cybersecurity,investigation,threat-intel,security-analysis
6
6
  Author: PakitoSec
@@ -16,7 +16,9 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Classifier: Topic :: Security
17
17
  Requires-Dist: click>=8
18
18
  Requires-Dist: logurich[click]>=0.1
19
+ Requires-Dist: pydantic>=2.12.5
19
20
  Requires-Dist: rich>=13
21
+ Requires-Dist: typing-extensions>=4.15
20
22
  Requires-Dist: pyvis>=0.3.2 ; extra == 'visualization'
21
23
  Requires-Python: >=3.10
22
24
  Project-URL: Homepage, https://github.com/PakitoSec/cyvest
@@ -353,8 +355,8 @@ cyvest merge inv1.json inv2.json -o merged.json -f rich --stats
353
355
  # Generate an interactive visualization (requires visualization extra)
354
356
  cyvest visualize investigation.json --min-level SUSPICIOUS --group-by-type
355
357
 
356
- # Output the JSON Schema describing serialized investigations
357
- cyvest schema > schema.json
358
+ # Output the JSON Schema describing serialized investigations and generate types
359
+ uv run cyvest schema -o ./schema/cyvest.schema.json && pnpm -C js/packages/cyvest-js run generate:types
358
360
  ```
359
361
 
360
362
  ## Development
@@ -326,8 +326,8 @@ cyvest merge inv1.json inv2.json -o merged.json -f rich --stats
326
326
  # Generate an interactive visualization (requires visualization extra)
327
327
  cyvest visualize investigation.json --min-level SUSPICIOUS --group-by-type
328
328
 
329
- # Output the JSON Schema describing serialized investigations
330
- cyvest schema > schema.json
329
+ # Output the JSON Schema describing serialized investigations and generate types
330
+ uv run cyvest schema -o ./schema/cyvest.schema.json && pnpm -C js/packages/cyvest-js run generate:types
331
331
  ```
332
332
 
333
333
  ## Development
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cyvest"
3
- version = "2.0.0"
3
+ version = "3.0.0"
4
4
  description = "Cybersecurity investigation model"
5
5
  readme = {file = "README.md", content-type = "text/markdown"}
6
6
  requires-python = ">=3.10"
@@ -11,7 +11,9 @@ authors = [
11
11
  dependencies = [
12
12
  "click>=8",
13
13
  "logurich[click]>=0.1",
14
+ "pydantic>=2.12.5",
14
15
  "rich>=13",
16
+ "typing-extensions>=4.15",
15
17
  ]
16
18
  keywords = ["cybersecurity", "investigation", "threat-intel", "security-analysis"]
17
19
  classifiers = [
@@ -8,12 +8,17 @@ programmatically with automatic scoring, level calculation, and rich reporting c
8
8
  from logurich import logger
9
9
 
10
10
  from cyvest.cyvest import Cyvest
11
- from cyvest.investigation import InvestigationWhitelist
12
11
  from cyvest.levels import Level
13
- from cyvest.model import CheckScorePolicy, ObservableType, RelationshipDirection, RelationshipType
12
+ from cyvest.model import (
13
+ CheckScorePolicy,
14
+ InvestigationWhitelist,
15
+ ObservableType,
16
+ RelationshipDirection,
17
+ RelationshipType,
18
+ )
14
19
  from cyvest.proxies import CheckProxy, ContainerProxy, EnrichmentProxy, ObservableProxy, ThreatIntelProxy
15
20
 
16
- __version__ = "2.0.0"
21
+ __version__ = "3.0.0"
17
22
 
18
23
  logger.disable("cyvest")
19
24
 
@@ -166,10 +166,10 @@ def merge(inputs: tuple[Path, ...], output: Path, output_format: str, stats: boo
166
166
  if stats:
167
167
  logger.info("[bold]Merged Investigation Statistics:[/bold]")
168
168
  investigation_stats = main_investigation.get_statistics()
169
- logger.info(f" Total Observables: {investigation_stats.get('total_observables', 0)}")
170
- logger.info(f" Total Checks: {investigation_stats.get('total_checks', 0)}")
171
- logger.info(f" Total Threat Intel: {investigation_stats.get('total_threat_intel', 0)}")
172
- logger.info(f" Total Containers: {investigation_stats.get('total_containers', 0)}")
169
+ logger.info(f" Total Observables: {investigation_stats.total_observables}")
170
+ logger.info(f" Total Checks: {investigation_stats.total_checks}")
171
+ logger.info(f" Total Threat Intel: {investigation_stats.total_threat_intel}")
172
+ logger.info(f" Total Containers: {investigation_stats.total_containers}")
173
173
  logger.info(f" Global Score: {main_investigation.get_global_score()}")
174
174
  logger.info(f" Global Level: {main_investigation.get_global_level()}\n")
175
175
 
@@ -236,7 +236,7 @@ def schema_cmd(output: Path | None) -> None:
236
236
  if output:
237
237
  output_path = output.resolve()
238
238
  output_path.parent.mkdir(parents=True, exist_ok=True)
239
- output_path.write_text(json.dumps(schema, indent=2), encoding="utf-8")
239
+ output_path.write_text(json.dumps(schema, indent=2) + "\n", encoding="utf-8")
240
240
  logger.info(f"[green]Schema written to: {output_path}[/green]")
241
241
  return
242
242
 
@@ -26,10 +26,11 @@ from cyvest.io_serialization import (
26
26
  save_investigation_markdown,
27
27
  serialize_investigation,
28
28
  )
29
- from cyvest.levels import Level, normalize_level
29
+ from cyvest.levels import Level
30
30
  from cyvest.model import Check, CheckScorePolicy, Container, Enrichment, Observable, ThreatIntel
31
+ from cyvest.model_schema import InvestigationSchema, StatisticsSchema
31
32
  from cyvest.proxies import CheckProxy, ContainerProxy, EnrichmentProxy, ObservableProxy, ThreatIntelProxy
32
- from cyvest.score import ScoreMode, normalize_score_mode
33
+ from cyvest.score import ScoreMode
33
34
 
34
35
 
35
36
  class Cyvest:
@@ -54,7 +55,7 @@ class Cyvest:
54
55
  root_type: Type of root observable ("file" or "artifact")
55
56
  score_mode: Score calculation mode (MAX or SUM)
56
57
  """
57
- normalized_score_mode = normalize_score_mode(score_mode)
58
+ normalized_score_mode = ScoreMode.normalize(score_mode)
58
59
  self._investigation = Investigation(data, root_type=root_type, score_mode=normalized_score_mode)
59
60
 
60
61
  def __enter__(self) -> Cyvest:
@@ -214,8 +215,6 @@ class Cyvest:
214
215
  Returns:
215
216
  The created or existing observable
216
217
  """
217
- resolved_level = normalize_level(level) if level is not None else Level.INFO
218
-
219
218
  obs = Observable(
220
219
  obs_type=obs_type,
221
220
  value=value,
@@ -224,7 +223,7 @@ class Cyvest:
224
223
  comment=comment,
225
224
  extra=extra or {},
226
225
  score=Decimal(str(score)) if score is not None else Decimal("0"),
227
- level=resolved_level,
226
+ level=level if level is not None else Level.INFO,
228
227
  )
229
228
  # Unwrap tuple - facade returns only Observable, discards deferred relationships
230
229
  obs_result, _ = self._investigation.add_observable(obs)
@@ -304,15 +303,13 @@ class Cyvest:
304
303
  if not observable:
305
304
  return None
306
305
 
307
- resolved_level = normalize_level(level) if level is not None else Level.INFO
308
-
309
306
  ti = ThreatIntel(
310
307
  source=source,
311
308
  observable_key=observable_key,
312
309
  comment=comment,
313
310
  extra=extra or {},
314
311
  score=Decimal(str(score)),
315
- level=resolved_level,
312
+ level=level if level is not None else Level.INFO,
316
313
  taxonomies=taxonomies or [],
317
314
  )
318
315
  result = self._investigation.add_threat_intel(ti, observable)
@@ -332,7 +329,7 @@ class Cyvest:
332
329
  observable = self._investigation.get_observable(observable_key)
333
330
  if not observable:
334
331
  return None
335
- observable.set_level(normalize_level(level))
332
+ observable.set_level(level)
336
333
  return self._observable_proxy(observable)
337
334
 
338
335
  def observable_finalize_relationships(self) -> None:
@@ -372,9 +369,6 @@ class Cyvest:
372
369
  Returns:
373
370
  The created check
374
371
  """
375
- resolved_level = normalize_level(level) if level is not None else Level.NONE
376
- resolved_policy = CheckScorePolicy(score_policy) if score_policy is not None else CheckScorePolicy.AUTO
377
-
378
372
  check = Check(
379
373
  check_id=check_id,
380
374
  scope=scope,
@@ -382,8 +376,8 @@ class Cyvest:
382
376
  comment=comment,
383
377
  extra=extra or {},
384
378
  score=Decimal(str(score)) if score is not None else Decimal("0"),
385
- level=resolved_level,
386
- score_policy=resolved_policy,
379
+ level=level if level is not None else Level.NONE,
380
+ score_policy=score_policy if score_policy is not None else CheckScorePolicy.AUTO,
387
381
  )
388
382
  return self._check_proxy(self._investigation.add_check(check))
389
383
 
@@ -532,12 +526,12 @@ class Cyvest:
532
526
  """
533
527
  return self._investigation.get_global_level()
534
528
 
535
- def get_statistics(self) -> dict[str, Any]:
529
+ def get_statistics(self) -> StatisticsSchema:
536
530
  """
537
531
  Get comprehensive investigation statistics.
538
532
 
539
533
  Returns:
540
- Statistics dictionary
534
+ Statistics schema with typed fields
541
535
  """
542
536
  return self._investigation.get_statistics()
543
537
 
@@ -626,18 +620,18 @@ class Cyvest:
626
620
  """
627
621
  return generate_markdown_report(self, include_containers, include_enrichments, include_observables)
628
622
 
629
- def io_to_dict(self) -> dict[str, Any]:
623
+ def io_to_dict(self) -> InvestigationSchema:
630
624
  """
631
- Serialize the investigation to a dictionary.
625
+ Serialize the investigation to an InvestigationSchema.
632
626
 
633
627
  Returns:
634
- Dictionary representation suitable for JSON export
628
+ InvestigationSchema instance (use .model_dump() for dict)
635
629
 
636
630
  Examples:
637
631
  >>> cv = Cyvest()
638
- >>> data = cv.io_to_dict()
639
- >>> print(data.keys())
640
- dict_keys(['score', 'level', 'observables', 'checks', ...])
632
+ >>> schema = cv.io_to_dict()
633
+ >>> print(schema.score, schema.level)
634
+ >>> dict_data = schema.model_dump(by_alias=True)
641
635
  """
642
636
  return serialize_investigation(self)
643
637
 
@@ -748,13 +742,11 @@ class Cyvest:
748
742
  if observable_types is not None:
749
743
  obs_types_enum = [ObservableType(t) for t in observable_types]
750
744
 
751
- normalized_min_level = normalize_level(min_level) if min_level is not None else None
752
-
753
745
  return generate_network_graph(
754
746
  self,
755
747
  output_dir=output_dir,
756
748
  open_browser=open_browser,
757
- min_level=normalized_min_level,
749
+ min_level=min_level,
758
750
  observable_types=obs_types_enum,
759
751
  physics=physics,
760
752
  group_by_type=group_by_type,
@@ -9,7 +9,6 @@ from __future__ import annotations
9
9
 
10
10
  import threading
11
11
  from copy import deepcopy
12
- from dataclasses import dataclass
13
12
  from decimal import Decimal
14
13
  from pathlib import Path
15
14
  from typing import TYPE_CHECKING, Any, Literal, overload
@@ -18,12 +17,22 @@ from logurich import logger
18
17
 
19
18
  from cyvest import keys
20
19
  from cyvest.levels import Level, get_level_from_score, normalize_level
21
- from cyvest.model import Check, CheckScorePolicy, Container, Enrichment, Observable, ObservableType, ThreatIntel
22
- from cyvest.score import ScoreEngine, ScoreMode, normalize_score_mode
20
+ from cyvest.model import (
21
+ Check,
22
+ CheckScorePolicy,
23
+ Container,
24
+ Enrichment,
25
+ InvestigationWhitelist,
26
+ Observable,
27
+ ObservableType,
28
+ ThreatIntel,
29
+ )
30
+ from cyvest.score import ScoreEngine, ScoreMode
23
31
  from cyvest.stats import InvestigationStats
24
32
 
25
33
  if TYPE_CHECKING:
26
34
  from cyvest import Cyvest
35
+ from cyvest.model_schema import InvestigationSchema, StatisticsSchema
27
36
 
28
37
 
29
38
  class SharedInvestigationContext:
@@ -156,15 +165,15 @@ class SharedInvestigationContext:
156
165
  # Refresh registries from canonical, post-merge investigation state
157
166
  self._observable_registry = {}
158
167
  for obs in self._main_investigation.get_all_observables().values():
159
- copy = deepcopy(obs)
168
+ copy = obs.model_copy(deep=True)
160
169
  copy._from_shared_context = True
161
170
  self._observable_registry[obs.key] = copy
162
171
 
163
172
  self._check_registry = {
164
- check.key: deepcopy(check) for check in self._main_investigation.get_all_checks().values()
173
+ check.key: check.model_copy(deep=True) for check in self._main_investigation.get_all_checks().values()
165
174
  }
166
175
  self._enrichment_registry = {
167
- enrichment.key: deepcopy(enrichment)
176
+ enrichment.key: enrichment.model_copy(deep=True)
168
177
  for enrichment in self._main_investigation.get_all_enrichments().values()
169
178
  }
170
179
 
@@ -252,7 +261,7 @@ class SharedInvestigationContext:
252
261
  with self._lock:
253
262
  obs = self._observable_registry.get(key)
254
263
  if obs:
255
- copy = deepcopy(obs)
264
+ copy = obs.model_copy(deep=True)
256
265
  # Mark this as a copy from shared context to prevent misuse in relationships
257
266
  copy._from_shared_context = True
258
267
  return copy
@@ -310,7 +319,7 @@ class SharedInvestigationContext:
310
319
  with self._lock:
311
320
  check = self._check_registry.get(key)
312
321
  if check:
313
- return deepcopy(check)
322
+ return check.model_copy(deep=True)
314
323
  return None
315
324
 
316
325
  @overload
@@ -376,7 +385,7 @@ class SharedInvestigationContext:
376
385
  with self._lock:
377
386
  enrichment = self._enrichment_registry.get(key)
378
387
  if enrichment:
379
- return deepcopy(enrichment)
388
+ return enrichment.model_copy(deep=True)
380
389
  return None
381
390
 
382
391
  def get_global_score(self) -> Decimal:
@@ -461,7 +470,7 @@ class SharedInvestigationContext:
461
470
  matches = []
462
471
  for obs in self._observable_registry.values():
463
472
  if obs.obs_type == obs_type:
464
- matches.append(deepcopy(obs))
473
+ matches.append(obs.model_copy(deep=True))
465
474
  return matches
466
475
 
467
476
  def find_observables_by_value(self, value: str) -> list[Observable]:
@@ -478,7 +487,7 @@ class SharedInvestigationContext:
478
487
  matches = []
479
488
  for obs in self._observable_registry.values():
480
489
  if obs.value == value:
481
- matches.append(deepcopy(obs))
490
+ matches.append(obs.model_copy(deep=True))
482
491
  return matches
483
492
 
484
493
  @overload
@@ -645,20 +654,19 @@ class SharedInvestigationContext:
645
654
  save_investigation_markdown(temp_cy, filepath, include_containers, include_enrichments, include_observables)
646
655
  return str(Path(filepath).resolve())
647
656
 
648
- def io_to_dict(self) -> dict[str, Any]:
657
+ def io_to_dict(self) -> InvestigationSchema:
649
658
  """
650
- Serialize the shared investigation to a dictionary.
659
+ Serialize the shared investigation to an InvestigationSchema.
651
660
 
652
661
  Thread-safe: Uses lock to ensure consistent read of investigation state.
653
662
 
654
663
  Returns:
655
- Dictionary representation suitable for JSON export
664
+ InvestigationSchema instance (use .model_dump() for dict)
656
665
 
657
666
  Example:
658
667
  >>> shared = SharedInvestigationContext(main_inv)
659
- >>> data = shared.io_to_dict()
660
- >>> print(data.keys())
661
- dict_keys(['score', 'level', 'whitelisted', 'observables', 'checks', ...])
668
+ >>> schema = shared.io_to_dict()
669
+ >>> dict_data = schema.model_dump(by_alias=True)
662
670
  """
663
671
  from cyvest import Cyvest
664
672
  from cyvest.io_serialization import serialize_investigation
@@ -704,35 +712,6 @@ class SharedInvestigationContext:
704
712
  return str(Path(filepath).resolve())
705
713
 
706
714
 
707
- @dataclass
708
- class InvestigationWhitelist:
709
- """Represents a whitelist entry on an investigation."""
710
-
711
- identifier: str
712
- name: str
713
- justification: str | None = None
714
-
715
- def to_dict(self) -> dict[str, str | None]:
716
- """Serialize whitelist entry to a dictionary."""
717
- return {
718
- "identifier": self.identifier,
719
- "name": self.name,
720
- "justification": self.justification,
721
- }
722
-
723
- @classmethod
724
- def from_dict(cls, data: dict[str, Any]) -> InvestigationWhitelist:
725
- """Construct a whitelist entry from a dictionary."""
726
- justification = data.get("justification")
727
- if justification is not None:
728
- justification = str(justification)
729
- return cls(
730
- identifier=str(data.get("identifier", "")).strip(),
731
- name=str(data.get("name", "")).strip(),
732
- justification=justification,
733
- )
734
-
735
-
736
715
  class Investigation:
737
716
  """
738
717
  Core investigation state and operations.
@@ -782,7 +761,7 @@ class Investigation:
782
761
  self._containers: dict[str, Container] = {}
783
762
 
784
763
  # Internal components
785
- normalized_score_mode = normalize_score_mode(score_mode)
764
+ normalized_score_mode = ScoreMode.normalize(score_mode)
786
765
  self._score_engine = ScoreEngine(score_mode=normalized_score_mode)
787
766
  self._stats = InvestigationStats()
788
767
  self._whitelists: dict[str, InvestigationWhitelist] = {}
@@ -843,7 +822,7 @@ class Investigation:
843
822
  if existing.extra:
844
823
  existing.extra.update(incoming.extra)
845
824
  elif incoming.extra:
846
- existing.extra = dict().update(incoming.extra)
825
+ existing.extra = dict(incoming.extra)
847
826
 
848
827
  # Concatenate comments
849
828
  if incoming.comment:
@@ -1578,9 +1557,9 @@ class Investigation:
1578
1557
 
1579
1558
  def get_whitelists(self) -> list[InvestigationWhitelist]:
1580
1559
  """Return a copy of all whitelist entries."""
1581
- return deepcopy(list(self._whitelists.values()))
1560
+ return [w.model_copy(deep=True) for w in self._whitelists.values()]
1582
1561
 
1583
- def get_statistics(self) -> dict[str, Any]:
1562
+ def get_statistics(self) -> StatisticsSchema:
1584
1563
  """Get comprehensive investigation statistics."""
1585
1564
  return self._stats.get_summary()
1586
1565
 
@@ -176,11 +176,11 @@ def display_summary(
176
176
 
177
177
  stats = cv.get_statistics()
178
178
  stat_items = [
179
- ("Total Observables", stats.get("total_observables", 0)),
180
- ("Internal Observables", stats.get("internal_observables", 0)),
181
- ("External Observables", stats.get("external_observables", 0)),
182
- ("Whitelisted Observables", stats.get("whitelisted_observables", 0)),
183
- ("Total Threat Intel", stats.get("total_threat_intel", 0)),
179
+ ("Total Observables", stats.total_observables),
180
+ ("Internal Observables", stats.internal_observables),
181
+ ("External Observables", stats.external_observables),
182
+ ("Whitelisted Observables", stats.whitelisted_observables),
183
+ ("Total Threat Intel", stats.total_threat_intel),
184
184
  ]
185
185
 
186
186
  for stat_name, stat_value in stat_items:
@@ -301,8 +301,8 @@ def display_statistics(cv: Cyvest, rich_print: Callable[[Any], None]) -> None:
301
301
  obs_table.add_column("SUSPICIOUS", justify="right", style="orange3")
302
302
  obs_table.add_column("MALICIOUS", justify="right", style="red")
303
303
 
304
- obs_by_type_level = stats.get("observables_by_type_and_level", {})
305
- for obs_type, count in stats.get("observables_by_type", {}).items():
304
+ obs_by_type_level = stats.observables_by_type_and_level
305
+ for obs_type, count in stats.observables_by_type.items():
306
306
  levels = obs_by_type_level.get(obs_type, {})
307
307
  obs_table.add_row(
308
308
  obs_type.upper(),
@@ -321,19 +321,19 @@ def display_statistics(cv: Cyvest, rich_print: Callable[[Any], None]) -> None:
321
321
  check_table.add_column("Scope", style="cyan")
322
322
  check_table.add_column("Count", justify="right")
323
323
 
324
- for scope, count in stats.get("checks_by_scope", {}).items():
324
+ for scope, count in stats.checks_by_scope.items():
325
325
  check_table.add_row(scope, str(count))
326
326
 
327
327
  rich_print(check_table)
328
328
 
329
329
  # Threat intel statistics
330
- if stats.get("total_threat_intel", 0) > 0:
330
+ if stats.total_threat_intel > 0:
331
331
  rich_print("")
332
332
  ti_table = Table(title="Threat Intelligence Statistics")
333
333
  ti_table.add_column("Source", style="cyan")
334
334
  ti_table.add_column("Count", justify="right")
335
335
 
336
- for source, count in stats.get("threat_intel_by_source", {}).items():
336
+ for source, count in stats.threat_intel_by_source.items():
337
337
  ti_table.add_row(source, str(count))
338
338
 
339
339
  rich_print(ti_table)
@@ -0,0 +1,35 @@
1
+ """
2
+ JSON Schema definition for serialized Cyvest investigations.
3
+
4
+ The schema mirrors the structure emitted by `serialize_investigation` in
5
+ `cyvest.io_serialization` so consumers can validate exports or generate
6
+ typed bindings.
7
+
8
+ This module uses Pydantic's `model_json_schema(mode='serialization')` to generate
9
+ schemas that match the actual serialized output (respecting field_serializer decorators).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from cyvest.model_schema import InvestigationSchema
17
+
18
+
19
+ def get_investigation_schema() -> dict[str, Any]:
20
+ """
21
+ Get the JSON Schema for serialized investigations.
22
+
23
+ Generates a JSON Schema (Draft 2020-12) that describes the output of
24
+ `serialize_investigation()`. The schema uses Pydantic's `model_json_schema`
25
+ with `mode='serialization'`, which respects field_serializer decorators and
26
+ matches the actual `model_dump()` output structure.
27
+
28
+ The returned schema automatically includes all referenced entity types
29
+ (Observable, Check, ThreatIntel, Enrichment, Container, InvestigationWhitelist)
30
+ in the `$defs` section.
31
+
32
+ Returns:
33
+ dict[str, Any]: Schema dictionary compliant with JSON Schema Draft 2020-12.
34
+ """
35
+ return InvestigationSchema.model_json_schema(mode="serialization", by_alias=True)