contentctl 5.0.0a0__py3-none-any.whl → 5.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. contentctl/__init__.py +1 -1
  2. contentctl/actions/build.py +88 -55
  3. contentctl/actions/deploy_acs.py +29 -24
  4. contentctl/actions/detection_testing/DetectionTestingManager.py +66 -41
  5. contentctl/actions/detection_testing/GitService.py +134 -76
  6. contentctl/actions/detection_testing/generate_detection_coverage_badge.py +48 -30
  7. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +163 -124
  8. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructureContainer.py +45 -32
  9. contentctl/actions/detection_testing/progress_bar.py +3 -0
  10. contentctl/actions/detection_testing/views/DetectionTestingView.py +15 -18
  11. contentctl/actions/detection_testing/views/DetectionTestingViewCLI.py +1 -5
  12. contentctl/actions/detection_testing/views/DetectionTestingViewFile.py +2 -2
  13. contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py +1 -4
  14. contentctl/actions/doc_gen.py +9 -5
  15. contentctl/actions/initialize.py +45 -33
  16. contentctl/actions/inspect.py +118 -61
  17. contentctl/actions/new_content.py +78 -50
  18. contentctl/actions/release_notes.py +276 -146
  19. contentctl/actions/reporting.py +23 -19
  20. contentctl/actions/test.py +31 -25
  21. contentctl/actions/validate.py +54 -34
  22. contentctl/api.py +54 -45
  23. contentctl/contentctl.py +12 -13
  24. contentctl/enrichments/attack_enrichment.py +112 -72
  25. contentctl/enrichments/cve_enrichment.py +34 -28
  26. contentctl/enrichments/splunk_app_enrichment.py +38 -36
  27. contentctl/helper/link_validator.py +101 -78
  28. contentctl/helper/splunk_app.py +69 -41
  29. contentctl/helper/utils.py +58 -39
  30. contentctl/input/director.py +69 -37
  31. contentctl/input/new_content_questions.py +26 -34
  32. contentctl/input/yml_reader.py +22 -17
  33. contentctl/objects/abstract_security_content_objects/detection_abstract.py +250 -314
  34. contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +58 -36
  35. contentctl/objects/alert_action.py +8 -8
  36. contentctl/objects/annotated_types.py +1 -1
  37. contentctl/objects/atomic.py +64 -54
  38. contentctl/objects/base_test.py +2 -1
  39. contentctl/objects/base_test_result.py +16 -8
  40. contentctl/objects/baseline.py +41 -30
  41. contentctl/objects/baseline_tags.py +29 -22
  42. contentctl/objects/config.py +772 -560
  43. contentctl/objects/constants.py +29 -58
  44. contentctl/objects/correlation_search.py +75 -55
  45. contentctl/objects/dashboard.py +55 -41
  46. contentctl/objects/data_source.py +13 -13
  47. contentctl/objects/deployment.py +44 -37
  48. contentctl/objects/deployment_email.py +1 -1
  49. contentctl/objects/deployment_notable.py +2 -1
  50. contentctl/objects/deployment_phantom.py +5 -5
  51. contentctl/objects/deployment_rba.py +1 -1
  52. contentctl/objects/deployment_scheduling.py +1 -1
  53. contentctl/objects/deployment_slack.py +1 -1
  54. contentctl/objects/detection.py +5 -2
  55. contentctl/objects/detection_metadata.py +1 -0
  56. contentctl/objects/detection_stanza.py +7 -2
  57. contentctl/objects/detection_tags.py +54 -64
  58. contentctl/objects/drilldown.py +66 -35
  59. contentctl/objects/enums.py +61 -43
  60. contentctl/objects/errors.py +16 -24
  61. contentctl/objects/integration_test.py +3 -3
  62. contentctl/objects/integration_test_result.py +1 -0
  63. contentctl/objects/investigation.py +41 -26
  64. contentctl/objects/investigation_tags.py +29 -17
  65. contentctl/objects/lookup.py +234 -113
  66. contentctl/objects/macro.py +55 -38
  67. contentctl/objects/manual_test.py +3 -3
  68. contentctl/objects/manual_test_result.py +1 -0
  69. contentctl/objects/mitre_attack_enrichment.py +17 -16
  70. contentctl/objects/notable_action.py +2 -1
  71. contentctl/objects/notable_event.py +1 -3
  72. contentctl/objects/playbook.py +37 -35
  73. contentctl/objects/playbook_tags.py +22 -16
  74. contentctl/objects/rba.py +14 -8
  75. contentctl/objects/risk_analysis_action.py +15 -11
  76. contentctl/objects/risk_event.py +27 -20
  77. contentctl/objects/risk_object.py +1 -0
  78. contentctl/objects/savedsearches_conf.py +9 -7
  79. contentctl/objects/security_content_object.py +5 -2
  80. contentctl/objects/story.py +45 -44
  81. contentctl/objects/story_tags.py +56 -44
  82. contentctl/objects/test_group.py +5 -2
  83. contentctl/objects/threat_object.py +1 -0
  84. contentctl/objects/throttling.py +27 -18
  85. contentctl/objects/unit_test.py +3 -4
  86. contentctl/objects/unit_test_baseline.py +4 -5
  87. contentctl/objects/unit_test_result.py +6 -6
  88. contentctl/output/api_json_output.py +22 -22
  89. contentctl/output/attack_nav_output.py +21 -21
  90. contentctl/output/attack_nav_writer.py +29 -37
  91. contentctl/output/conf_output.py +230 -174
  92. contentctl/output/data_source_writer.py +38 -25
  93. contentctl/output/doc_md_output.py +53 -27
  94. contentctl/output/jinja_writer.py +19 -15
  95. contentctl/output/json_writer.py +20 -8
  96. contentctl/output/svg_output.py +56 -38
  97. contentctl/output/templates/savedsearches_detections.j2 +1 -1
  98. contentctl/output/templates/transforms.j2 +2 -2
  99. contentctl/output/yml_writer.py +18 -24
  100. {contentctl-5.0.0a0.dist-info → contentctl-5.0.0a3.dist-info}/METADATA +1 -1
  101. contentctl-5.0.0a3.dist-info/RECORD +168 -0
  102. contentctl/actions/initialize_old.py +0 -245
  103. contentctl/objects/observable.py +0 -39
  104. contentctl-5.0.0a0.dist-info/RECORD +0 -170
  105. {contentctl-5.0.0a0.dist-info → contentctl-5.0.0a3.dist-info}/LICENSE.md +0 -0
  106. {contentctl-5.0.0a0.dist-info → contentctl-5.0.0a3.dist-info}/WHEEL +0 -0
  107. {contentctl-5.0.0a0.dist-info → contentctl-5.0.0a3.dist-info}/entry_points.txt +0 -0
@@ -1,33 +1,45 @@
1
1
  from __future__ import annotations
2
2
  from typing import List
3
- from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer,ConfigDict
3
+ from pydantic import (
4
+ BaseModel,
5
+ Field,
6
+ field_validator,
7
+ ValidationInfo,
8
+ model_serializer,
9
+ ConfigDict,
10
+ )
4
11
  from contentctl.objects.story import Story
5
- from contentctl.objects.enums import SecurityContentInvestigationProductName, SecurityDomain
12
+ from contentctl.objects.enums import (
13
+ SecurityContentInvestigationProductName,
14
+ SecurityDomain,
15
+ )
16
+
6
17
 
7
18
  class InvestigationTags(BaseModel):
8
19
  model_config = ConfigDict(extra="forbid")
9
- analytic_story: List[Story] = Field([],min_length=1)
10
- product: List[SecurityContentInvestigationProductName] = Field(...,min_length=1)
20
+ analytic_story: List[Story] = Field([], min_length=1)
21
+ product: List[SecurityContentInvestigationProductName] = Field(..., min_length=1)
11
22
  security_domain: SecurityDomain = Field(...)
12
23
 
13
-
14
- @field_validator('analytic_story',mode="before")
24
+ @field_validator("analytic_story", mode="before")
15
25
  @classmethod
16
- def mapStoryNamesToStoryObjects(cls, v:list[str], info:ValidationInfo)->list[Story]:
17
- return Story.mapNamesToSecurityContentObjects(v, info.context.get("output_dto",None))
18
-
26
+ def mapStoryNamesToStoryObjects(
27
+ cls, v: list[str], info: ValidationInfo
28
+ ) -> list[Story]:
29
+ return Story.mapNamesToSecurityContentObjects(
30
+ v, info.context.get("output_dto", None)
31
+ )
19
32
 
20
33
  @model_serializer
21
34
  def serialize_model(self):
22
- #All fields custom to this model
23
- model= {
35
+ # All fields custom to this model
36
+ model = {
24
37
  "analytic_story": [story.name for story in self.analytic_story],
25
38
  "product": self.product,
26
39
  "security_domain": self.security_domain,
27
40
  }
28
-
29
- #Combine fields from this model with fields from parent
30
-
31
-
32
- #return the model
33
- return model
41
+
42
+ # Combine fields from this model with fields from parent
43
+
44
+ # return the model
45
+ return model
@@ -1,35 +1,64 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt, computed_field, TypeAdapter
4
- from enum import StrEnum, auto
5
- from typing import TYPE_CHECKING, Optional, Any, Union, Literal, Annotated, Self
6
- import re
7
- import csv
8
3
  import abc
9
- from functools import cached_property
4
+ import csv
10
5
  import pathlib
6
+ import re
7
+ from enum import StrEnum, auto
8
+ from functools import cached_property
9
+ from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, Self
10
+
11
+ from pydantic import (
12
+ Field,
13
+ FilePath,
14
+ NonNegativeInt,
15
+ TypeAdapter,
16
+ ValidationInfo,
17
+ computed_field,
18
+ field_validator,
19
+ model_serializer,
20
+ model_validator,
21
+ )
22
+
11
23
  if TYPE_CHECKING:
12
24
  from contentctl.input.director import DirectorOutputDto
13
25
  from contentctl.objects.config import validate
26
+
14
27
  from contentctl.objects.security_content_object import SecurityContentObject
15
28
 
16
29
  # This section is used to ignore lookups that are NOT shipped with the ESCU app but are used in the detections. Adding exclusions here ensures that contentctl builds will not fail.
17
30
  LOOKUPS_TO_IGNORE = set(["outputlookup"])
18
- LOOKUPS_TO_IGNORE.add("ut_shannon_lookup") #In the URL toolbox app which is recommended for ESCU
19
- LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Asset and Identity Framework
20
- LOOKUPS_TO_IGNORE.add("cim_corporate_web_domain_lookup") #Shipped with the Asset and Identity Framework
21
- LOOKUPS_TO_IGNORE.add("cim_corporate_email_domain_lookup") #Shipped with the Enterprise Security
22
- LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") #Shipped with the Enterprise Security
23
-
24
- LOOKUPS_TO_IGNORE.add("alexa_lookup_by_str") #Shipped with the Asset and Identity Framework
25
- LOOKUPS_TO_IGNORE.add("interesting_ports_lookup") #Shipped with the Asset and Identity Framework
26
- LOOKUPS_TO_IGNORE.add("asset_lookup_by_str") #Shipped with the Asset and Identity Framework
27
- LOOKUPS_TO_IGNORE.add("admon_groups_def") #Shipped with the SA-admon addon
28
- LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Enterprise Security
29
-
30
- #Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
31
- LOOKUPS_TO_IGNORE.add("=")
32
- LOOKUPS_TO_IGNORE.add("other_lookups")
31
+ LOOKUPS_TO_IGNORE.add(
32
+ "ut_shannon_lookup"
33
+ ) # In the URL toolbox app which is recommended for ESCU
34
+ LOOKUPS_TO_IGNORE.add(
35
+ "identity_lookup_expanded"
36
+ ) # Shipped with the Asset and Identity Framework
37
+ LOOKUPS_TO_IGNORE.add(
38
+ "cim_corporate_web_domain_lookup"
39
+ ) # Shipped with the Asset and Identity Framework
40
+ LOOKUPS_TO_IGNORE.add(
41
+ "cim_corporate_email_domain_lookup"
42
+ ) # Shipped with the Enterprise Security
43
+ LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") # Shipped with the Enterprise Security
44
+
45
+ LOOKUPS_TO_IGNORE.add(
46
+ "alexa_lookup_by_str"
47
+ ) # Shipped with the Asset and Identity Framework
48
+ LOOKUPS_TO_IGNORE.add(
49
+ "interesting_ports_lookup"
50
+ ) # Shipped with the Asset and Identity Framework
51
+ LOOKUPS_TO_IGNORE.add(
52
+ "asset_lookup_by_str"
53
+ ) # Shipped with the Asset and Identity Framework
54
+ LOOKUPS_TO_IGNORE.add("admon_groups_def") # Shipped with the SA-admon addon
55
+ LOOKUPS_TO_IGNORE.add(
56
+ "identity_lookup_expanded"
57
+ ) # Shipped with the Enterprise Security
58
+
59
+ # Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
60
+ LOOKUPS_TO_IGNORE.add("=")
61
+ LOOKUPS_TO_IGNORE.add("other_lookups")
33
62
 
34
63
 
35
64
  class Lookup_Type(StrEnum):
@@ -38,164 +67,224 @@ class Lookup_Type(StrEnum):
38
67
  mlmodel = auto()
39
68
 
40
69
 
41
-
42
70
  # TODO (#220): Split Lookup into 2 classes
43
- class Lookup(SecurityContentObject, abc.ABC):
71
+ class Lookup(SecurityContentObject, abc.ABC):
44
72
  default_match: Optional[bool] = None
45
73
  # Per the documentation for transforms.conf, EXACT should not be specified in this list,
46
74
  # so we include only WILDCARD and CIDR
47
- match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(default=[])
75
+ match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(
76
+ default=[]
77
+ )
48
78
  min_matches: None | NonNegativeInt = Field(default=None)
49
- max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(default=None)
79
+ max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(
80
+ default=None
81
+ )
50
82
  case_sensitive_match: None | bool = Field(default=None)
51
-
52
-
53
-
54
-
55
83
 
56
84
  @model_serializer
57
85
  def serialize_model(self):
58
- #Call parent serializer
86
+ # Call parent serializer
59
87
  super_fields = super().serialize_model()
60
88
 
61
- #All fields custom to this model
62
- model= {
63
-
89
+ # All fields custom to this model
90
+ model = {
64
91
  "default_match": "true" if self.default_match is True else "false",
65
92
  "match_type": self.match_type_to_conf_format,
66
93
  "min_matches": self.min_matches,
67
94
  "max_matches": self.max_matches,
68
- "case_sensitive_match": "true" if self.case_sensitive_match is True else "false",
95
+ "case_sensitive_match": "true"
96
+ if self.case_sensitive_match is True
97
+ else "false",
69
98
  }
70
-
71
- #return the model
99
+
100
+ # return the model
72
101
  model.update(super_fields)
73
102
  return model
74
103
 
75
104
  @model_validator(mode="before")
76
- def fix_lookup_path(cls, data:Any, info: ValidationInfo)->Any:
105
+ def fix_lookup_path(cls, data: Any, info: ValidationInfo) -> Any:
77
106
  if data.get("filename"):
78
- config:validate = info.context.get("config",None)
107
+ config: validate = info.context.get("config", None)
79
108
  if config is not None:
80
109
  data["filename"] = config.path / "lookups/" / data["filename"]
81
110
  else:
82
- raise ValueError("config required for constructing lookup filename, but it was not")
111
+ raise ValueError(
112
+ "config required for constructing lookup filename, but it was not"
113
+ )
83
114
  return data
84
115
 
85
-
86
116
  @computed_field
87
117
  @cached_property
88
- def match_type_to_conf_format(self)->str:
89
- return ', '.join(self.match_type)
90
-
91
-
118
+ def match_type_to_conf_format(self) -> str:
119
+ return ", ".join(self.match_type)
120
+
92
121
  @staticmethod
93
- def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
122
+ def get_lookups(
123
+ text_field: str,
124
+ director: DirectorOutputDto,
125
+ ignore_lookups: set[str] = LOOKUPS_TO_IGNORE,
126
+ ) -> list[Lookup]:
94
127
  # Comprehensively match all kinds of lookups, including inputlookup and outputlookup
95
- inputLookupsToGet = set(re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', text_field, re.IGNORECASE))
96
- outputLookupsToGet = set(re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)',text_field,re.IGNORECASE))
97
- lookupsToGet = set(re.findall(r'[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)', text_field, re.IGNORECASE))
98
-
99
-
100
- input_lookups = Lookup.mapNamesToSecurityContentObjects(list(inputLookupsToGet-LOOKUPS_TO_IGNORE), director)
101
- output_lookups = Lookup.mapNamesToSecurityContentObjects(list(outputLookupsToGet-LOOKUPS_TO_IGNORE), director)
102
- lookups = Lookup.mapNamesToSecurityContentObjects(list(lookupsToGet-LOOKUPS_TO_IGNORE), director)
128
+ inputLookupsToGet = set(
129
+ re.findall(
130
+ r"[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)",
131
+ text_field,
132
+ re.IGNORECASE,
133
+ )
134
+ )
135
+ outputLookupsToGet = set(
136
+ re.findall(
137
+ r"[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)",
138
+ text_field,
139
+ re.IGNORECASE,
140
+ )
141
+ )
142
+ lookupsToGet = set(
143
+ re.findall(
144
+ r"[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)",
145
+ text_field,
146
+ re.IGNORECASE,
147
+ )
148
+ )
149
+
150
+ input_lookups = Lookup.mapNamesToSecurityContentObjects(
151
+ list(inputLookupsToGet - LOOKUPS_TO_IGNORE), director
152
+ )
153
+ output_lookups = Lookup.mapNamesToSecurityContentObjects(
154
+ list(outputLookupsToGet - LOOKUPS_TO_IGNORE), director
155
+ )
156
+ lookups = Lookup.mapNamesToSecurityContentObjects(
157
+ list(lookupsToGet - LOOKUPS_TO_IGNORE), director
158
+ )
103
159
 
104
160
  all_lookups = set(input_lookups + output_lookups + lookups)
105
161
 
106
162
  return list(all_lookups)
107
163
 
108
164
 
109
-
110
-
111
165
  class FileBackedLookup(Lookup, abc.ABC):
112
166
  # For purposes of the discriminated union, the child classes which
113
167
  # inherit from this class must declare the typing of lookup_type
114
168
  # themselves, hence it is not defined in the Lookup class
115
169
 
116
170
  @model_validator(mode="after")
117
- def ensure_lookup_file_exists(self)->Self:
171
+ def ensure_lookup_file_exists(self) -> Self:
118
172
  if not self.filename.exists():
119
173
  raise ValueError(f"Expected lookup filename {self.filename} does not exist")
120
174
  return self
121
175
 
122
176
  @computed_field
123
177
  @cached_property
124
- def filename(self)->FilePath:
125
- if self.file_path is None:
126
- raise ValueError(f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None") #type: ignore
127
-
128
- csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" #type: ignore
129
- return csv_file
130
-
178
+ @abc.abstractmethod
179
+ def filename(self) -> FilePath:
180
+ """
181
+ This function computes the backing file for the lookup. It is abstract because different types of lookups
182
+ (CSV or MlModel) have backing files with different name formats.
183
+ """
184
+ pass
185
+
131
186
  @computed_field
132
187
  @cached_property
133
- def app_filename(self)->FilePath:
134
- '''
135
- We may consider two options:
136
- 1. Always apply the datetime stamp to the end of the file. This makes the code easier
137
- 2. Only apply the datetime stamp if it is version > 1. This makes the code a small fraction
138
- more complicated, but preserves longstanding CSV that have not been modified in a long time
139
- '''
140
- return pathlib.Path(f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}") #type: ignore
188
+ @abc.abstractmethod
189
+ def app_filename(self) -> FilePath:
190
+ """
191
+ This function computes the filenames to write into the app itself. This is abstract because
192
+ CSV and MLmodel requirements are different.
193
+ """
194
+ pass
195
+
141
196
 
142
197
  class CSVLookup(FileBackedLookup):
143
- lookup_type:Literal[Lookup_Type.csv]
144
-
198
+ lookup_type: Literal[Lookup_Type.csv]
199
+
145
200
  @model_serializer
146
201
  def serialize_model(self):
147
- #Call parent serializer
202
+ # Call parent serializer
148
203
  super_fields = super().serialize_model()
149
204
 
150
- #All fields custom to this model
151
- model= {
152
- "filename": self.app_filename.name
153
- }
154
-
155
- #return the model
205
+ # All fields custom to this model
206
+ model = {"filename": self.app_filename.name}
207
+
208
+ # return the model
156
209
  model.update(super_fields)
157
210
  return model
158
-
211
+
212
+ @computed_field
213
+ @cached_property
214
+ def filename(self) -> FilePath:
215
+ """
216
+ This function computes the backing file for the lookup. The names of CSV files must EXACTLY match the
217
+ names of their lookup definitions except with the CSV file extension rather than the YML file extension.
218
+ """
219
+ if self.file_path is None:
220
+ raise ValueError(
221
+ f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None"
222
+ ) # type: ignore
223
+
224
+ csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" # type: ignore
225
+
226
+ return csv_file
227
+
228
+ @computed_field
229
+ @cached_property
230
+ def app_filename(self) -> FilePath:
231
+ """
232
+ This function computes the filename to write into the app itself. For CSV lookups this is the
233
+ backing file's stem with the lookup's date (year, month, day) appended, plus the lookup_type extension.
234
+ """
235
+ return pathlib.Path(
236
+ f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}"
237
+ )
238
+
159
239
  @model_validator(mode="after")
160
- def ensure_correct_csv_structure(self)->Self:
240
+ def ensure_correct_csv_structure(self) -> Self:
161
241
  # https://docs.python.org/3/library/csv.html#csv.DictReader
162
242
  # Column Names (fieldnames) are determined by the number of columns in the first row.
163
243
  # If a row has MORE fields than fieldnames, they will be dumped in a list under the key 'restkey' - this should throw an Exception
164
- # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
165
- csv_errors:list[str] = []
244
+ # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
245
+ csv_errors: list[str] = []
166
246
  with open(self.filename, "r") as csv_fp:
167
247
  RESTKEY = "extra_fields_in_a_row"
168
- csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
248
+ csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
169
249
  if csv_dict.fieldnames is None:
170
- raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t"
171
- "Unable to read fieldnames from CSV. Is the CSV empty?\n"
172
- " Please try opening the file with a CSV Editor to ensure that it is correct.")
250
+ raise ValueError(
251
+ f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t"
252
+ "Unable to read fieldnames from CSV. Is the CSV empty?\n"
253
+ " Please try opening the file with a CSV Editor to ensure that it is correct."
254
+ )
173
255
  # Remember that row 1 has the headers and we do not iterate over it in the loop below
174
256
  # CSVs are typically indexed starting at row 1 for the header.
175
257
  for row_index, data_row in enumerate(csv_dict):
176
- row_index+=2
177
- if len(data_row.get(RESTKEY,[])) > 0:
178
- csv_errors.append(f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns,"
179
- f" but instead had [{len(csv_dict.fieldnames) + len(data_row.get(RESTKEY,[]))}].")
180
-
258
+ row_index += 2
259
+ if len(data_row.get(RESTKEY, [])) > 0:
260
+ csv_errors.append(
261
+ f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns,"
262
+ f" but instead had [{len(csv_dict.fieldnames) + len(data_row.get(RESTKEY, []))}]."
263
+ )
264
+
181
265
  for column_index, column_name in enumerate(data_row):
182
266
  if data_row[column_name] is None:
183
- csv_errors.append(f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns, "
184
- f"but instead had [{column_index}].")
267
+ csv_errors.append(
268
+ f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns, "
269
+ f"but instead had [{column_index}]."
270
+ )
185
271
  if len(csv_errors) > 0:
186
- err_string = '\n\t'.join(csv_errors)
187
- raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n"
188
- f" Please try opening the file with a CSV Editor to ensure that it is correct.")
189
-
190
- return self
272
+ err_string = "\n\t".join(csv_errors)
273
+ raise ValueError(
274
+ f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n"
275
+ f" Please try opening the file with a CSV Editor to ensure that it is correct."
276
+ )
191
277
 
278
+ return self
192
279
 
193
280
 
194
281
  class KVStoreLookup(Lookup):
195
282
  lookup_type: Literal[Lookup_Type.kvstore]
196
- fields: list[str] = Field(description="The names of the fields/headings for the KVStore.", min_length=1)
283
+ fields: list[str] = Field(
284
+ description="The names of the fields/headings for the KVStore.", min_length=1
285
+ )
197
286
 
198
- @field_validator("fields", mode='after')
287
+ @field_validator("fields", mode="after")
199
288
  @classmethod
200
289
  def ensure_key(cls, values: list[str]):
201
290
  if values[0] != "_key":
@@ -204,32 +293,64 @@ class KVStoreLookup(Lookup):
204
293
 
205
294
  @computed_field
206
295
  @cached_property
207
- def collection(self)->str:
296
+ def collection(self) -> str:
208
297
  return self.name
209
298
 
210
299
  @computed_field
211
300
  @cached_property
212
- def fields_to_fields_list_conf_format(self)->str:
213
- return ', '.join(self.fields)
301
+ def fields_to_fields_list_conf_format(self) -> str:
302
+ return ", ".join(self.fields)
214
303
 
215
304
  @model_serializer
216
305
  def serialize_model(self):
217
- #Call parent serializer
306
+ # Call parent serializer
218
307
  super_fields = super().serialize_model()
219
308
 
220
- #All fields custom to this model
221
- model= {
309
+ # All fields custom to this model
310
+ model = {
222
311
  "collection": self.collection,
223
- "fields_list": self.fields_to_fields_list_conf_format
312
+ "fields_list": self.fields_to_fields_list_conf_format,
224
313
  }
225
-
226
- #return the model
314
+
315
+ # return the model
227
316
  model.update(super_fields)
228
317
  return model
229
318
 
319
+
230
320
  class MlModel(FileBackedLookup):
231
321
  lookup_type: Literal[Lookup_Type.mlmodel]
232
-
233
322
 
234
- LookupAdapter = TypeAdapter(Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")])
323
+ @computed_field
324
+ @cached_property
325
+ def filename(self) -> FilePath:
326
+ """
327
+ This function computes the backing file for the lookup. The names of mlmodel files must EXACTLY match the
328
+ names of their lookup definitions except with:
329
+ - __mlspl_ prefix
330
+ - .mlmodel file extension rather than the YML file extension.
331
+ """
332
+ if self.file_path is None:
333
+ raise ValueError(
334
+ f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None"
335
+ ) # type: ignore
336
+
337
+ if not self.file_path.stem.startswith("__mlspl_"):
338
+ raise ValueError(
339
+ f"The file_path for ML Model {self.name} MUST start with '__mlspl_', but it does not."
340
+ )
341
+
342
+ return self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}"
343
+
344
+ @computed_field
345
+ @cached_property
346
+ def app_filename(self) -> FilePath:
347
+ """
348
+ This function computes the filename to write into the app itself. For ML models this is the
349
+ backing file's stem plus the lookup_type extension, with no date stamp appended.
350
+ """
351
+ return pathlib.Path(f"{self.filename.stem}.{self.lookup_type}")
352
+
235
353
 
354
+ LookupAdapter = TypeAdapter(
355
+ Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")]
356
+ )
@@ -1,4 +1,4 @@
1
- # Used so that we can have a staticmethod that takes the class
1
+ # Used so that we can have a staticmethod that takes the class
2
2
  # type Macro as an argument
3
3
  from __future__ import annotations
4
4
  from typing import TYPE_CHECKING, List
@@ -6,18 +6,21 @@ import re
6
6
  from pydantic import Field, model_serializer, NonNegativeInt
7
7
  import uuid
8
8
  import datetime
9
+
9
10
  if TYPE_CHECKING:
10
11
  from contentctl.input.director import DirectorOutputDto
11
12
  from contentctl.objects.security_content_object import SecurityContentObject
12
13
 
13
- #The following macros are included in commonly-installed apps.
14
- #As such, we will ignore if they are missing from our app.
15
- #Included in
16
- MACROS_TO_IGNORE = set(["drop_dm_object_name"]) # Part of CIM/Splunk_SA_CIM
17
- MACROS_TO_IGNORE.add("get_asset") #SA-IdentityManagement, part of Enterprise Security
18
- MACROS_TO_IGNORE.add("get_risk_severity") #SA-ThreatIntelligence, part of Enterprise Security
19
- MACROS_TO_IGNORE.add("cim_corporate_web_domain_search") #Part of CIM/Splunk_SA_CIM
20
- #MACROS_TO_IGNORE.add("prohibited_processes")
14
+ # The following macros are included in commonly-installed apps.
15
+ # As such, we will ignore if they are missing from our app.
16
+ # Included in
17
+ MACROS_TO_IGNORE = set(["drop_dm_object_name"]) # Part of CIM/Splunk_SA_CIM
18
+ MACROS_TO_IGNORE.add("get_asset") # SA-IdentityManagement, part of Enterprise Security
19
+ MACROS_TO_IGNORE.add(
20
+ "get_risk_severity"
21
+ ) # SA-ThreatIntelligence, part of Enterprise Security
22
+ MACROS_TO_IGNORE.add("cim_corporate_web_domain_search") # Part of CIM/Splunk_SA_CIM
23
+ # MACROS_TO_IGNORE.add("prohibited_processes")
21
24
 
22
25
 
23
26
  class Macro(SecurityContentObject):
@@ -26,48 +29,62 @@ class Macro(SecurityContentObject):
26
29
  # TODO: Add id field to all macro ymls
27
30
  id: uuid.UUID = Field(default_factory=uuid.uuid4)
28
31
  date: datetime.date = Field(datetime.date.today())
29
- author: str = Field("NO AUTHOR DEFINED",max_length=255)
32
+ author: str = Field("NO AUTHOR DEFINED", max_length=255)
30
33
  version: NonNegativeInt = 1
31
-
32
-
33
34
 
34
35
  @model_serializer
35
36
  def serialize_model(self):
36
- #Call serializer for parent
37
+ # Call serializer for parent
37
38
  super_fields = super().serialize_model()
38
39
 
39
- #All fields custom to this model
40
- model= {
40
+ # All fields custom to this model
41
+ model = {
41
42
  "definition": self.definition,
42
43
  "description": self.description,
43
44
  }
44
-
45
- #return the model
45
+
46
+ # return the model
46
47
  model.update(super_fields)
47
-
48
+
48
49
  return model
49
-
50
+
50
51
  @staticmethod
51
- def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[str]=MACROS_TO_IGNORE)->list[Macro]:
52
- #Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
53
- #If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
54
- #then there is a small edge case where the regex below does not work properly. If that is
55
- #the case, we edit the search slightly to insert a space
52
+ def get_macros(
53
+ text_field: str,
54
+ director: DirectorOutputDto,
55
+ ignore_macros: set[str] = MACROS_TO_IGNORE,
56
+ ) -> list[Macro]:
57
+ # Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
58
+ # If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
59
+ # then there is a small edge case where the regex below does not work properly. If that is
60
+ # the case, we edit the search slightly to insert a space
56
61
  if re.findall(r"\`\`\`\`", text_field):
57
- raise ValueError("Search contained four or more '`' characters in a row which is invalid SPL"
58
- "This may have occurred when a macro was commented out.\n"
59
- "Please ammend your search to remove the substring '````'")
62
+ raise ValueError(
63
+ "Search contained four or more '`' characters in a row which is invalid SPL"
64
+ "This may have occurred when a macro was commented out.\n"
65
+ "Please ammend your search to remove the substring '````'"
66
+ )
67
+
68
+ # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below
69
+ text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
70
+
71
+ # Find all the macros, which start and end with a '`' character
72
+ macros_to_get = re.findall(r"`([^\s]+)`", text_field)
73
+ # If macros take arguments, stop at the first argument. We just want the name of the macro
74
+ macros_to_get = set(
75
+ [
76
+ macro[: macro.find("(")] if macro.find("(") != -1 else macro
77
+ for macro in macros_to_get
78
+ ]
79
+ )
60
80
 
61
- # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below
62
- text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
63
-
64
- # Find all the macros, which start and end with a '`' character
65
- macros_to_get = re.findall(r'`([^\s]+)`', text_field)
66
- #If macros take arguments, stop at the first argument. We just want the name of the macro
67
- macros_to_get = set([macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros_to_get])
68
-
69
- macros_to_ignore = set([macro for macro in macros_to_get if any(to_ignore in macro for to_ignore in ignore_macros)])
70
- #remove the ones that we will ignore
81
+ macros_to_ignore = set(
82
+ [
83
+ macro
84
+ for macro in macros_to_get
85
+ if any(to_ignore in macro for to_ignore in ignore_macros)
86
+ ]
87
+ )
88
+ # remove the ones that we will ignore
71
89
  macros_to_get -= macros_to_ignore
72
90
  return Macro.mapNamesToSecurityContentObjects(list(macros_to_get), director)
73
-